1#include "duckdb/common/local_file_system.hpp"
2
3#include "duckdb/common/checksum.hpp"
4#include "duckdb/common/exception.hpp"
5#include "duckdb/common/file_opener.hpp"
6#include "duckdb/common/helper.hpp"
7#include "duckdb/common/string_util.hpp"
8#include "duckdb/common/windows.hpp"
9#include "duckdb/function/scalar/string_functions.hpp"
10#include "duckdb/main/client_context.hpp"
11#include "duckdb/main/database.hpp"
12
13#include <cstdint>
14#include <cstdio>
15#include <sys/stat.h>
16
17#ifndef _WIN32
18#include <dirent.h>
19#include <fcntl.h>
20#include <string.h>
21#include <sys/types.h>
22#include <unistd.h>
23#else
24#include "duckdb/common/windows_util.hpp"
25
26#include <io.h>
27#include <string>
28
29#ifdef __MINGW32__
30// need to manually define this for mingw
31extern "C" WINBASEAPI BOOL WINAPI GetPhysicallyInstalledSystemMemory(PULONGLONG);
32#endif
33
34#undef FILE_CREATE // woo mingw
35#endif
36
37namespace duckdb {
38
//! Debug-only sanity check of an OpenFile flag combination.
//! Outside of DEBUG builds the body is compiled out entirely.
static void AssertValidFileFlags(uint8_t flags) {
#ifdef DEBUG
	const bool reads = flags & FileFlags::FILE_FLAGS_READ;
	const bool writes = flags & FileFlags::FILE_FLAGS_WRITE;
	const bool appends = flags & FileFlags::FILE_FLAGS_APPEND;
	const bool creates = flags & FileFlags::FILE_FLAGS_FILE_CREATE;
	const bool creates_new = flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW;
	// at least one of READ / WRITE has to be requested
	D_ASSERT(reads || writes);
	// APPEND, CREATE and CREATE_NEW are only meaningful for writable files
	D_ASSERT(!appends || writes);
	D_ASSERT(!creates || writes);
	D_ASSERT(!creates_new || writes);
	// CREATE and CREATE_NEW are mutually exclusive
	D_ASSERT(!(creates && creates_new));
#endif
}
53
54#ifndef _WIN32
55bool LocalFileSystem::FileExists(const string &filename) {
56 if (!filename.empty()) {
57 if (access(name: filename.c_str(), type: 0) == 0) {
58 struct stat status;
59 stat(file: filename.c_str(), buf: &status);
60 if (S_ISREG(status.st_mode)) {
61 return true;
62 }
63 }
64 }
65 // if any condition fails
66 return false;
67}
68
69bool LocalFileSystem::IsPipe(const string &filename) {
70 if (!filename.empty()) {
71 if (access(name: filename.c_str(), type: 0) == 0) {
72 struct stat status;
73 stat(file: filename.c_str(), buf: &status);
74 if (S_ISFIFO(status.st_mode)) {
75 return true;
76 }
77 }
78 }
79 // if any condition fails
80 return false;
81}
82
83#else
84bool LocalFileSystem::FileExists(const string &filename) {
85 auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str());
86 const wchar_t *wpath = unicode_path.c_str();
87 if (_waccess(wpath, 0) == 0) {
88 struct _stati64 status;
89 _wstati64(wpath, &status);
90 if (status.st_mode & S_IFREG) {
91 return true;
92 }
93 }
94 return false;
95}
96bool LocalFileSystem::IsPipe(const string &filename) {
97 auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str());
98 const wchar_t *wpath = unicode_path.c_str();
99 if (_waccess(wpath, 0) == 0) {
100 struct _stati64 status;
101 _wstati64(wpath, &status);
102 if (status.st_mode & _S_IFCHR) {
103 return true;
104 }
105 }
106 return false;
107}
108#endif
109
110#ifndef _WIN32
111// somehow sometimes this is missing
112#ifndef O_CLOEXEC
113#define O_CLOEXEC 0
114#endif
115
116// Solaris
117#ifndef O_DIRECT
118#define O_DIRECT 0
119#endif
120
121struct UnixFileHandle : public FileHandle {
122public:
123 UnixFileHandle(FileSystem &file_system, string path, int fd) : FileHandle(file_system, std::move(path)), fd(fd) {
124 }
125 ~UnixFileHandle() override {
126 UnixFileHandle::Close();
127 }
128
129 int fd;
130
131public:
132 void Close() override {
133 if (fd != -1) {
134 close(fd: fd);
135 fd = -1;
136 }
137 };
138};
139
140static FileType GetFileTypeInternal(int fd) { // LCOV_EXCL_START
141 struct stat s;
142 if (fstat(fd: fd, buf: &s) == -1) {
143 return FileType::FILE_TYPE_INVALID;
144 }
145 switch (s.st_mode & S_IFMT) {
146 case S_IFBLK:
147 return FileType::FILE_TYPE_BLOCKDEV;
148 case S_IFCHR:
149 return FileType::FILE_TYPE_CHARDEV;
150 case S_IFIFO:
151 return FileType::FILE_TYPE_FIFO;
152 case S_IFDIR:
153 return FileType::FILE_TYPE_DIR;
154 case S_IFLNK:
155 return FileType::FILE_TYPE_LINK;
156 case S_IFREG:
157 return FileType::FILE_TYPE_REGULAR;
158 case S_IFSOCK:
159 return FileType::FILE_TYPE_SOCKET;
160 default:
161 return FileType::FILE_TYPE_INVALID;
162 }
163} // LCOV_EXCL_STOP
164
165unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type,
166 FileCompressionType compression, FileOpener *opener) {
167 auto path = FileSystem::ExpandPath(path: path_p, opener);
168 if (compression != FileCompressionType::UNCOMPRESSED) {
169 throw NotImplementedException("Unsupported compression type for default file system");
170 }
171
172 AssertValidFileFlags(flags);
173
174 int open_flags = 0;
175 int rc;
176 bool open_read = flags & FileFlags::FILE_FLAGS_READ;
177 bool open_write = flags & FileFlags::FILE_FLAGS_WRITE;
178 if (open_read && open_write) {
179 open_flags = O_RDWR;
180 } else if (open_read) {
181 open_flags = O_RDONLY;
182 } else if (open_write) {
183 open_flags = O_WRONLY;
184 } else {
185 throw InternalException("READ, WRITE or both should be specified when opening a file");
186 }
187 if (open_write) {
188 // need Read or Write
189 D_ASSERT(flags & FileFlags::FILE_FLAGS_WRITE);
190 open_flags |= O_CLOEXEC;
191 if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) {
192 open_flags |= O_CREAT;
193 } else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) {
194 open_flags |= O_CREAT | O_TRUNC;
195 }
196 if (flags & FileFlags::FILE_FLAGS_APPEND) {
197 open_flags |= O_APPEND;
198 }
199 }
200 if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) {
201#if defined(__sun) && defined(__SVR4)
202 throw Exception("DIRECT_IO not supported on Solaris");
203#endif
204#if defined(__DARWIN__) || defined(__APPLE__) || defined(__OpenBSD__)
205 // OSX does not have O_DIRECT, instead we need to use fcntl afterwards to support direct IO
206 open_flags |= O_SYNC;
207#else
208 open_flags |= O_DIRECT | O_SYNC;
209#endif
210 }
211 int fd = open(file: path.c_str(), oflag: open_flags, 0666);
212 if (fd == -1) {
213 throw IOException("Cannot open file \"%s\": %s", path, strerror(errno));
214 }
215 // #if defined(__DARWIN__) || defined(__APPLE__)
216 // if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) {
217 // // OSX requires fcntl for Direct IO
218 // rc = fcntl(fd, F_NOCACHE, 1);
219 // if (fd == -1) {
220 // throw IOException("Could not enable direct IO for file \"%s\": %s", path, strerror(errno));
221 // }
222 // }
223 // #endif
224 if (lock_type != FileLockType::NO_LOCK) {
225 // set lock on file
226 // but only if it is not an input/output stream
227 auto file_type = GetFileTypeInternal(fd);
228 if (file_type != FileType::FILE_TYPE_FIFO && file_type != FileType::FILE_TYPE_SOCKET) {
229 struct flock fl;
230 memset(s: &fl, c: 0, n: sizeof fl);
231 fl.l_type = lock_type == FileLockType::READ_LOCK ? F_RDLCK : F_WRLCK;
232 fl.l_whence = SEEK_SET;
233 fl.l_start = 0;
234 fl.l_len = 0;
235 rc = fcntl(fd: fd, F_SETLK, &fl);
236 if (rc == -1) {
237 throw IOException("Could not set lock on file \"%s\": %s", path, strerror(errno));
238 }
239 }
240 }
241 return make_uniq<UnixFileHandle>(args&: *this, args&: path, args&: fd);
242}
243
244void LocalFileSystem::SetFilePointer(FileHandle &handle, idx_t location) {
245 int fd = handle.Cast<UnixFileHandle>().fd;
246 off_t offset = lseek(fd: fd, offset: location, SEEK_SET);
247 if (offset == (off_t)-1) {
248 throw IOException("Could not seek to location %lld for file \"%s\": %s", location, handle.path,
249 strerror(errno));
250 }
251}
252
253idx_t LocalFileSystem::GetFilePointer(FileHandle &handle) {
254 int fd = handle.Cast<UnixFileHandle>().fd;
255 off_t position = lseek(fd: fd, offset: 0, SEEK_CUR);
256 if (position == (off_t)-1) {
257 throw IOException("Could not get file position file \"%s\": %s", handle.path, strerror(errno));
258 }
259 return position;
260}
261
262void LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
263 int fd = handle.Cast<UnixFileHandle>().fd;
264 int64_t bytes_read = pread(fd: fd, buf: buffer, nbytes: nr_bytes, offset: location);
265 if (bytes_read == -1) {
266 throw IOException("Could not read from file \"%s\": %s", handle.path, strerror(errno));
267 }
268 if (bytes_read != nr_bytes) {
269 throw IOException("Could not read all bytes from file \"%s\": wanted=%lld read=%lld", handle.path, nr_bytes,
270 bytes_read);
271 }
272}
273
274int64_t LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) {
275 int fd = handle.Cast<UnixFileHandle>().fd;
276 int64_t bytes_read = read(fd: fd, buf: buffer, nbytes: nr_bytes);
277 if (bytes_read == -1) {
278 throw IOException("Could not read from file \"%s\": %s", handle.path, strerror(errno));
279 }
280 return bytes_read;
281}
282
283void LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
284 int fd = handle.Cast<UnixFileHandle>().fd;
285 int64_t bytes_written = pwrite(fd: fd, buf: buffer, n: nr_bytes, offset: location);
286 if (bytes_written == -1) {
287 throw IOException("Could not write file \"%s\": %s", handle.path, strerror(errno));
288 }
289 if (bytes_written != nr_bytes) {
290 throw IOException("Could not write all bytes to file \"%s\": wanted=%lld wrote=%lld", handle.path, nr_bytes,
291 bytes_written);
292 }
293}
294
295int64_t LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) {
296 int fd = handle.Cast<UnixFileHandle>().fd;
297 int64_t bytes_written = write(fd: fd, buf: buffer, n: nr_bytes);
298 if (bytes_written == -1) {
299 throw IOException("Could not write file \"%s\": %s", handle.path, strerror(errno));
300 }
301 return bytes_written;
302}
303
304int64_t LocalFileSystem::GetFileSize(FileHandle &handle) {
305 int fd = handle.Cast<UnixFileHandle>().fd;
306 struct stat s;
307 if (fstat(fd: fd, buf: &s) == -1) {
308 return -1;
309 }
310 return s.st_size;
311}
312
313time_t LocalFileSystem::GetLastModifiedTime(FileHandle &handle) {
314 int fd = handle.Cast<UnixFileHandle>().fd;
315 struct stat s;
316 if (fstat(fd: fd, buf: &s) == -1) {
317 return -1;
318 }
319 return s.st_mtime;
320}
321
322FileType LocalFileSystem::GetFileType(FileHandle &handle) {
323 int fd = handle.Cast<UnixFileHandle>().fd;
324 return GetFileTypeInternal(fd);
325}
326
327void LocalFileSystem::Truncate(FileHandle &handle, int64_t new_size) {
328 int fd = handle.Cast<UnixFileHandle>().fd;
329 if (ftruncate(fd: fd, length: new_size) != 0) {
330 throw IOException("Could not truncate file \"%s\": %s", handle.path, strerror(errno));
331 }
332}
333
334bool LocalFileSystem::DirectoryExists(const string &directory) {
335 if (!directory.empty()) {
336 if (access(name: directory.c_str(), type: 0) == 0) {
337 struct stat status;
338 stat(file: directory.c_str(), buf: &status);
339 if (status.st_mode & S_IFDIR) {
340 return true;
341 }
342 }
343 }
344 // if any condition fails
345 return false;
346}
347
348void LocalFileSystem::CreateDirectory(const string &directory) {
349 struct stat st;
350
351 if (stat(file: directory.c_str(), buf: &st) != 0) {
352 /* Directory does not exist. EEXIST for race condition */
353 if (mkdir(path: directory.c_str(), mode: 0755) != 0 && errno != EEXIST) {
354 throw IOException("Failed to create directory \"%s\"!", directory);
355 }
356 } else if (!S_ISDIR(st.st_mode)) {
357 throw IOException("Failed to create directory \"%s\": path exists but is not a directory!", directory);
358 }
359}
360
//! Recursively deletes the directory "path" together with everything inside it.
//! Returns 0 on success and non-zero as soon as any step fails (the walk stops at the first
//! failure, and -1 is returned when "path" cannot be opened as a directory).
//! Entries are inspected with lstat(), so a symbolic link is removed itself and is never
//! followed: the contents of a linked-to directory outside of "path" are left untouched.
int RemoveDirectoryRecursive(const char *path) {
	DIR *dir = opendir(path);
	if (!dir) {
		return -1;
	}
	const std::string base(path);
	int result = 0;
	struct dirent *entry;
	while (result == 0 && (entry = readdir(dir)) != nullptr) {
		// skip "." and "..": recursing into them would never terminate
		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
			continue;
		}
		const std::string child = base + "/" + entry->d_name;
		struct stat info;
		if (lstat(child.c_str(), &info) != 0) {
			result = -1;
		} else if (S_ISDIR(info.st_mode)) {
			result = RemoveDirectoryRecursive(child.c_str());
		} else {
			result = unlink(child.c_str());
		}
	}
	closedir(dir);
	if (result == 0) {
		// the directory is empty now and can be removed itself
		result = rmdir(path);
	}
	return result;
}
400
401void LocalFileSystem::RemoveDirectory(const string &directory) {
402 RemoveDirectoryRecursive(path: directory.c_str());
403}
404
405void LocalFileSystem::RemoveFile(const string &filename) {
406 if (std::remove(filename: filename.c_str()) != 0) {
407 throw IOException("Could not remove file \"%s\": %s", filename, strerror(errno));
408 }
409}
410
411bool LocalFileSystem::ListFiles(const string &directory, const std::function<void(const string &, bool)> &callback,
412 FileOpener *opener) {
413 if (!DirectoryExists(directory)) {
414 return false;
415 }
416 DIR *dir = opendir(name: directory.c_str());
417 if (!dir) {
418 return false;
419 }
420 struct dirent *ent;
421 // loop over all files in the directory
422 while ((ent = readdir(dirp: dir)) != nullptr) {
423 string name = string(ent->d_name);
424 // skip . .. and empty files
425 if (name.empty() || name == "." || name == "..") {
426 continue;
427 }
428 // now stat the file to figure out if it is a regular file or directory
429 string full_path = JoinPath(a: directory, b: name);
430 if (access(name: full_path.c_str(), type: 0) != 0) {
431 continue;
432 }
433 struct stat status;
434 stat(file: full_path.c_str(), buf: &status);
435 if (!(status.st_mode & S_IFREG) && !(status.st_mode & S_IFDIR)) {
436 // not a file or directory: skip
437 continue;
438 }
439 // invoke callback
440 callback(name, status.st_mode & S_IFDIR);
441 }
442 closedir(dirp: dir);
443 return true;
444}
445
446void LocalFileSystem::FileSync(FileHandle &handle) {
447 int fd = handle.Cast<UnixFileHandle>().fd;
448 if (fsync(fd: fd) != 0) {
449 throw FatalException("fsync failed!");
450 }
451}
452
453void LocalFileSystem::MoveFile(const string &source, const string &target) {
454 //! FIXME: rename does not guarantee atomicity or overwriting target file if it exists
455 if (rename(old: source.c_str(), new: target.c_str()) != 0) {
456 throw IOException("Could not rename file!");
457 }
458}
459
460std::string LocalFileSystem::GetLastErrorAsString() {
461 return string();
462}
463
464#else
465
466constexpr char PIPE_PREFIX[] = "\\\\.\\pipe\\";
467
468// Returns the last Win32 error, in string format. Returns an empty string if there is no error.
469std::string LocalFileSystem::GetLastErrorAsString() {
470 // Get the error message, if any.
471 DWORD errorMessageID = GetLastError();
472 if (errorMessageID == 0)
473 return std::string(); // No error message has been recorded
474
475 LPSTR messageBuffer = nullptr;
476 idx_t size =
477 FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
478 NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
479
480 std::string message(messageBuffer, size);
481
482 // Free the buffer.
483 LocalFree(messageBuffer);
484
485 return message;
486}
487
488struct WindowsFileHandle : public FileHandle {
489public:
490 WindowsFileHandle(FileSystem &file_system, string path, HANDLE fd)
491 : FileHandle(file_system, path), position(0), fd(fd) {
492 }
493 ~WindowsFileHandle() override {
494 Close();
495 }
496
497 idx_t position;
498 HANDLE fd;
499
500public:
501 void Close() override {
502 if (!fd) {
503 return;
504 }
505 CloseHandle(fd);
506 fd = nullptr;
507 };
508};
509
510unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type,
511 FileCompressionType compression, FileOpener *opener) {
512 auto path = FileSystem::ExpandPath(path_p, opener);
513 if (compression != FileCompressionType::UNCOMPRESSED) {
514 throw NotImplementedException("Unsupported compression type for default file system");
515 }
516 AssertValidFileFlags(flags);
517
518 DWORD desired_access;
519 DWORD share_mode;
520 DWORD creation_disposition = OPEN_EXISTING;
521 DWORD flags_and_attributes = FILE_ATTRIBUTE_NORMAL;
522 bool open_read = flags & FileFlags::FILE_FLAGS_READ;
523 bool open_write = flags & FileFlags::FILE_FLAGS_WRITE;
524 if (open_read && open_write) {
525 desired_access = GENERIC_READ | GENERIC_WRITE;
526 share_mode = 0;
527 } else if (open_read) {
528 desired_access = GENERIC_READ;
529 share_mode = FILE_SHARE_READ;
530 } else if (open_write) {
531 desired_access = GENERIC_WRITE;
532 share_mode = 0;
533 } else {
534 throw InternalException("READ, WRITE or both should be specified when opening a file");
535 }
536 if (open_write) {
537 if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) {
538 creation_disposition = OPEN_ALWAYS;
539 } else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) {
540 creation_disposition = CREATE_ALWAYS;
541 }
542 }
543 if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) {
544 flags_and_attributes |= FILE_FLAG_NO_BUFFERING;
545 }
546 auto unicode_path = WindowsUtil::UTF8ToUnicode(path.c_str());
547 HANDLE hFile = CreateFileW(unicode_path.c_str(), desired_access, share_mode, NULL, creation_disposition,
548 flags_and_attributes, NULL);
549 if (hFile == INVALID_HANDLE_VALUE) {
550 auto error = LocalFileSystem::GetLastErrorAsString();
551 throw IOException("Cannot open file \"%s\": %s", path.c_str(), error);
552 }
553 auto handle = make_uniq<WindowsFileHandle>(*this, path.c_str(), hFile);
554 if (flags & FileFlags::FILE_FLAGS_APPEND) {
555 auto file_size = GetFileSize(*handle);
556 SetFilePointer(*handle, file_size);
557 }
558 return std::move(handle);
559}
560
561void LocalFileSystem::SetFilePointer(FileHandle &handle, idx_t location) {
562 auto &whandle = (WindowsFileHandle &)handle;
563 whandle.position = location;
564 LARGE_INTEGER wlocation;
565 wlocation.QuadPart = location;
566 SetFilePointerEx(whandle.fd, wlocation, NULL, FILE_BEGIN);
567}
568
569idx_t LocalFileSystem::GetFilePointer(FileHandle &handle) {
570 return ((WindowsFileHandle &)handle).position;
571}
572
573static DWORD FSInternalRead(FileHandle &handle, HANDLE hFile, void *buffer, int64_t nr_bytes, idx_t location) {
574 DWORD bytes_read = 0;
575 OVERLAPPED ov = {};
576 ov.Internal = 0;
577 ov.InternalHigh = 0;
578 ov.Offset = location & 0xFFFFFFFF;
579 ov.OffsetHigh = location >> 32;
580 ov.hEvent = 0;
581 auto rc = ReadFile(hFile, buffer, (DWORD)nr_bytes, &bytes_read, &ov);
582 if (!rc) {
583 auto error = LocalFileSystem::GetLastErrorAsString();
584 throw IOException("Could not read file \"%s\" (error in ReadFile(location: %llu, nr_bytes: %lld)): %s",
585 handle.path, location, nr_bytes, error);
586 }
587 return bytes_read;
588}
589
590void LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
591 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
592 auto bytes_read = FSInternalRead(handle, hFile, buffer, nr_bytes, location);
593 if (bytes_read != nr_bytes) {
594 throw IOException("Could not read all bytes from file \"%s\": wanted=%lld read=%lld", handle.path, nr_bytes,
595 bytes_read);
596 }
597}
598
599int64_t LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) {
600 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
601 auto &pos = ((WindowsFileHandle &)handle).position;
602 auto n = std::min<idx_t>(std::max<idx_t>(GetFileSize(handle), pos) - pos, nr_bytes);
603 auto bytes_read = FSInternalRead(handle, hFile, buffer, n, pos);
604 pos += bytes_read;
605 return bytes_read;
606}
607
608static DWORD FSInternalWrite(FileHandle &handle, HANDLE hFile, void *buffer, int64_t nr_bytes, idx_t location) {
609 DWORD bytes_written = 0;
610 OVERLAPPED ov = {};
611 ov.Internal = 0;
612 ov.InternalHigh = 0;
613 ov.Offset = location & 0xFFFFFFFF;
614 ov.OffsetHigh = location >> 32;
615 ov.hEvent = 0;
616 auto rc = WriteFile(hFile, buffer, (DWORD)nr_bytes, &bytes_written, &ov);
617 if (!rc) {
618 auto error = LocalFileSystem::GetLastErrorAsString();
619 throw IOException("Could not write file \"%s\" (error in WriteFile): %s", handle.path, error);
620 }
621 return bytes_written;
622}
623
624void LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
625 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
626 auto bytes_written = FSInternalWrite(handle, hFile, buffer, nr_bytes, location);
627 if (bytes_written != nr_bytes) {
628 throw IOException("Could not write all bytes from file \"%s\": wanted=%lld wrote=%lld", handle.path, nr_bytes,
629 bytes_written);
630 }
631}
632
633int64_t LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) {
634 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
635 auto &pos = ((WindowsFileHandle &)handle).position;
636 auto bytes_written = FSInternalWrite(handle, hFile, buffer, nr_bytes, pos);
637 pos += bytes_written;
638 return bytes_written;
639}
640
641int64_t LocalFileSystem::GetFileSize(FileHandle &handle) {
642 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
643 LARGE_INTEGER result;
644 if (!GetFileSizeEx(hFile, &result)) {
645 return -1;
646 }
647 return result.QuadPart;
648}
649
650time_t LocalFileSystem::GetLastModifiedTime(FileHandle &handle) {
651 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
652
653 // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfiletime
654 FILETIME last_write;
655 if (GetFileTime(hFile, nullptr, nullptr, &last_write) == 0) {
656 return -1;
657 }
658
659 // https://stackoverflow.com/questions/29266743/what-is-dwlowdatetime-and-dwhighdatetime
660 ULARGE_INTEGER ul;
661 ul.LowPart = last_write.dwLowDateTime;
662 ul.HighPart = last_write.dwHighDateTime;
663 int64_t fileTime64 = ul.QuadPart;
664
665 // fileTime64 contains a 64-bit value representing the number of
666 // 100-nanosecond intervals since January 1, 1601 (UTC).
667 // https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-filetime
668
669 // Adapted from: https://stackoverflow.com/questions/6161776/convert-windows-filetime-to-second-in-unix-linux
670 const auto WINDOWS_TICK = 10000000;
671 const auto SEC_TO_UNIX_EPOCH = 11644473600LL;
672 time_t result = (fileTime64 / WINDOWS_TICK - SEC_TO_UNIX_EPOCH);
673 return result;
674}
675
676void LocalFileSystem::Truncate(FileHandle &handle, int64_t new_size) {
677 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
678 // seek to the location
679 SetFilePointer(handle, new_size);
680 // now set the end of file position
681 if (!SetEndOfFile(hFile)) {
682 auto error = LocalFileSystem::GetLastErrorAsString();
683 throw IOException("Failure in SetEndOfFile call on file \"%s\": %s", handle.path, error);
684 }
685}
686
687static DWORD WindowsGetFileAttributes(const string &filename) {
688 auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str());
689 return GetFileAttributesW(unicode_path.c_str());
690}
691
692bool LocalFileSystem::DirectoryExists(const string &directory) {
693 DWORD attrs = WindowsGetFileAttributes(directory);
694 return (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY));
695}
696
697void LocalFileSystem::CreateDirectory(const string &directory) {
698 if (DirectoryExists(directory)) {
699 return;
700 }
701 auto unicode_path = WindowsUtil::UTF8ToUnicode(directory.c_str());
702 if (directory.empty() || !CreateDirectoryW(unicode_path.c_str(), NULL) || !DirectoryExists(directory)) {
703 throw IOException("Could not create directory!");
704 }
705}
706
707static void DeleteDirectoryRecursive(FileSystem &fs, string directory) {
708 fs.ListFiles(directory, [&](const string &fname, bool is_directory) {
709 if (is_directory) {
710 DeleteDirectoryRecursive(fs, fs.JoinPath(directory, fname));
711 } else {
712 fs.RemoveFile(fs.JoinPath(directory, fname));
713 }
714 });
715 auto unicode_path = WindowsUtil::UTF8ToUnicode(directory.c_str());
716 if (!RemoveDirectoryW(unicode_path.c_str())) {
717 auto error = LocalFileSystem::GetLastErrorAsString();
718 throw IOException("Failed to delete directory \"%s\": %s", directory, error);
719 }
720}
721
722void LocalFileSystem::RemoveDirectory(const string &directory) {
723 if (FileExists(directory)) {
724 throw IOException("Attempting to delete directory \"%s\", but it is a file and not a directory!", directory);
725 }
726 if (!DirectoryExists(directory)) {
727 return;
728 }
729 DeleteDirectoryRecursive(*this, directory.c_str());
730}
731
732void LocalFileSystem::RemoveFile(const string &filename) {
733 auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str());
734 if (!DeleteFileW(unicode_path.c_str())) {
735 auto error = LocalFileSystem::GetLastErrorAsString();
736 throw IOException("Failed to delete file \"%s\": %s", filename, error);
737 }
738}
739
740bool LocalFileSystem::ListFiles(const string &directory, const std::function<void(const string &, bool)> &callback,
741 FileOpener *opener) {
742 string search_dir = JoinPath(directory, "*");
743
744 auto unicode_path = WindowsUtil::UTF8ToUnicode(search_dir.c_str());
745
746 WIN32_FIND_DATAW ffd;
747 HANDLE hFind = FindFirstFileW(unicode_path.c_str(), &ffd);
748 if (hFind == INVALID_HANDLE_VALUE) {
749 return false;
750 }
751 do {
752 string cFileName = WindowsUtil::UnicodeToUTF8(ffd.cFileName);
753 if (cFileName == "." || cFileName == "..") {
754 continue;
755 }
756 callback(cFileName, ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY);
757 } while (FindNextFileW(hFind, &ffd) != 0);
758
759 DWORD dwError = GetLastError();
760 if (dwError != ERROR_NO_MORE_FILES) {
761 FindClose(hFind);
762 return false;
763 }
764
765 FindClose(hFind);
766 return true;
767}
768
769void LocalFileSystem::FileSync(FileHandle &handle) {
770 HANDLE hFile = ((WindowsFileHandle &)handle).fd;
771 if (FlushFileBuffers(hFile) == 0) {
772 throw IOException("Could not flush file handle to disk!");
773 }
774}
775
776void LocalFileSystem::MoveFile(const string &source, const string &target) {
777 auto source_unicode = WindowsUtil::UTF8ToUnicode(source.c_str());
778 auto target_unicode = WindowsUtil::UTF8ToUnicode(target.c_str());
779 if (!MoveFileW(source_unicode.c_str(), target_unicode.c_str())) {
780 throw IOException("Could not move file: %s", GetLastErrorAsString());
781 }
782}
783
784FileType LocalFileSystem::GetFileType(FileHandle &handle) {
785 auto path = ((WindowsFileHandle &)handle).path;
786 // pipes in windows are just files in '\\.\pipe\' folder
787 if (strncmp(path.c_str(), PIPE_PREFIX, strlen(PIPE_PREFIX)) == 0) {
788 return FileType::FILE_TYPE_FIFO;
789 }
790 DWORD attrs = WindowsGetFileAttributes(path.c_str());
791 if (attrs != INVALID_FILE_ATTRIBUTES) {
792 if (attrs & FILE_ATTRIBUTE_DIRECTORY) {
793 return FileType::FILE_TYPE_DIR;
794 } else {
795 return FileType::FILE_TYPE_REGULAR;
796 }
797 }
798 return FileType::FILE_TYPE_INVALID;
799}
800#endif
801
802bool LocalFileSystem::CanSeek() {
803 return true;
804}
805
806bool LocalFileSystem::OnDiskFile(FileHandle &handle) {
807 return true;
808}
809
810void LocalFileSystem::Seek(FileHandle &handle, idx_t location) {
811 if (!CanSeek()) {
812 throw IOException("Cannot seek in files of this type");
813 }
814 SetFilePointer(handle, location);
815}
816
817idx_t LocalFileSystem::SeekPosition(FileHandle &handle) {
818 if (!CanSeek()) {
819 throw IOException("Cannot seek in files of this type");
820 }
821 return GetFilePointer(handle);
822}
823
//! Returns true only if the path chunk is exactly the recursive-crawl pattern "**".
static bool IsCrawl(const string &glob) {
	// anything longer or shorter (e.g. "**a" or "*") is an ordinary glob, not a crawl
	return glob.compare("**") == 0;
}
//! Returns true if more than one of the path chunks is exactly "**".
static bool HasMultipleCrawl(const vector<string> &splits) {
	size_t crawl_count = 0;
	for (const auto &split : splits) {
		if (split == "**") {
			crawl_count++;
		}
	}
	return crawl_count > 1;
}
//! Returns true if "path" itself is a symbolic link (POSIX: per lstat; Windows: the path has
//! the reparse-point attribute). Paths that cannot be inspected yield false.
static bool IsSymbolicLink(const string &path) {
#ifndef _WIN32
	struct stat link_info;
	// lstat (unlike stat) reports on the link itself rather than on its target
	if (lstat(path.c_str(), &link_info) == -1) {
		return false;
	}
	return S_ISLNK(link_info.st_mode);
#else
	const auto attributes = WindowsGetFileAttributes(path);
	return attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_REPARSE_POINT);
#endif
}
842
843static void RecursiveGlobDirectories(FileSystem &fs, const string &path, vector<string> &result, bool match_directory,
844 bool join_path) {
845
846 fs.ListFiles(directory: path, callback: [&](const string &fname, bool is_directory) {
847 string concat;
848 if (join_path) {
849 concat = fs.JoinPath(a: path, b: fname);
850 } else {
851 concat = fname;
852 }
853 if (IsSymbolicLink(path: concat)) {
854 return;
855 }
856 if (is_directory == match_directory) {
857 result.push_back(x: concat);
858 }
859 if (is_directory) {
860 RecursiveGlobDirectories(fs, path: concat, result, match_directory, join_path: true);
861 }
862 });
863}
864
865static void GlobFilesInternal(FileSystem &fs, const string &path, const string &glob, bool match_directory,
866 vector<string> &result, bool join_path) {
867 fs.ListFiles(directory: path, callback: [&](const string &fname, bool is_directory) {
868 if (is_directory != match_directory) {
869 return;
870 }
871 if (LikeFun::Glob(s: fname.c_str(), slen: fname.size(), pattern: glob.c_str(), plen: glob.size())) {
872 if (join_path) {
873 result.push_back(x: fs.JoinPath(a: path, b: fname));
874 } else {
875 result.push_back(x: fname);
876 }
877 }
878 });
879}
880
881vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
882 vector<string> result;
883 if (FileExists(filename: path) || IsPipe(filename: path)) {
884 result.push_back(x: path);
885 } else if (!absolute_path) {
886 Value value;
887 if (opener && opener->TryGetCurrentSetting(key: "file_search_path", result&: value)) {
888 auto search_paths_str = value.ToString();
889 vector<std::string> search_paths = StringUtil::Split(str: search_paths_str, delimiter: ',');
890 for (const auto &search_path : search_paths) {
891 auto joined_path = JoinPath(a: search_path, b: path);
892 if (FileExists(filename: joined_path) || IsPipe(filename: joined_path)) {
893 result.push_back(x: joined_path);
894 }
895 }
896 }
897 }
898 return result;
899}
900
901vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
902 if (path.empty()) {
903 return vector<string>();
904 }
905 // split up the path into separate chunks
906 vector<string> splits;
907 idx_t last_pos = 0;
908 for (idx_t i = 0; i < path.size(); i++) {
909 if (path[i] == '\\' || path[i] == '/') {
910 if (i == last_pos) {
911 // empty: skip this position
912 last_pos = i + 1;
913 continue;
914 }
915 if (splits.empty()) {
916 splits.push_back(x: path.substr(pos: 0, n: i));
917 } else {
918 splits.push_back(x: path.substr(pos: last_pos, n: i - last_pos));
919 }
920 last_pos = i + 1;
921 }
922 }
923 splits.push_back(x: path.substr(pos: last_pos, n: path.size() - last_pos));
924 // handle absolute paths
925 bool absolute_path = false;
926 if (path[0] == '/') {
927 // first character is a slash - unix absolute path
928 absolute_path = true;
929 } else if (StringUtil::Contains(haystack: splits[0], needle: ":")) {
930 // first split has a colon - windows absolute path
931 absolute_path = true;
932 } else if (splits[0] == "~") {
933 // starts with home directory
934 auto home_directory = GetHomeDirectory(opener);
935 if (!home_directory.empty()) {
936 absolute_path = true;
937 splits[0] = home_directory;
938 D_ASSERT(path[0] == '~');
939 if (!HasGlob(str: path)) {
940 return Glob(path: home_directory + path.substr(pos: 1));
941 }
942 }
943 }
944 // Check if the path has a glob at all
945 if (!HasGlob(str: path)) {
946 // no glob: return only the file (if it exists or is a pipe)
947 return FetchFileWithoutGlob(path, opener, absolute_path);
948 }
949 vector<string> previous_directories;
950 if (absolute_path) {
951 // for absolute paths, we don't start by scanning the current directory
952 previous_directories.push_back(x: splits[0]);
953 } else {
954 // If file_search_path is set, use those paths as the first glob elements
955 Value value;
956 if (opener && opener->TryGetCurrentSetting(key: "file_search_path", result&: value)) {
957 auto search_paths_str = value.ToString();
958 vector<std::string> search_paths = StringUtil::Split(str: search_paths_str, delimiter: ',');
959 for (const auto &search_path : search_paths) {
960 previous_directories.push_back(x: search_path);
961 }
962 }
963 }
964
965 if (HasMultipleCrawl(splits)) {
966 throw IOException("Cannot use multiple \'**\' in one path");
967 }
968
969 for (idx_t i = absolute_path ? 1 : 0; i < splits.size(); i++) {
970 bool is_last_chunk = i + 1 == splits.size();
971 bool has_glob = HasGlob(str: splits[i]);
972 // if it's the last chunk we need to find files, otherwise we find directories
973 // not the last chunk: gather a list of all directories that match the glob pattern
974 vector<string> result;
975 if (!has_glob) {
976 // no glob, just append as-is
977 if (previous_directories.empty()) {
978 result.push_back(x: splits[i]);
979 } else {
980 if (is_last_chunk) {
981 for (auto &prev_directory : previous_directories) {
982 const string filename = JoinPath(a: prev_directory, b: splits[i]);
983 if (FileExists(filename) || DirectoryExists(directory: filename)) {
984 result.push_back(x: filename);
985 }
986 }
987 } else {
988 for (auto &prev_directory : previous_directories) {
989 result.push_back(x: JoinPath(a: prev_directory, b: splits[i]));
990 }
991 }
992 }
993 } else {
994 if (IsCrawl(glob: splits[i])) {
995 if (!is_last_chunk) {
996 result = previous_directories;
997 }
998 if (previous_directories.empty()) {
999 RecursiveGlobDirectories(fs&: *this, path: ".", result, match_directory: !is_last_chunk, join_path: false);
1000 } else {
1001 for (auto &prev_dir : previous_directories) {
1002 RecursiveGlobDirectories(fs&: *this, path: prev_dir, result, match_directory: !is_last_chunk, join_path: true);
1003 }
1004 }
1005 } else {
1006 if (previous_directories.empty()) {
1007 // no previous directories: list in the current path
1008 GlobFilesInternal(fs&: *this, path: ".", glob: splits[i], match_directory: !is_last_chunk, result, join_path: false);
1009 } else {
1010 // previous directories
1011 // we iterate over each of the previous directories, and apply the glob of the current directory
1012 for (auto &prev_directory : previous_directories) {
1013 GlobFilesInternal(fs&: *this, path: prev_directory, glob: splits[i], match_directory: !is_last_chunk, result, join_path: true);
1014 }
1015 }
1016 }
1017 }
1018 if (result.empty()) {
1019 // no result found that matches the glob
1020 // last ditch effort: search the path as a string literal
1021 return FetchFileWithoutGlob(path, opener, absolute_path);
1022 }
1023 if (is_last_chunk) {
1024 return result;
1025 }
1026 previous_directories = std::move(result);
1027 }
1028 return vector<string>();
1029}
1030
1031unique_ptr<FileSystem> FileSystem::CreateLocal() {
1032 return make_uniq<LocalFileSystem>();
1033}
1034
1035} // namespace duckdb
1036