1 | #include "duckdb/common/local_file_system.hpp" |
2 | |
3 | #include "duckdb/common/checksum.hpp" |
4 | #include "duckdb/common/exception.hpp" |
5 | #include "duckdb/common/file_opener.hpp" |
6 | #include "duckdb/common/helper.hpp" |
7 | #include "duckdb/common/string_util.hpp" |
8 | #include "duckdb/common/windows.hpp" |
9 | #include "duckdb/function/scalar/string_functions.hpp" |
10 | #include "duckdb/main/client_context.hpp" |
11 | #include "duckdb/main/database.hpp" |
12 | |
13 | #include <cstdint> |
14 | #include <cstdio> |
15 | #include <sys/stat.h> |
16 | |
17 | #ifndef _WIN32 |
18 | #include <dirent.h> |
19 | #include <fcntl.h> |
20 | #include <string.h> |
21 | #include <sys/types.h> |
22 | #include <unistd.h> |
23 | #else |
24 | #include "duckdb/common/windows_util.hpp" |
25 | |
26 | #include <io.h> |
27 | #include <string> |
28 | |
29 | #ifdef __MINGW32__ |
30 | // need to manually define this for mingw |
31 | extern "C" WINBASEAPI BOOL WINAPI GetPhysicallyInstalledSystemMemory(PULONGLONG); |
32 | #endif |
33 | |
34 | #undef FILE_CREATE // woo mingw |
35 | #endif |
36 | |
37 | namespace duckdb { |
38 | |
//! Debug-only sanity check of the FileFlags bitmask handed to OpenFile.
//! When DEBUG is not defined the body is compiled out and the function does nothing.
static void AssertValidFileFlags(uint8_t flags) {
#ifdef DEBUG
	const bool wants_read = flags & FileFlags::FILE_FLAGS_READ;
	const bool wants_write = flags & FileFlags::FILE_FLAGS_WRITE;
	// at least one of READ / WRITE has to be requested
	D_ASSERT(wants_read || wants_write);
	// APPEND, CREATE and CREATE_NEW are only allowed together with WRITE
	D_ASSERT(!(flags & FileFlags::FILE_FLAGS_APPEND) || wants_write);
	D_ASSERT(!(flags & FileFlags::FILE_FLAGS_FILE_CREATE) || wants_write);
	D_ASSERT(!(flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) || wants_write);
	// CREATE and CREATE_NEW are mutually exclusive
	D_ASSERT(!((flags & FileFlags::FILE_FLAGS_FILE_CREATE) && (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW)));
#endif
}
53 | |
54 | #ifndef _WIN32 |
55 | bool LocalFileSystem::FileExists(const string &filename) { |
56 | if (!filename.empty()) { |
57 | if (access(name: filename.c_str(), type: 0) == 0) { |
58 | struct stat status; |
59 | stat(file: filename.c_str(), buf: &status); |
60 | if (S_ISREG(status.st_mode)) { |
61 | return true; |
62 | } |
63 | } |
64 | } |
65 | // if any condition fails |
66 | return false; |
67 | } |
68 | |
69 | bool LocalFileSystem::IsPipe(const string &filename) { |
70 | if (!filename.empty()) { |
71 | if (access(name: filename.c_str(), type: 0) == 0) { |
72 | struct stat status; |
73 | stat(file: filename.c_str(), buf: &status); |
74 | if (S_ISFIFO(status.st_mode)) { |
75 | return true; |
76 | } |
77 | } |
78 | } |
79 | // if any condition fails |
80 | return false; |
81 | } |
82 | |
83 | #else |
84 | bool LocalFileSystem::FileExists(const string &filename) { |
85 | auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); |
86 | const wchar_t *wpath = unicode_path.c_str(); |
87 | if (_waccess(wpath, 0) == 0) { |
88 | struct _stati64 status; |
89 | _wstati64(wpath, &status); |
90 | if (status.st_mode & S_IFREG) { |
91 | return true; |
92 | } |
93 | } |
94 | return false; |
95 | } |
96 | bool LocalFileSystem::IsPipe(const string &filename) { |
97 | auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); |
98 | const wchar_t *wpath = unicode_path.c_str(); |
99 | if (_waccess(wpath, 0) == 0) { |
100 | struct _stati64 status; |
101 | _wstati64(wpath, &status); |
102 | if (status.st_mode & _S_IFCHR) { |
103 | return true; |
104 | } |
105 | } |
106 | return false; |
107 | } |
108 | #endif |
109 | |
110 | #ifndef _WIN32 |
// O_CLOEXEC is not defined on every platform; fall back to 0 so OR-ing it into the open flags is a no-op
112 | #ifndef O_CLOEXEC |
113 | #define O_CLOEXEC 0 |
114 | #endif |
115 | |
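// O_DIRECT is not defined on every platform (e.g. Solaris); fall back to 0 so OR-ing it into the open flags is a no-op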
117 | #ifndef O_DIRECT |
118 | #define O_DIRECT 0 |
119 | #endif |
120 | |
121 | struct UnixFileHandle : public FileHandle { |
122 | public: |
123 | UnixFileHandle(FileSystem &file_system, string path, int fd) : FileHandle(file_system, std::move(path)), fd(fd) { |
124 | } |
125 | ~UnixFileHandle() override { |
126 | UnixFileHandle::Close(); |
127 | } |
128 | |
129 | int fd; |
130 | |
131 | public: |
132 | void Close() override { |
133 | if (fd != -1) { |
134 | close(fd: fd); |
135 | fd = -1; |
136 | } |
137 | }; |
138 | }; |
139 | |
140 | static FileType GetFileTypeInternal(int fd) { // LCOV_EXCL_START |
141 | struct stat s; |
142 | if (fstat(fd: fd, buf: &s) == -1) { |
143 | return FileType::FILE_TYPE_INVALID; |
144 | } |
145 | switch (s.st_mode & S_IFMT) { |
146 | case S_IFBLK: |
147 | return FileType::FILE_TYPE_BLOCKDEV; |
148 | case S_IFCHR: |
149 | return FileType::FILE_TYPE_CHARDEV; |
150 | case S_IFIFO: |
151 | return FileType::FILE_TYPE_FIFO; |
152 | case S_IFDIR: |
153 | return FileType::FILE_TYPE_DIR; |
154 | case S_IFLNK: |
155 | return FileType::FILE_TYPE_LINK; |
156 | case S_IFREG: |
157 | return FileType::FILE_TYPE_REGULAR; |
158 | case S_IFSOCK: |
159 | return FileType::FILE_TYPE_SOCKET; |
160 | default: |
161 | return FileType::FILE_TYPE_INVALID; |
162 | } |
163 | } // LCOV_EXCL_STOP |
164 | |
165 | unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type, |
166 | FileCompressionType compression, FileOpener *opener) { |
167 | auto path = FileSystem::ExpandPath(path: path_p, opener); |
168 | if (compression != FileCompressionType::UNCOMPRESSED) { |
169 | throw NotImplementedException("Unsupported compression type for default file system" ); |
170 | } |
171 | |
172 | AssertValidFileFlags(flags); |
173 | |
174 | int open_flags = 0; |
175 | int rc; |
176 | bool open_read = flags & FileFlags::FILE_FLAGS_READ; |
177 | bool open_write = flags & FileFlags::FILE_FLAGS_WRITE; |
178 | if (open_read && open_write) { |
179 | open_flags = O_RDWR; |
180 | } else if (open_read) { |
181 | open_flags = O_RDONLY; |
182 | } else if (open_write) { |
183 | open_flags = O_WRONLY; |
184 | } else { |
185 | throw InternalException("READ, WRITE or both should be specified when opening a file" ); |
186 | } |
187 | if (open_write) { |
188 | // need Read or Write |
189 | D_ASSERT(flags & FileFlags::FILE_FLAGS_WRITE); |
190 | open_flags |= O_CLOEXEC; |
191 | if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) { |
192 | open_flags |= O_CREAT; |
193 | } else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) { |
194 | open_flags |= O_CREAT | O_TRUNC; |
195 | } |
196 | if (flags & FileFlags::FILE_FLAGS_APPEND) { |
197 | open_flags |= O_APPEND; |
198 | } |
199 | } |
200 | if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) { |
201 | #if defined(__sun) && defined(__SVR4) |
202 | throw Exception("DIRECT_IO not supported on Solaris" ); |
203 | #endif |
204 | #if defined(__DARWIN__) || defined(__APPLE__) || defined(__OpenBSD__) |
205 | // OSX does not have O_DIRECT, instead we need to use fcntl afterwards to support direct IO |
206 | open_flags |= O_SYNC; |
207 | #else |
208 | open_flags |= O_DIRECT | O_SYNC; |
209 | #endif |
210 | } |
211 | int fd = open(file: path.c_str(), oflag: open_flags, 0666); |
212 | if (fd == -1) { |
213 | throw IOException("Cannot open file \"%s\": %s" , path, strerror(errno)); |
214 | } |
215 | // #if defined(__DARWIN__) || defined(__APPLE__) |
216 | // if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) { |
217 | // // OSX requires fcntl for Direct IO |
218 | // rc = fcntl(fd, F_NOCACHE, 1); |
219 | // if (fd == -1) { |
220 | // throw IOException("Could not enable direct IO for file \"%s\": %s", path, strerror(errno)); |
221 | // } |
222 | // } |
223 | // #endif |
224 | if (lock_type != FileLockType::NO_LOCK) { |
225 | // set lock on file |
226 | // but only if it is not an input/output stream |
227 | auto file_type = GetFileTypeInternal(fd); |
228 | if (file_type != FileType::FILE_TYPE_FIFO && file_type != FileType::FILE_TYPE_SOCKET) { |
229 | struct flock fl; |
230 | memset(s: &fl, c: 0, n: sizeof fl); |
231 | fl.l_type = lock_type == FileLockType::READ_LOCK ? F_RDLCK : F_WRLCK; |
232 | fl.l_whence = SEEK_SET; |
233 | fl.l_start = 0; |
234 | fl.l_len = 0; |
235 | rc = fcntl(fd: fd, F_SETLK, &fl); |
236 | if (rc == -1) { |
237 | throw IOException("Could not set lock on file \"%s\": %s" , path, strerror(errno)); |
238 | } |
239 | } |
240 | } |
241 | return make_uniq<UnixFileHandle>(args&: *this, args&: path, args&: fd); |
242 | } |
243 | |
244 | void LocalFileSystem::SetFilePointer(FileHandle &handle, idx_t location) { |
245 | int fd = handle.Cast<UnixFileHandle>().fd; |
246 | off_t offset = lseek(fd: fd, offset: location, SEEK_SET); |
247 | if (offset == (off_t)-1) { |
248 | throw IOException("Could not seek to location %lld for file \"%s\": %s" , location, handle.path, |
249 | strerror(errno)); |
250 | } |
251 | } |
252 | |
253 | idx_t LocalFileSystem::GetFilePointer(FileHandle &handle) { |
254 | int fd = handle.Cast<UnixFileHandle>().fd; |
255 | off_t position = lseek(fd: fd, offset: 0, SEEK_CUR); |
256 | if (position == (off_t)-1) { |
257 | throw IOException("Could not get file position file \"%s\": %s" , handle.path, strerror(errno)); |
258 | } |
259 | return position; |
260 | } |
261 | |
262 | void LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
263 | int fd = handle.Cast<UnixFileHandle>().fd; |
264 | int64_t bytes_read = pread(fd: fd, buf: buffer, nbytes: nr_bytes, offset: location); |
265 | if (bytes_read == -1) { |
266 | throw IOException("Could not read from file \"%s\": %s" , handle.path, strerror(errno)); |
267 | } |
268 | if (bytes_read != nr_bytes) { |
269 | throw IOException("Could not read all bytes from file \"%s\": wanted=%lld read=%lld" , handle.path, nr_bytes, |
270 | bytes_read); |
271 | } |
272 | } |
273 | |
274 | int64_t LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
275 | int fd = handle.Cast<UnixFileHandle>().fd; |
276 | int64_t bytes_read = read(fd: fd, buf: buffer, nbytes: nr_bytes); |
277 | if (bytes_read == -1) { |
278 | throw IOException("Could not read from file \"%s\": %s" , handle.path, strerror(errno)); |
279 | } |
280 | return bytes_read; |
281 | } |
282 | |
283 | void LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
284 | int fd = handle.Cast<UnixFileHandle>().fd; |
285 | int64_t bytes_written = pwrite(fd: fd, buf: buffer, n: nr_bytes, offset: location); |
286 | if (bytes_written == -1) { |
287 | throw IOException("Could not write file \"%s\": %s" , handle.path, strerror(errno)); |
288 | } |
289 | if (bytes_written != nr_bytes) { |
290 | throw IOException("Could not write all bytes to file \"%s\": wanted=%lld wrote=%lld" , handle.path, nr_bytes, |
291 | bytes_written); |
292 | } |
293 | } |
294 | |
295 | int64_t LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
296 | int fd = handle.Cast<UnixFileHandle>().fd; |
297 | int64_t bytes_written = write(fd: fd, buf: buffer, n: nr_bytes); |
298 | if (bytes_written == -1) { |
299 | throw IOException("Could not write file \"%s\": %s" , handle.path, strerror(errno)); |
300 | } |
301 | return bytes_written; |
302 | } |
303 | |
304 | int64_t LocalFileSystem::GetFileSize(FileHandle &handle) { |
305 | int fd = handle.Cast<UnixFileHandle>().fd; |
306 | struct stat s; |
307 | if (fstat(fd: fd, buf: &s) == -1) { |
308 | return -1; |
309 | } |
310 | return s.st_size; |
311 | } |
312 | |
313 | time_t LocalFileSystem::GetLastModifiedTime(FileHandle &handle) { |
314 | int fd = handle.Cast<UnixFileHandle>().fd; |
315 | struct stat s; |
316 | if (fstat(fd: fd, buf: &s) == -1) { |
317 | return -1; |
318 | } |
319 | return s.st_mtime; |
320 | } |
321 | |
322 | FileType LocalFileSystem::GetFileType(FileHandle &handle) { |
323 | int fd = handle.Cast<UnixFileHandle>().fd; |
324 | return GetFileTypeInternal(fd); |
325 | } |
326 | |
327 | void LocalFileSystem::Truncate(FileHandle &handle, int64_t new_size) { |
328 | int fd = handle.Cast<UnixFileHandle>().fd; |
329 | if (ftruncate(fd: fd, length: new_size) != 0) { |
330 | throw IOException("Could not truncate file \"%s\": %s" , handle.path, strerror(errno)); |
331 | } |
332 | } |
333 | |
334 | bool LocalFileSystem::DirectoryExists(const string &directory) { |
335 | if (!directory.empty()) { |
336 | if (access(name: directory.c_str(), type: 0) == 0) { |
337 | struct stat status; |
338 | stat(file: directory.c_str(), buf: &status); |
339 | if (status.st_mode & S_IFDIR) { |
340 | return true; |
341 | } |
342 | } |
343 | } |
344 | // if any condition fails |
345 | return false; |
346 | } |
347 | |
348 | void LocalFileSystem::CreateDirectory(const string &directory) { |
349 | struct stat st; |
350 | |
351 | if (stat(file: directory.c_str(), buf: &st) != 0) { |
352 | /* Directory does not exist. EEXIST for race condition */ |
353 | if (mkdir(path: directory.c_str(), mode: 0755) != 0 && errno != EEXIST) { |
354 | throw IOException("Failed to create directory \"%s\"!" , directory); |
355 | } |
356 | } else if (!S_ISDIR(st.st_mode)) { |
357 | throw IOException("Failed to create directory \"%s\": path exists but is not a directory!" , directory); |
358 | } |
359 | } |
360 | |
//! Recursively deletes the directory `path` and everything below it.
//! Entries are inspected with lstat(), so symbolic links are removed themselves and never
//! followed: the target of a link (and anything inside it) is left untouched.
//! Returns 0 on success and a non-zero value as soon as any step fails; in that case the
//! remaining entries and the directory itself are left in place.
int RemoveDirectoryRecursive(const char *path) {
	DIR *dir = opendir(path);
	if (!dir) {
		return -1;
	}
	int result = 0;
	struct dirent *entry;
	while (result == 0 && (entry = readdir(dir)) != nullptr) {
		// skip "." and "..": recursing into them would never terminate
		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
			continue;
		}
		const std::string child = std::string(path) + "/" + entry->d_name;
		struct stat statbuf;
		if (lstat(child.c_str(), &statbuf) != 0) {
			result = -1;
		} else if (S_ISDIR(statbuf.st_mode)) {
			result = RemoveDirectoryRecursive(child.c_str());
		} else {
			// regular files, symbolic links (also dangling ones) and other non-directories
			result = unlink(child.c_str());
		}
	}
	closedir(dir);
	if (result == 0) {
		result = rmdir(path);
	}
	return result;
}
400 | |
401 | void LocalFileSystem::RemoveDirectory(const string &directory) { |
402 | RemoveDirectoryRecursive(path: directory.c_str()); |
403 | } |
404 | |
405 | void LocalFileSystem::RemoveFile(const string &filename) { |
406 | if (std::remove(filename: filename.c_str()) != 0) { |
407 | throw IOException("Could not remove file \"%s\": %s" , filename, strerror(errno)); |
408 | } |
409 | } |
410 | |
411 | bool LocalFileSystem::ListFiles(const string &directory, const std::function<void(const string &, bool)> &callback, |
412 | FileOpener *opener) { |
413 | if (!DirectoryExists(directory)) { |
414 | return false; |
415 | } |
416 | DIR *dir = opendir(name: directory.c_str()); |
417 | if (!dir) { |
418 | return false; |
419 | } |
420 | struct dirent *ent; |
421 | // loop over all files in the directory |
422 | while ((ent = readdir(dirp: dir)) != nullptr) { |
423 | string name = string(ent->d_name); |
424 | // skip . .. and empty files |
425 | if (name.empty() || name == "." || name == ".." ) { |
426 | continue; |
427 | } |
428 | // now stat the file to figure out if it is a regular file or directory |
429 | string full_path = JoinPath(a: directory, b: name); |
430 | if (access(name: full_path.c_str(), type: 0) != 0) { |
431 | continue; |
432 | } |
433 | struct stat status; |
434 | stat(file: full_path.c_str(), buf: &status); |
435 | if (!(status.st_mode & S_IFREG) && !(status.st_mode & S_IFDIR)) { |
436 | // not a file or directory: skip |
437 | continue; |
438 | } |
439 | // invoke callback |
440 | callback(name, status.st_mode & S_IFDIR); |
441 | } |
442 | closedir(dirp: dir); |
443 | return true; |
444 | } |
445 | |
446 | void LocalFileSystem::FileSync(FileHandle &handle) { |
447 | int fd = handle.Cast<UnixFileHandle>().fd; |
448 | if (fsync(fd: fd) != 0) { |
449 | throw FatalException("fsync failed!" ); |
450 | } |
451 | } |
452 | |
453 | void LocalFileSystem::MoveFile(const string &source, const string &target) { |
454 | //! FIXME: rename does not guarantee atomicity or overwriting target file if it exists |
455 | if (rename(old: source.c_str(), new: target.c_str()) != 0) { |
456 | throw IOException("Could not rename file!" ); |
457 | } |
458 | } |
459 | |
460 | std::string LocalFileSystem::GetLastErrorAsString() { |
461 | return string(); |
462 | } |
463 | |
464 | #else |
465 | |
// Path prefix of named pipes on Windows ("\\.\pipe\"); GetFileType reports paths that start with it as FIFOs
constexpr char PIPE_PREFIX[] = "\\\\.\\pipe\\";
467 | |
468 | // Returns the last Win32 error, in string format. Returns an empty string if there is no error. |
469 | std::string LocalFileSystem::GetLastErrorAsString() { |
470 | // Get the error message, if any. |
471 | DWORD errorMessageID = GetLastError(); |
472 | if (errorMessageID == 0) |
473 | return std::string(); // No error message has been recorded |
474 | |
475 | LPSTR messageBuffer = nullptr; |
476 | idx_t size = |
477 | FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, |
478 | NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); |
479 | |
480 | std::string message(messageBuffer, size); |
481 | |
482 | // Free the buffer. |
483 | LocalFree(messageBuffer); |
484 | |
485 | return message; |
486 | } |
487 | |
488 | struct WindowsFileHandle : public FileHandle { |
489 | public: |
490 | WindowsFileHandle(FileSystem &file_system, string path, HANDLE fd) |
491 | : FileHandle(file_system, path), position(0), fd(fd) { |
492 | } |
493 | ~WindowsFileHandle() override { |
494 | Close(); |
495 | } |
496 | |
497 | idx_t position; |
498 | HANDLE fd; |
499 | |
500 | public: |
501 | void Close() override { |
502 | if (!fd) { |
503 | return; |
504 | } |
505 | CloseHandle(fd); |
506 | fd = nullptr; |
507 | }; |
508 | }; |
509 | |
510 | unique_ptr<FileHandle> LocalFileSystem::OpenFile(const string &path_p, uint8_t flags, FileLockType lock_type, |
511 | FileCompressionType compression, FileOpener *opener) { |
512 | auto path = FileSystem::ExpandPath(path_p, opener); |
513 | if (compression != FileCompressionType::UNCOMPRESSED) { |
514 | throw NotImplementedException("Unsupported compression type for default file system" ); |
515 | } |
516 | AssertValidFileFlags(flags); |
517 | |
518 | DWORD desired_access; |
519 | DWORD share_mode; |
520 | DWORD creation_disposition = OPEN_EXISTING; |
521 | DWORD flags_and_attributes = FILE_ATTRIBUTE_NORMAL; |
522 | bool open_read = flags & FileFlags::FILE_FLAGS_READ; |
523 | bool open_write = flags & FileFlags::FILE_FLAGS_WRITE; |
524 | if (open_read && open_write) { |
525 | desired_access = GENERIC_READ | GENERIC_WRITE; |
526 | share_mode = 0; |
527 | } else if (open_read) { |
528 | desired_access = GENERIC_READ; |
529 | share_mode = FILE_SHARE_READ; |
530 | } else if (open_write) { |
531 | desired_access = GENERIC_WRITE; |
532 | share_mode = 0; |
533 | } else { |
534 | throw InternalException("READ, WRITE or both should be specified when opening a file" ); |
535 | } |
536 | if (open_write) { |
537 | if (flags & FileFlags::FILE_FLAGS_FILE_CREATE) { |
538 | creation_disposition = OPEN_ALWAYS; |
539 | } else if (flags & FileFlags::FILE_FLAGS_FILE_CREATE_NEW) { |
540 | creation_disposition = CREATE_ALWAYS; |
541 | } |
542 | } |
543 | if (flags & FileFlags::FILE_FLAGS_DIRECT_IO) { |
544 | flags_and_attributes |= FILE_FLAG_NO_BUFFERING; |
545 | } |
546 | auto unicode_path = WindowsUtil::UTF8ToUnicode(path.c_str()); |
547 | HANDLE hFile = CreateFileW(unicode_path.c_str(), desired_access, share_mode, NULL, creation_disposition, |
548 | flags_and_attributes, NULL); |
549 | if (hFile == INVALID_HANDLE_VALUE) { |
550 | auto error = LocalFileSystem::GetLastErrorAsString(); |
551 | throw IOException("Cannot open file \"%s\": %s" , path.c_str(), error); |
552 | } |
553 | auto handle = make_uniq<WindowsFileHandle>(*this, path.c_str(), hFile); |
554 | if (flags & FileFlags::FILE_FLAGS_APPEND) { |
555 | auto file_size = GetFileSize(*handle); |
556 | SetFilePointer(*handle, file_size); |
557 | } |
558 | return std::move(handle); |
559 | } |
560 | |
561 | void LocalFileSystem::SetFilePointer(FileHandle &handle, idx_t location) { |
562 | auto &whandle = (WindowsFileHandle &)handle; |
563 | whandle.position = location; |
564 | LARGE_INTEGER wlocation; |
565 | wlocation.QuadPart = location; |
566 | SetFilePointerEx(whandle.fd, wlocation, NULL, FILE_BEGIN); |
567 | } |
568 | |
569 | idx_t LocalFileSystem::GetFilePointer(FileHandle &handle) { |
570 | return ((WindowsFileHandle &)handle).position; |
571 | } |
572 | |
573 | static DWORD FSInternalRead(FileHandle &handle, HANDLE hFile, void *buffer, int64_t nr_bytes, idx_t location) { |
574 | DWORD bytes_read = 0; |
575 | OVERLAPPED ov = {}; |
576 | ov.Internal = 0; |
577 | ov.InternalHigh = 0; |
578 | ov.Offset = location & 0xFFFFFFFF; |
579 | ov.OffsetHigh = location >> 32; |
580 | ov.hEvent = 0; |
581 | auto rc = ReadFile(hFile, buffer, (DWORD)nr_bytes, &bytes_read, &ov); |
582 | if (!rc) { |
583 | auto error = LocalFileSystem::GetLastErrorAsString(); |
584 | throw IOException("Could not read file \"%s\" (error in ReadFile(location: %llu, nr_bytes: %lld)): %s" , |
585 | handle.path, location, nr_bytes, error); |
586 | } |
587 | return bytes_read; |
588 | } |
589 | |
590 | void LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
591 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
592 | auto bytes_read = FSInternalRead(handle, hFile, buffer, nr_bytes, location); |
593 | if (bytes_read != nr_bytes) { |
594 | throw IOException("Could not read all bytes from file \"%s\": wanted=%lld read=%lld" , handle.path, nr_bytes, |
595 | bytes_read); |
596 | } |
597 | } |
598 | |
599 | int64_t LocalFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
600 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
601 | auto &pos = ((WindowsFileHandle &)handle).position; |
602 | auto n = std::min<idx_t>(std::max<idx_t>(GetFileSize(handle), pos) - pos, nr_bytes); |
603 | auto bytes_read = FSInternalRead(handle, hFile, buffer, n, pos); |
604 | pos += bytes_read; |
605 | return bytes_read; |
606 | } |
607 | |
608 | static DWORD FSInternalWrite(FileHandle &handle, HANDLE hFile, void *buffer, int64_t nr_bytes, idx_t location) { |
609 | DWORD bytes_written = 0; |
610 | OVERLAPPED ov = {}; |
611 | ov.Internal = 0; |
612 | ov.InternalHigh = 0; |
613 | ov.Offset = location & 0xFFFFFFFF; |
614 | ov.OffsetHigh = location >> 32; |
615 | ov.hEvent = 0; |
616 | auto rc = WriteFile(hFile, buffer, (DWORD)nr_bytes, &bytes_written, &ov); |
617 | if (!rc) { |
618 | auto error = LocalFileSystem::GetLastErrorAsString(); |
619 | throw IOException("Could not write file \"%s\" (error in WriteFile): %s" , handle.path, error); |
620 | } |
621 | return bytes_written; |
622 | } |
623 | |
624 | void LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
625 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
626 | auto bytes_written = FSInternalWrite(handle, hFile, buffer, nr_bytes, location); |
627 | if (bytes_written != nr_bytes) { |
628 | throw IOException("Could not write all bytes from file \"%s\": wanted=%lld wrote=%lld" , handle.path, nr_bytes, |
629 | bytes_written); |
630 | } |
631 | } |
632 | |
633 | int64_t LocalFileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
634 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
635 | auto &pos = ((WindowsFileHandle &)handle).position; |
636 | auto bytes_written = FSInternalWrite(handle, hFile, buffer, nr_bytes, pos); |
637 | pos += bytes_written; |
638 | return bytes_written; |
639 | } |
640 | |
641 | int64_t LocalFileSystem::GetFileSize(FileHandle &handle) { |
642 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
643 | LARGE_INTEGER result; |
644 | if (!GetFileSizeEx(hFile, &result)) { |
645 | return -1; |
646 | } |
647 | return result.QuadPart; |
648 | } |
649 | |
650 | time_t LocalFileSystem::GetLastModifiedTime(FileHandle &handle) { |
651 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
652 | |
653 | // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfiletime |
654 | FILETIME last_write; |
655 | if (GetFileTime(hFile, nullptr, nullptr, &last_write) == 0) { |
656 | return -1; |
657 | } |
658 | |
659 | // https://stackoverflow.com/questions/29266743/what-is-dwlowdatetime-and-dwhighdatetime |
660 | ULARGE_INTEGER ul; |
661 | ul.LowPart = last_write.dwLowDateTime; |
662 | ul.HighPart = last_write.dwHighDateTime; |
663 | int64_t fileTime64 = ul.QuadPart; |
664 | |
665 | // fileTime64 contains a 64-bit value representing the number of |
666 | // 100-nanosecond intervals since January 1, 1601 (UTC). |
667 | // https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-filetime |
668 | |
669 | // Adapted from: https://stackoverflow.com/questions/6161776/convert-windows-filetime-to-second-in-unix-linux |
670 | const auto WINDOWS_TICK = 10000000; |
671 | const auto SEC_TO_UNIX_EPOCH = 11644473600LL; |
672 | time_t result = (fileTime64 / WINDOWS_TICK - SEC_TO_UNIX_EPOCH); |
673 | return result; |
674 | } |
675 | |
676 | void LocalFileSystem::Truncate(FileHandle &handle, int64_t new_size) { |
677 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
678 | // seek to the location |
679 | SetFilePointer(handle, new_size); |
680 | // now set the end of file position |
681 | if (!SetEndOfFile(hFile)) { |
682 | auto error = LocalFileSystem::GetLastErrorAsString(); |
683 | throw IOException("Failure in SetEndOfFile call on file \"%s\": %s" , handle.path, error); |
684 | } |
685 | } |
686 | |
687 | static DWORD WindowsGetFileAttributes(const string &filename) { |
688 | auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); |
689 | return GetFileAttributesW(unicode_path.c_str()); |
690 | } |
691 | |
692 | bool LocalFileSystem::DirectoryExists(const string &directory) { |
693 | DWORD attrs = WindowsGetFileAttributes(directory); |
694 | return (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)); |
695 | } |
696 | |
697 | void LocalFileSystem::CreateDirectory(const string &directory) { |
698 | if (DirectoryExists(directory)) { |
699 | return; |
700 | } |
701 | auto unicode_path = WindowsUtil::UTF8ToUnicode(directory.c_str()); |
702 | if (directory.empty() || !CreateDirectoryW(unicode_path.c_str(), NULL) || !DirectoryExists(directory)) { |
703 | throw IOException("Could not create directory!" ); |
704 | } |
705 | } |
706 | |
707 | static void DeleteDirectoryRecursive(FileSystem &fs, string directory) { |
708 | fs.ListFiles(directory, [&](const string &fname, bool is_directory) { |
709 | if (is_directory) { |
710 | DeleteDirectoryRecursive(fs, fs.JoinPath(directory, fname)); |
711 | } else { |
712 | fs.RemoveFile(fs.JoinPath(directory, fname)); |
713 | } |
714 | }); |
715 | auto unicode_path = WindowsUtil::UTF8ToUnicode(directory.c_str()); |
716 | if (!RemoveDirectoryW(unicode_path.c_str())) { |
717 | auto error = LocalFileSystem::GetLastErrorAsString(); |
718 | throw IOException("Failed to delete directory \"%s\": %s" , directory, error); |
719 | } |
720 | } |
721 | |
722 | void LocalFileSystem::RemoveDirectory(const string &directory) { |
723 | if (FileExists(directory)) { |
724 | throw IOException("Attempting to delete directory \"%s\", but it is a file and not a directory!" , directory); |
725 | } |
726 | if (!DirectoryExists(directory)) { |
727 | return; |
728 | } |
729 | DeleteDirectoryRecursive(*this, directory.c_str()); |
730 | } |
731 | |
732 | void LocalFileSystem::RemoveFile(const string &filename) { |
733 | auto unicode_path = WindowsUtil::UTF8ToUnicode(filename.c_str()); |
734 | if (!DeleteFileW(unicode_path.c_str())) { |
735 | auto error = LocalFileSystem::GetLastErrorAsString(); |
736 | throw IOException("Failed to delete file \"%s\": %s" , filename, error); |
737 | } |
738 | } |
739 | |
740 | bool LocalFileSystem::ListFiles(const string &directory, const std::function<void(const string &, bool)> &callback, |
741 | FileOpener *opener) { |
742 | string search_dir = JoinPath(directory, "*" ); |
743 | |
744 | auto unicode_path = WindowsUtil::UTF8ToUnicode(search_dir.c_str()); |
745 | |
746 | WIN32_FIND_DATAW ffd; |
747 | HANDLE hFind = FindFirstFileW(unicode_path.c_str(), &ffd); |
748 | if (hFind == INVALID_HANDLE_VALUE) { |
749 | return false; |
750 | } |
751 | do { |
752 | string cFileName = WindowsUtil::UnicodeToUTF8(ffd.cFileName); |
753 | if (cFileName == "." || cFileName == ".." ) { |
754 | continue; |
755 | } |
756 | callback(cFileName, ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY); |
757 | } while (FindNextFileW(hFind, &ffd) != 0); |
758 | |
759 | DWORD dwError = GetLastError(); |
760 | if (dwError != ERROR_NO_MORE_FILES) { |
761 | FindClose(hFind); |
762 | return false; |
763 | } |
764 | |
765 | FindClose(hFind); |
766 | return true; |
767 | } |
768 | |
769 | void LocalFileSystem::FileSync(FileHandle &handle) { |
770 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
771 | if (FlushFileBuffers(hFile) == 0) { |
772 | throw IOException("Could not flush file handle to disk!" ); |
773 | } |
774 | } |
775 | |
776 | void LocalFileSystem::MoveFile(const string &source, const string &target) { |
777 | auto source_unicode = WindowsUtil::UTF8ToUnicode(source.c_str()); |
778 | auto target_unicode = WindowsUtil::UTF8ToUnicode(target.c_str()); |
779 | if (!MoveFileW(source_unicode.c_str(), target_unicode.c_str())) { |
780 | throw IOException("Could not move file: %s" , GetLastErrorAsString()); |
781 | } |
782 | } |
783 | |
784 | FileType LocalFileSystem::GetFileType(FileHandle &handle) { |
785 | auto path = ((WindowsFileHandle &)handle).path; |
786 | // pipes in windows are just files in '\\.\pipe\' folder |
787 | if (strncmp(path.c_str(), PIPE_PREFIX, strlen(PIPE_PREFIX)) == 0) { |
788 | return FileType::FILE_TYPE_FIFO; |
789 | } |
790 | DWORD attrs = WindowsGetFileAttributes(path.c_str()); |
791 | if (attrs != INVALID_FILE_ATTRIBUTES) { |
792 | if (attrs & FILE_ATTRIBUTE_DIRECTORY) { |
793 | return FileType::FILE_TYPE_DIR; |
794 | } else { |
795 | return FileType::FILE_TYPE_REGULAR; |
796 | } |
797 | } |
798 | return FileType::FILE_TYPE_INVALID; |
799 | } |
800 | #endif |
801 | |
802 | bool LocalFileSystem::CanSeek() { |
803 | return true; |
804 | } |
805 | |
806 | bool LocalFileSystem::OnDiskFile(FileHandle &handle) { |
807 | return true; |
808 | } |
809 | |
810 | void LocalFileSystem::Seek(FileHandle &handle, idx_t location) { |
811 | if (!CanSeek()) { |
812 | throw IOException("Cannot seek in files of this type" ); |
813 | } |
814 | SetFilePointer(handle, location); |
815 | } |
816 | |
817 | idx_t LocalFileSystem::SeekPosition(FileHandle &handle) { |
818 | if (!CanSeek()) { |
819 | throw IOException("Cannot seek in files of this type" ); |
820 | } |
821 | return GetFilePointer(handle); |
822 | } |
823 | |
//! Returns true only if `glob` is exactly the two-character pattern "**".
static bool IsCrawl(const string &glob) {
	// no partial matches: the whole component has to be "**"
	return glob.compare("**") == 0;
}
//! Returns true if more than one element of `splits` is exactly "**".
static bool HasMultipleCrawl(const vector<string> &splits) {
	int crawl_count = 0;
	for (const auto &split : splits) {
		if (split == "**") {
			crawl_count++;
		}
	}
	return crawl_count > 1;
}
//! Returns true if `path` itself is a symbolic link (POSIX: lstat() reports S_ISLNK;
//! Windows: the file attributes contain FILE_ATTRIBUTE_REPARSE_POINT).
//! Returns false if the path cannot be queried.
static bool IsSymbolicLink(const string &path) {
#ifndef _WIN32
	struct stat link_status;
	if (lstat(path.c_str(), &link_status) == -1) {
		return false;
	}
	return S_ISLNK(link_status.st_mode);
#else
	const auto attributes = WindowsGetFileAttributes(path);
	return attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_REPARSE_POINT);
#endif
}
842 | |
843 | static void RecursiveGlobDirectories(FileSystem &fs, const string &path, vector<string> &result, bool match_directory, |
844 | bool join_path) { |
845 | |
846 | fs.ListFiles(directory: path, callback: [&](const string &fname, bool is_directory) { |
847 | string concat; |
848 | if (join_path) { |
849 | concat = fs.JoinPath(a: path, b: fname); |
850 | } else { |
851 | concat = fname; |
852 | } |
853 | if (IsSymbolicLink(path: concat)) { |
854 | return; |
855 | } |
856 | if (is_directory == match_directory) { |
857 | result.push_back(x: concat); |
858 | } |
859 | if (is_directory) { |
860 | RecursiveGlobDirectories(fs, path: concat, result, match_directory, join_path: true); |
861 | } |
862 | }); |
863 | } |
864 | |
865 | static void GlobFilesInternal(FileSystem &fs, const string &path, const string &glob, bool match_directory, |
866 | vector<string> &result, bool join_path) { |
867 | fs.ListFiles(directory: path, callback: [&](const string &fname, bool is_directory) { |
868 | if (is_directory != match_directory) { |
869 | return; |
870 | } |
871 | if (LikeFun::Glob(s: fname.c_str(), slen: fname.size(), pattern: glob.c_str(), plen: glob.size())) { |
872 | if (join_path) { |
873 | result.push_back(x: fs.JoinPath(a: path, b: fname)); |
874 | } else { |
875 | result.push_back(x: fname); |
876 | } |
877 | } |
878 | }); |
879 | } |
880 | |
881 | vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) { |
882 | vector<string> result; |
883 | if (FileExists(filename: path) || IsPipe(filename: path)) { |
884 | result.push_back(x: path); |
885 | } else if (!absolute_path) { |
886 | Value value; |
887 | if (opener && opener->TryGetCurrentSetting(key: "file_search_path" , result&: value)) { |
888 | auto search_paths_str = value.ToString(); |
889 | vector<std::string> search_paths = StringUtil::Split(str: search_paths_str, delimiter: ','); |
890 | for (const auto &search_path : search_paths) { |
891 | auto joined_path = JoinPath(a: search_path, b: path); |
892 | if (FileExists(filename: joined_path) || IsPipe(filename: joined_path)) { |
893 | result.push_back(x: joined_path); |
894 | } |
895 | } |
896 | } |
897 | } |
898 | return result; |
899 | } |
900 | |
901 | vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) { |
902 | if (path.empty()) { |
903 | return vector<string>(); |
904 | } |
905 | // split up the path into separate chunks |
906 | vector<string> splits; |
907 | idx_t last_pos = 0; |
908 | for (idx_t i = 0; i < path.size(); i++) { |
909 | if (path[i] == '\\' || path[i] == '/') { |
910 | if (i == last_pos) { |
911 | // empty: skip this position |
912 | last_pos = i + 1; |
913 | continue; |
914 | } |
915 | if (splits.empty()) { |
916 | splits.push_back(x: path.substr(pos: 0, n: i)); |
917 | } else { |
918 | splits.push_back(x: path.substr(pos: last_pos, n: i - last_pos)); |
919 | } |
920 | last_pos = i + 1; |
921 | } |
922 | } |
923 | splits.push_back(x: path.substr(pos: last_pos, n: path.size() - last_pos)); |
924 | // handle absolute paths |
925 | bool absolute_path = false; |
926 | if (path[0] == '/') { |
927 | // first character is a slash - unix absolute path |
928 | absolute_path = true; |
929 | } else if (StringUtil::Contains(haystack: splits[0], needle: ":" )) { |
930 | // first split has a colon - windows absolute path |
931 | absolute_path = true; |
932 | } else if (splits[0] == "~" ) { |
933 | // starts with home directory |
934 | auto home_directory = GetHomeDirectory(opener); |
935 | if (!home_directory.empty()) { |
936 | absolute_path = true; |
937 | splits[0] = home_directory; |
938 | D_ASSERT(path[0] == '~'); |
939 | if (!HasGlob(str: path)) { |
940 | return Glob(path: home_directory + path.substr(pos: 1)); |
941 | } |
942 | } |
943 | } |
944 | // Check if the path has a glob at all |
945 | if (!HasGlob(str: path)) { |
946 | // no glob: return only the file (if it exists or is a pipe) |
947 | return FetchFileWithoutGlob(path, opener, absolute_path); |
948 | } |
949 | vector<string> previous_directories; |
950 | if (absolute_path) { |
951 | // for absolute paths, we don't start by scanning the current directory |
952 | previous_directories.push_back(x: splits[0]); |
953 | } else { |
954 | // If file_search_path is set, use those paths as the first glob elements |
955 | Value value; |
956 | if (opener && opener->TryGetCurrentSetting(key: "file_search_path" , result&: value)) { |
957 | auto search_paths_str = value.ToString(); |
958 | vector<std::string> search_paths = StringUtil::Split(str: search_paths_str, delimiter: ','); |
959 | for (const auto &search_path : search_paths) { |
960 | previous_directories.push_back(x: search_path); |
961 | } |
962 | } |
963 | } |
964 | |
965 | if (HasMultipleCrawl(splits)) { |
966 | throw IOException("Cannot use multiple \'**\' in one path" ); |
967 | } |
968 | |
969 | for (idx_t i = absolute_path ? 1 : 0; i < splits.size(); i++) { |
970 | bool is_last_chunk = i + 1 == splits.size(); |
971 | bool has_glob = HasGlob(str: splits[i]); |
972 | // if it's the last chunk we need to find files, otherwise we find directories |
973 | // not the last chunk: gather a list of all directories that match the glob pattern |
974 | vector<string> result; |
975 | if (!has_glob) { |
976 | // no glob, just append as-is |
977 | if (previous_directories.empty()) { |
978 | result.push_back(x: splits[i]); |
979 | } else { |
980 | if (is_last_chunk) { |
981 | for (auto &prev_directory : previous_directories) { |
982 | const string filename = JoinPath(a: prev_directory, b: splits[i]); |
983 | if (FileExists(filename) || DirectoryExists(directory: filename)) { |
984 | result.push_back(x: filename); |
985 | } |
986 | } |
987 | } else { |
988 | for (auto &prev_directory : previous_directories) { |
989 | result.push_back(x: JoinPath(a: prev_directory, b: splits[i])); |
990 | } |
991 | } |
992 | } |
993 | } else { |
994 | if (IsCrawl(glob: splits[i])) { |
995 | if (!is_last_chunk) { |
996 | result = previous_directories; |
997 | } |
998 | if (previous_directories.empty()) { |
999 | RecursiveGlobDirectories(fs&: *this, path: "." , result, match_directory: !is_last_chunk, join_path: false); |
1000 | } else { |
1001 | for (auto &prev_dir : previous_directories) { |
1002 | RecursiveGlobDirectories(fs&: *this, path: prev_dir, result, match_directory: !is_last_chunk, join_path: true); |
1003 | } |
1004 | } |
1005 | } else { |
1006 | if (previous_directories.empty()) { |
1007 | // no previous directories: list in the current path |
1008 | GlobFilesInternal(fs&: *this, path: "." , glob: splits[i], match_directory: !is_last_chunk, result, join_path: false); |
1009 | } else { |
1010 | // previous directories |
1011 | // we iterate over each of the previous directories, and apply the glob of the current directory |
1012 | for (auto &prev_directory : previous_directories) { |
1013 | GlobFilesInternal(fs&: *this, path: prev_directory, glob: splits[i], match_directory: !is_last_chunk, result, join_path: true); |
1014 | } |
1015 | } |
1016 | } |
1017 | } |
1018 | if (result.empty()) { |
1019 | // no result found that matches the glob |
1020 | // last ditch effort: search the path as a string literal |
1021 | return FetchFileWithoutGlob(path, opener, absolute_path); |
1022 | } |
1023 | if (is_last_chunk) { |
1024 | return result; |
1025 | } |
1026 | previous_directories = std::move(result); |
1027 | } |
1028 | return vector<string>(); |
1029 | } |
1030 | |
1031 | unique_ptr<FileSystem> FileSystem::CreateLocal() { |
1032 | return make_uniq<LocalFileSystem>(); |
1033 | } |
1034 | |
1035 | } // namespace duckdb |
1036 | |