1 | #include "duckdb/common/file_system.hpp" |
2 | |
3 | #include "duckdb/common/exception.hpp" |
4 | #include "duckdb/common/helper.hpp" |
5 | #include "duckdb/common/string_util.hpp" |
6 | #include "duckdb/common/checksum.hpp" |
7 | #include "duckdb/main/client_context.hpp" |
8 | #include "duckdb/main/database.hpp" |
9 | |
10 | using namespace duckdb; |
11 | using namespace std; |
12 | |
13 | #include <cstdio> |
14 | |
15 | FileSystem &FileSystem::GetFileSystem(ClientContext &context) { |
16 | return *context.db.file_system; |
17 | } |
18 | |
19 | static void AssertValidFileFlags(uint8_t flags) { |
20 | // cannot combine Read and Write flags |
21 | assert(!(flags & FileFlags::READ && flags & FileFlags::WRITE)); |
22 | // cannot combine Read and Append flags |
23 | assert(!(flags & FileFlags::READ && flags & FileFlags::APPEND)); |
24 | // cannot combine Read and CREATE flags |
25 | assert(!(flags & FileFlags::READ && flags & FileFlags::CREATE)); |
26 | } |
27 | |
28 | #ifndef _WIN32 |
29 | #include <dirent.h> |
30 | #include <fcntl.h> |
31 | #include <string.h> |
32 | #include <sys/stat.h> |
33 | #include <sys/types.h> |
34 | #include <unistd.h> |
35 | |
// some platforms do not define O_CLOEXEC; fall back to a no-op flag there
37 | #ifndef O_CLOEXEC |
38 | #define O_CLOEXEC 0 |
39 | #endif |
40 | |
// Solaris does not define O_DIRECT; fall back to a no-op flag there
42 | #ifndef O_DIRECT |
43 | #define O_DIRECT 0 |
44 | #endif |
45 | |
46 | struct UnixFileHandle : public FileHandle { |
47 | public: |
48 | UnixFileHandle(FileSystem &file_system, string path, int fd) : FileHandle(file_system, path), fd(fd) { |
49 | } |
50 | virtual ~UnixFileHandle() { |
51 | Close(); |
52 | } |
53 | |
54 | protected: |
55 | void Close() override { |
56 | if (fd != -1) { |
57 | close(fd); |
58 | } |
59 | }; |
60 | |
61 | public: |
62 | int fd; |
63 | }; |
64 | |
65 | unique_ptr<FileHandle> FileSystem::OpenFile(const char *path, uint8_t flags, FileLockType lock_type) { |
66 | AssertValidFileFlags(flags); |
67 | |
68 | int open_flags = 0; |
69 | int rc; |
70 | if (flags & FileFlags::READ) { |
71 | open_flags = O_RDONLY; |
72 | } else { |
73 | // need Read or Write |
74 | assert(flags & FileFlags::WRITE); |
75 | open_flags = O_RDWR | O_CLOEXEC; |
76 | if (flags & FileFlags::CREATE) { |
77 | open_flags |= O_CREAT; |
78 | } |
79 | if (flags & FileFlags::APPEND) { |
80 | open_flags |= O_APPEND; |
81 | } |
82 | } |
83 | if (flags & FileFlags::DIRECT_IO) { |
84 | #if defined(__sun) && defined(__SVR4) |
85 | throw Exception("DIRECT_IO not supported on Solaris" ); |
86 | #endif |
87 | #if defined(__DARWIN__) || defined(__APPLE__) || defined(__OpenBSD__) |
88 | // OSX does not have O_DIRECT, instead we need to use fcntl afterwards to support direct IO |
89 | open_flags |= O_SYNC; |
90 | #else |
91 | open_flags |= O_DIRECT | O_SYNC; |
92 | #endif |
93 | } |
94 | int fd = open(path, open_flags, 0666); |
95 | if (fd == -1) { |
96 | throw IOException("Cannot open file \"%s\": %s" , path, strerror(errno)); |
97 | } |
98 | // #if defined(__DARWIN__) || defined(__APPLE__) |
99 | // if (flags & FileFlags::DIRECT_IO) { |
100 | // // OSX requires fcntl for Direct IO |
101 | // rc = fcntl(fd, F_NOCACHE, 1); |
102 | // if (fd == -1) { |
103 | // throw IOException("Could not enable direct IO for file \"%s\": %s", path, strerror(errno)); |
104 | // } |
105 | // } |
106 | // #endif |
107 | if (lock_type != FileLockType::NO_LOCK) { |
108 | // set lock on file |
109 | struct flock fl; |
110 | memset(&fl, 0, sizeof fl); |
111 | fl.l_type = lock_type == FileLockType::READ_LOCK ? F_RDLCK : F_WRLCK; |
112 | fl.l_whence = SEEK_SET; |
113 | fl.l_start = 0; |
114 | fl.l_len = 0; |
115 | rc = fcntl(fd, F_SETLK, &fl); |
116 | if (rc == -1) { |
117 | throw IOException("Could not set lock on file \"%s\": %s" , path, strerror(errno)); |
118 | } |
119 | } |
120 | return make_unique<UnixFileHandle>(*this, path, fd); |
121 | } |
122 | |
123 | void FileSystem::SetFilePointer(FileHandle &handle, idx_t location) { |
124 | int fd = ((UnixFileHandle &)handle).fd; |
125 | off_t offset = lseek(fd, location, SEEK_SET); |
126 | if (offset == (off_t)-1) { |
127 | throw IOException("Could not seek to location %lld for file \"%s\": %s" , location, handle.path.c_str(), |
128 | strerror(errno)); |
129 | } |
130 | } |
131 | |
132 | int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
133 | int fd = ((UnixFileHandle &)handle).fd; |
134 | int64_t bytes_read = read(fd, buffer, nr_bytes); |
135 | if (bytes_read == -1) { |
136 | throw IOException("Could not read from file \"%s\": %s" , handle.path.c_str(), strerror(errno)); |
137 | } |
138 | return bytes_read; |
139 | } |
140 | |
141 | int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
142 | int fd = ((UnixFileHandle &)handle).fd; |
143 | int64_t bytes_written = write(fd, buffer, nr_bytes); |
144 | if (bytes_written == -1) { |
145 | throw IOException("Could not write file \"%s\": %s" , handle.path.c_str(), strerror(errno)); |
146 | } |
147 | return bytes_written; |
148 | } |
149 | |
150 | int64_t FileSystem::GetFileSize(FileHandle &handle) { |
151 | int fd = ((UnixFileHandle &)handle).fd; |
152 | struct stat s; |
153 | if (fstat(fd, &s) == -1) { |
154 | return -1; |
155 | } |
156 | return s.st_size; |
157 | } |
158 | |
159 | void FileSystem::Truncate(FileHandle &handle, int64_t new_size) { |
160 | int fd = ((UnixFileHandle &)handle).fd; |
161 | if (ftruncate(fd, new_size) != 0) { |
162 | throw IOException("Could not truncate file \"%s\": %s" , handle.path.c_str(), strerror(errno)); |
163 | } |
164 | } |
165 | |
166 | bool FileSystem::DirectoryExists(const string &directory) { |
167 | if (!directory.empty()) { |
168 | if (access(directory.c_str(), 0) == 0) { |
169 | struct stat status; |
170 | stat(directory.c_str(), &status); |
171 | if (status.st_mode & S_IFDIR) |
172 | return true; |
173 | } |
174 | } |
175 | // if any condition fails |
176 | return false; |
177 | } |
178 | |
179 | bool FileSystem::FileExists(const string &filename) { |
180 | if (!filename.empty()) { |
181 | if (access(filename.c_str(), 0) == 0) { |
182 | struct stat status; |
183 | stat(filename.c_str(), &status); |
184 | if (!(status.st_mode & S_IFDIR)) |
185 | return true; |
186 | } |
187 | } |
188 | // if any condition fails |
189 | return false; |
190 | } |
191 | |
192 | void FileSystem::CreateDirectory(const string &directory) { |
193 | struct stat st; |
194 | |
195 | if (stat(directory.c_str(), &st) != 0) { |
196 | /* Directory does not exist. EEXIST for race condition */ |
197 | if (mkdir(directory.c_str(), 0755) != 0 && errno != EEXIST) { |
198 | throw IOException("Failed to create directory \"%s\"!" , directory.c_str()); |
199 | } |
200 | } else if (!S_ISDIR(st.st_mode)) { |
201 | throw IOException("Failed to create directory \"%s\": path exists but is not a directory!" , directory.c_str()); |
202 | } |
203 | } |
204 | |
// Deletes the directory `path` together with everything inside it.
//
// Entries are examined with lstat(), so a symbolic link is removed as a link
// and is never followed: the contents of a directory that is merely linked
// from inside `path` are left untouched, and dangling links are removed too.
//
// Returns 0 on success and -1 on failure (`path` cannot be opened as a
// directory, or an entry or the directory itself cannot be removed).
// Processing stops at the first entry that fails.
int remove_directory_recursively(const char *path) {
	DIR *dir = opendir(path);
	if (!dir) {
		return -1;
	}

	int result = 0;
	struct dirent *entry;
	while (result == 0 && (entry = readdir(dir)) != nullptr) {
		/* Skip the names "." and ".." as we don't want to recurse on them. */
		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
			continue;
		}
		const std::string child = std::string(path) + "/" + entry->d_name;
		struct stat info;
		if (lstat(child.c_str(), &info) != 0) {
			result = -1;
		} else if (S_ISDIR(info.st_mode)) {
			result = remove_directory_recursively(child.c_str());
		} else {
			result = unlink(child.c_str());
		}
	}
	closedir(dir);

	// only remove the directory itself once all of its entries are gone
	if (result == 0) {
		result = rmdir(path);
	}
	return result;
}
244 | |
245 | void FileSystem::RemoveDirectory(const string &directory) { |
246 | remove_directory_recursively(directory.c_str()); |
247 | } |
248 | |
249 | void FileSystem::RemoveFile(const string &filename) { |
250 | if (std::remove(filename.c_str()) != 0) { |
251 | throw IOException("Could not remove file \"%s\": %s" , filename.c_str(), strerror(errno)); |
252 | } |
253 | } |
254 | |
255 | bool FileSystem::ListFiles(const string &directory, function<void(string)> callback) { |
256 | if (!DirectoryExists(directory)) { |
257 | return false; |
258 | } |
259 | DIR *dir; |
260 | struct dirent *ent; |
261 | if ((dir = opendir(directory.c_str())) != NULL) { |
262 | /* print all the files and directories within directory */ |
263 | while ((ent = readdir(dir)) != NULL) { |
264 | string name = string(ent->d_name); |
265 | if (!name.empty() && name[0] != '.') { |
266 | callback(name); |
267 | } |
268 | } |
269 | closedir(dir); |
270 | } else { |
271 | return false; |
272 | } |
273 | return true; |
274 | } |
275 | |
276 | string FileSystem::PathSeparator() { |
277 | return "/" ; |
278 | } |
279 | |
280 | void FileSystem::FileSync(FileHandle &handle) { |
281 | int fd = ((UnixFileHandle &)handle).fd; |
282 | if (fsync(fd) != 0) { |
283 | throw FatalException("fsync failed!" ); |
284 | } |
285 | } |
286 | |
287 | void FileSystem::MoveFile(const string &source, const string &target) { |
288 | //! FIXME: rename does not guarantee atomicity or overwriting target file if it exists |
289 | if (rename(source.c_str(), target.c_str()) != 0) { |
290 | throw IOException("Could not rename file!" ); |
291 | } |
292 | } |
293 | |
294 | #else |
295 | |
296 | #include <string> |
297 | #ifndef NOMINMAX |
298 | #define NOMINMAX |
299 | #endif |
300 | #include <windows.h> |
301 | |
302 | #undef CreateDirectory |
303 | #undef MoveFile |
304 | #undef RemoveDirectory |
305 | |
306 | // Returns the last Win32 error, in string format. Returns an empty string if there is no error. |
307 | std::string GetLastErrorAsString() { |
308 | // Get the error message, if any. |
309 | DWORD errorMessageID = ::GetLastError(); |
310 | if (errorMessageID == 0) |
311 | return std::string(); // No error message has been recorded |
312 | |
313 | LPSTR messageBuffer = nullptr; |
314 | idx_t size = |
315 | FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, |
316 | NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); |
317 | |
318 | std::string message(messageBuffer, size); |
319 | |
320 | // Free the buffer. |
321 | LocalFree(messageBuffer); |
322 | |
323 | return message; |
324 | } |
325 | |
326 | struct WindowsFileHandle : public FileHandle { |
327 | public: |
328 | WindowsFileHandle(FileSystem &file_system, string path, HANDLE fd) : FileHandle(file_system, path), fd(fd) { |
329 | } |
330 | virtual ~WindowsFileHandle() { |
331 | Close(); |
332 | } |
333 | |
334 | protected: |
335 | void Close() override { |
336 | CloseHandle(fd); |
337 | }; |
338 | |
339 | public: |
340 | HANDLE fd; |
341 | }; |
342 | |
343 | unique_ptr<FileHandle> FileSystem::OpenFile(const char *path, uint8_t flags, FileLockType lock_type) { |
344 | AssertValidFileFlags(flags); |
345 | |
346 | DWORD desired_access; |
347 | DWORD share_mode; |
348 | DWORD creation_disposition = OPEN_EXISTING; |
349 | DWORD flags_and_attributes = FILE_ATTRIBUTE_NORMAL; |
350 | if (flags & FileFlags::READ) { |
351 | desired_access = GENERIC_READ; |
352 | share_mode = FILE_SHARE_READ; |
353 | } else { |
354 | // need Read or Write |
355 | assert(flags & FileFlags::WRITE); |
356 | desired_access = GENERIC_READ | GENERIC_WRITE; |
357 | share_mode = 0; |
358 | if (flags & FileFlags::CREATE) { |
359 | creation_disposition = OPEN_ALWAYS; |
360 | } |
361 | if (flags & FileFlags::DIRECT_IO) { |
362 | flags_and_attributes |= FILE_FLAG_WRITE_THROUGH; |
363 | } |
364 | } |
365 | if (flags & FileFlags::DIRECT_IO) { |
366 | flags_and_attributes |= FILE_FLAG_NO_BUFFERING; |
367 | } |
368 | HANDLE hFile = |
369 | CreateFileA(path, desired_access, share_mode, NULL, creation_disposition, flags_and_attributes, NULL); |
370 | if (hFile == INVALID_HANDLE_VALUE) { |
371 | auto error = GetLastErrorAsString(); |
372 | throw IOException("Cannot open file \"%s\": %s" , path, error.c_str()); |
373 | } |
374 | auto handle = make_unique<WindowsFileHandle>(*this, path, hFile); |
375 | if (flags & FileFlags::APPEND) { |
376 | SetFilePointer(*handle, GetFileSize(*handle)); |
377 | } |
378 | return move(handle); |
379 | } |
380 | |
381 | void FileSystem::SetFilePointer(FileHandle &handle, idx_t location) { |
382 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
383 | LARGE_INTEGER loc; |
384 | loc.QuadPart = location; |
385 | auto rc = SetFilePointerEx(hFile, loc, NULL, FILE_BEGIN); |
386 | if (rc == 0) { |
387 | auto error = GetLastErrorAsString(); |
388 | throw IOException("Could not seek to location %lld for file \"%s\": %s" , location, handle.path.c_str(), |
389 | error.c_str()); |
390 | } |
391 | } |
392 | |
393 | int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
394 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
395 | DWORD bytes_read; |
396 | auto rc = ReadFile(hFile, buffer, (DWORD)nr_bytes, &bytes_read, NULL); |
397 | if (rc == 0) { |
398 | auto error = GetLastErrorAsString(); |
399 | throw IOException("Could not write file \"%s\": %s" , handle.path.c_str(), error.c_str()); |
400 | } |
401 | return bytes_read; |
402 | } |
403 | |
404 | int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
405 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
406 | DWORD bytes_read; |
407 | auto rc = WriteFile(hFile, buffer, (DWORD)nr_bytes, &bytes_read, NULL); |
408 | if (rc == 0) { |
409 | auto error = GetLastErrorAsString(); |
410 | throw IOException("Could not write file \"%s\": %s" , handle.path.c_str(), error.c_str()); |
411 | } |
412 | return bytes_read; |
413 | } |
414 | |
415 | int64_t FileSystem::GetFileSize(FileHandle &handle) { |
416 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
417 | LARGE_INTEGER result; |
418 | if (!GetFileSizeEx(hFile, &result)) { |
419 | return -1; |
420 | } |
421 | return result.QuadPart; |
422 | } |
423 | |
424 | void FileSystem::Truncate(FileHandle &handle, int64_t new_size) { |
425 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
426 | // seek to the location |
427 | SetFilePointer(handle, new_size); |
428 | // now set the end of file position |
429 | if (!SetEndOfFile(hFile)) { |
430 | auto error = GetLastErrorAsString(); |
431 | throw IOException("Failure in SetEndOfFile call on file \"%s\": %s" , handle.path.c_str(), error.c_str()); |
432 | } |
433 | } |
434 | |
435 | bool FileSystem::DirectoryExists(const string &directory) { |
436 | DWORD attrs = GetFileAttributesA(directory.c_str()); |
437 | return (attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY)); |
438 | } |
439 | |
440 | bool FileSystem::FileExists(const string &filename) { |
441 | DWORD attrs = GetFileAttributesA(filename.c_str()); |
442 | return (attrs != INVALID_FILE_ATTRIBUTES && !(attrs & FILE_ATTRIBUTE_DIRECTORY)); |
443 | } |
444 | |
445 | void FileSystem::CreateDirectory(const string &directory) { |
446 | if (DirectoryExists(directory)) { |
447 | return; |
448 | } |
449 | if (directory.empty() || !CreateDirectoryA(directory.c_str(), NULL) || !DirectoryExists(directory)) { |
450 | throw IOException("Could not create directory!" ); |
451 | } |
452 | } |
453 | |
454 | static void delete_dir_special_snowflake_windows(string directory) { |
455 | if (directory.size() + 3 > MAX_PATH) { |
456 | throw IOException("Pathname too long" ); |
457 | } |
458 | // create search pattern |
459 | TCHAR szDir[MAX_PATH]; |
460 | snprintf(szDir, MAX_PATH, "%s\\*" , directory.c_str()); |
461 | |
462 | WIN32_FIND_DATA ffd; |
463 | HANDLE hFind = FindFirstFile(szDir, &ffd); |
464 | if (hFind == INVALID_HANDLE_VALUE) { |
465 | return; |
466 | } |
467 | |
468 | do { |
469 | if (string(ffd.cFileName) == "." || string(ffd.cFileName) == ".." ) { |
470 | continue; |
471 | } |
472 | if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { |
473 | // recurse to zap directory contents |
474 | FileSystem fs; |
475 | delete_dir_special_snowflake_windows(fs.JoinPath(directory, ffd.cFileName)); |
476 | } else { |
477 | if (strlen(ffd.cFileName) + directory.size() + 1 > MAX_PATH) { |
478 | throw IOException("Pathname too long" ); |
479 | } |
480 | // create search pattern |
481 | TCHAR del_path[MAX_PATH]; |
482 | snprintf(del_path, MAX_PATH, "%s\\%s" , directory.c_str(), ffd.cFileName); |
483 | if (!DeleteFileA(del_path)) { |
484 | throw IOException("Failed to delete directory entry" ); |
485 | } |
486 | } |
487 | } while (FindNextFile(hFind, &ffd) != 0); |
488 | |
489 | DWORD dwError = GetLastError(); |
490 | if (dwError != ERROR_NO_MORE_FILES) { |
491 | throw IOException("Something went wrong" ); |
492 | } |
493 | FindClose(hFind); |
494 | |
495 | if (!RemoveDirectoryA(directory.c_str())) { |
496 | throw IOException("Failed to delete directory" ); |
497 | } |
498 | } |
499 | |
500 | void FileSystem::RemoveDirectory(const string &directory) { |
501 | delete_dir_special_snowflake_windows(directory.c_str()); |
502 | } |
503 | |
504 | void FileSystem::RemoveFile(const string &filename) { |
505 | DeleteFileA(filename.c_str()); |
506 | } |
507 | |
508 | bool FileSystem::ListFiles(const string &directory, function<void(string)> callback) { |
509 | string search_dir = JoinPath(directory, "*" ); |
510 | |
511 | WIN32_FIND_DATA ffd; |
512 | HANDLE hFind = FindFirstFile(search_dir.c_str(), &ffd); |
513 | if (hFind == INVALID_HANDLE_VALUE) { |
514 | return false; |
515 | } |
516 | do { |
517 | if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { |
518 | continue; |
519 | } |
520 | |
521 | callback(string(ffd.cFileName)); |
522 | } while (FindNextFile(hFind, &ffd) != 0); |
523 | |
524 | DWORD dwError = GetLastError(); |
525 | if (dwError != ERROR_NO_MORE_FILES) { |
526 | FindClose(hFind); |
527 | return false; |
528 | } |
529 | |
530 | FindClose(hFind); |
531 | return true; |
532 | } |
533 | |
534 | string FileSystem::PathSeparator() { |
535 | return "\\" ; |
536 | } |
537 | |
538 | void FileSystem::FileSync(FileHandle &handle) { |
539 | HANDLE hFile = ((WindowsFileHandle &)handle).fd; |
540 | if (FlushFileBuffers(hFile) == 0) { |
541 | throw IOException("Could not flush file handle to disk!" ); |
542 | } |
543 | } |
544 | |
545 | void FileSystem::MoveFile(const string &source, const string &target) { |
546 | if (!MoveFileA(source.c_str(), target.c_str())) { |
547 | throw IOException("Could not move file" ); |
548 | } |
549 | } |
550 | #endif |
551 | |
552 | void FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
553 | // seek to the location |
554 | SetFilePointer(handle, location); |
555 | // now read from the location |
556 | int64_t bytes_read = Read(handle, buffer, nr_bytes); |
557 | if (bytes_read != nr_bytes) { |
558 | throw IOException("Could not read sufficient bytes from file \"%s\"" , handle.path.c_str()); |
559 | } |
560 | } |
561 | |
562 | void FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
563 | // seek to the location |
564 | SetFilePointer(handle, location); |
565 | // now write to the location |
566 | int64_t bytes_written = Write(handle, buffer, nr_bytes); |
567 | if (bytes_written != nr_bytes) { |
568 | throw IOException("Could not write sufficient bytes from file \"%s\"" , handle.path.c_str()); |
569 | } |
570 | } |
571 | |
572 | string FileSystem::JoinPath(const string &a, const string &b) { |
573 | // FIXME: sanitize paths |
574 | return a + PathSeparator() + b; |
575 | } |
576 | |
577 | void FileHandle::Read(void *buffer, idx_t nr_bytes, idx_t location) { |
578 | file_system.Read(*this, buffer, nr_bytes, location); |
579 | } |
580 | |
581 | void FileHandle::Write(void *buffer, idx_t nr_bytes, idx_t location) { |
582 | file_system.Write(*this, buffer, nr_bytes, location); |
583 | } |
584 | |
585 | void FileHandle::Sync() { |
586 | file_system.FileSync(*this); |
587 | } |
588 | |
589 | void FileHandle::Truncate(int64_t new_size) { |
590 | file_system.Truncate(*this, new_size); |
591 | } |
592 | |