1 | #include "duckdb/common/file_system.hpp" |
2 | |
3 | #include "duckdb/common/checksum.hpp" |
4 | #include "duckdb/common/exception.hpp" |
5 | #include "duckdb/common/file_opener.hpp" |
6 | #include "duckdb/common/helper.hpp" |
7 | #include "duckdb/common/string_util.hpp" |
8 | #include "duckdb/common/windows.hpp" |
9 | #include "duckdb/function/scalar/string_functions.hpp" |
10 | #include "duckdb/main/client_context.hpp" |
11 | #include "duckdb/main/client_data.hpp" |
12 | #include "duckdb/main/database.hpp" |
13 | #include "duckdb/main/extension_helper.hpp" |
14 | #include "duckdb/common/windows_util.hpp" |
15 | |
16 | #include <cstdint> |
17 | #include <cstdio> |
18 | |
19 | #ifndef _WIN32 |
20 | #include <dirent.h> |
21 | #include <fcntl.h> |
22 | #include <string.h> |
23 | #include <sys/stat.h> |
24 | #include <sys/types.h> |
25 | #include <unistd.h> |
26 | #else |
27 | #include <string> |
28 | #include <sysinfoapi.h> |
29 | |
30 | #ifdef __MINGW32__ |
31 | // need to manually define this for mingw |
32 | extern "C" WINBASEAPI BOOL WINAPI GetPhysicallyInstalledSystemMemory(PULONGLONG); |
33 | #endif |
34 | |
35 | #undef FILE_CREATE // woo mingw |
36 | #endif |
37 | |
38 | namespace duckdb { |
39 | |
40 | FileSystem::~FileSystem() { |
41 | } |
42 | |
43 | FileSystem &FileSystem::GetFileSystem(ClientContext &context) { |
44 | auto &client_data = ClientData::Get(context); |
45 | return *client_data.client_file_system; |
46 | } |
47 | |
// Returns true when `path` begins with `sub_path`.
// rfind with a start position of 0 can only match at index 0, so this is a
// prefix test that never scans the rest of `path`.
bool PathMatched(const string &path, const string &sub_path) {
	return path.rfind(sub_path, 0) == 0;
}
54 | |
55 | #ifndef _WIN32 |
56 | |
57 | string FileSystem::GetEnvVariable(const string &name) { |
58 | const char *env = getenv(name: name.c_str()); |
59 | if (!env) { |
60 | return string(); |
61 | } |
62 | return env; |
63 | } |
64 | |
65 | bool FileSystem::IsPathAbsolute(const string &path) { |
66 | auto path_separator = FileSystem::PathSeparator(); |
67 | return PathMatched(path, sub_path: path_separator); |
68 | } |
69 | |
70 | string FileSystem::PathSeparator() { |
71 | return "/" ; |
72 | } |
73 | |
74 | void FileSystem::SetWorkingDirectory(const string &path) { |
75 | if (chdir(path: path.c_str()) != 0) { |
76 | throw IOException("Could not change working directory!" ); |
77 | } |
78 | } |
79 | |
80 | idx_t FileSystem::GetAvailableMemory() { |
81 | errno = 0; |
82 | idx_t max_memory = MinValue<idx_t>(a: (idx_t)sysconf(_SC_PHYS_PAGES) * (idx_t)sysconf(_SC_PAGESIZE), UINTPTR_MAX); |
83 | if (errno != 0) { |
84 | return DConstants::INVALID_INDEX; |
85 | } |
86 | return max_memory; |
87 | } |
88 | |
89 | string FileSystem::GetWorkingDirectory() { |
90 | auto buffer = make_unsafe_uniq_array<char>(PATH_MAX); |
91 | char *ret = getcwd(buf: buffer.get(), PATH_MAX); |
92 | if (!ret) { |
93 | throw IOException("Could not get working directory!" ); |
94 | } |
95 | return string(buffer.get()); |
96 | } |
97 | |
98 | string FileSystem::NormalizeAbsolutePath(const string &path) { |
99 | D_ASSERT(IsPathAbsolute(path)); |
100 | return path; |
101 | } |
102 | |
103 | #else |
104 | |
105 | string FileSystem::GetEnvVariable(const string &env) { |
106 | // first convert the environment variable name to the correct encoding |
107 | auto env_w = WindowsUtil::UTF8ToUnicode(env.c_str()); |
108 | // use _wgetenv to get the value |
109 | auto res_w = _wgetenv(env_w.c_str()); |
110 | if (!res_w) { |
111 | // no environment variable of this name found |
112 | return string(); |
113 | } |
114 | return WindowsUtil::UnicodeToUTF8(res_w); |
115 | } |
116 | |
// Returns true when `path` has at least two characters, begins with exactly one
// slash ('/' or '\\') and the second character is not a slash.
static bool StartsWithSingleBackslash(const string &path) {
	const auto is_slash = [](char c) { return c == '/' || c == '\\'; };
	return path.size() >= 2 && is_slash(path[0]) && !is_slash(path[1]);
}
129 | |
130 | bool FileSystem::IsPathAbsolute(const string &path) { |
131 | // 1) A single backslash or forward-slash |
132 | if (StartsWithSingleBackslash(path)) { |
133 | return true; |
134 | } |
135 | // 2) A disk designator with a backslash (e.g., C:\ or C:/) |
136 | auto path_aux = path; |
137 | path_aux.erase(0, 1); |
138 | if (PathMatched(path_aux, ":\\" ) || PathMatched(path_aux, ":/" )) { |
139 | return true; |
140 | } |
141 | return false; |
142 | } |
143 | |
144 | string FileSystem::NormalizeAbsolutePath(const string &path) { |
145 | D_ASSERT(IsPathAbsolute(path)); |
146 | auto result = StringUtil::Lower(FileSystem::ConvertSeparators(path)); |
147 | if (StartsWithSingleBackslash(result)) { |
148 | // Path starts with a single backslash or forward slash |
149 | // prepend drive letter |
150 | return GetWorkingDirectory().substr(0, 2) + result; |
151 | } |
152 | return result; |
153 | } |
154 | |
155 | string FileSystem::PathSeparator() { |
156 | return "\\" ; |
157 | } |
158 | |
159 | void FileSystem::SetWorkingDirectory(const string &path) { |
160 | auto unicode_path = WindowsUtil::UTF8ToUnicode(path.c_str()); |
161 | if (!SetCurrentDirectoryW(unicode_path.c_str())) { |
162 | throw IOException("Could not change working directory to \"%s\"" , path); |
163 | } |
164 | } |
165 | |
166 | idx_t FileSystem::GetAvailableMemory() { |
167 | ULONGLONG available_memory_kb; |
168 | if (GetPhysicallyInstalledSystemMemory(&available_memory_kb)) { |
169 | return MinValue<idx_t>(available_memory_kb * 1000, UINTPTR_MAX); |
170 | } |
171 | // fallback: try GlobalMemoryStatusEx |
172 | MEMORYSTATUSEX mem_state; |
173 | mem_state.dwLength = sizeof(MEMORYSTATUSEX); |
174 | |
175 | if (GlobalMemoryStatusEx(&mem_state)) { |
176 | return MinValue<idx_t>(mem_state.ullTotalPhys, UINTPTR_MAX); |
177 | } |
178 | return DConstants::INVALID_INDEX; |
179 | } |
180 | |
181 | string FileSystem::GetWorkingDirectory() { |
182 | idx_t count = GetCurrentDirectoryW(0, nullptr); |
183 | if (count == 0) { |
184 | throw IOException("Could not get working directory!" ); |
185 | } |
186 | auto buffer = make_unsafe_uniq_array<wchar_t>(count); |
187 | idx_t ret = GetCurrentDirectoryW(count, buffer.get()); |
188 | if (count != ret + 1) { |
189 | throw IOException("Could not get working directory!" ); |
190 | } |
191 | return WindowsUtil::UnicodeToUTF8(buffer.get()); |
192 | } |
193 | |
194 | #endif |
195 | |
196 | string FileSystem::JoinPath(const string &a, const string &b) { |
197 | // FIXME: sanitize paths |
198 | return a + PathSeparator() + b; |
199 | } |
200 | |
201 | string FileSystem::ConvertSeparators(const string &path) { |
202 | auto separator_str = PathSeparator(); |
203 | char separator = separator_str[0]; |
204 | if (separator == '/') { |
205 | // on unix-based systems we only accept / as a separator |
206 | return path; |
207 | } |
208 | // on windows-based systems we accept both |
209 | return StringUtil::Replace(source: path, from: "/" , to: separator_str); |
210 | } |
211 | |
212 | string FileSystem::(const string &path) { |
213 | if (path.empty()) { |
214 | return string(); |
215 | } |
216 | auto normalized_path = ConvertSeparators(path); |
217 | auto sep = PathSeparator(); |
218 | auto splits = StringUtil::Split(input: normalized_path, split: sep); |
219 | D_ASSERT(!splits.empty()); |
220 | return splits.back(); |
221 | } |
222 | |
223 | string FileSystem::(const string &path) { |
224 | if (path.empty()) { |
225 | return string(); |
226 | } |
227 | auto vec = StringUtil::Split(input: ExtractName(path), split: "." ); |
228 | D_ASSERT(!vec.empty()); |
229 | return vec[0]; |
230 | } |
231 | |
232 | string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) { |
233 | // read the home_directory setting first, if it is set |
234 | if (opener) { |
235 | Value result; |
236 | if (opener->TryGetCurrentSetting(key: "home_directory" , result)) { |
237 | if (!result.IsNull() && !result.ToString().empty()) { |
238 | return result.ToString(); |
239 | } |
240 | } |
241 | } |
242 | // fallback to the default home directories for the specified system |
243 | #ifdef DUCKDB_WINDOWS |
244 | return FileSystem::GetEnvVariable("USERPROFILE" ); |
245 | #else |
246 | return FileSystem::GetEnvVariable(name: "HOME" ); |
247 | #endif |
248 | } |
249 | |
250 | string FileSystem::GetHomeDirectory() { |
251 | return GetHomeDirectory(opener: nullptr); |
252 | } |
253 | |
254 | string FileSystem::ExpandPath(const string &path, optional_ptr<FileOpener> opener) { |
255 | if (path.empty()) { |
256 | return path; |
257 | } |
258 | if (path[0] == '~') { |
259 | return GetHomeDirectory(opener) + path.substr(pos: 1); |
260 | } |
261 | return path; |
262 | } |
263 | |
264 | string FileSystem::ExpandPath(const string &path) { |
265 | return FileSystem::ExpandPath(path, opener: nullptr); |
266 | } |
267 | |
268 | // LCOV_EXCL_START |
269 | unique_ptr<FileHandle> FileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock, |
270 | FileCompressionType compression, FileOpener *opener) { |
271 | throw NotImplementedException("%s: OpenFile is not implemented!" , GetName()); |
272 | } |
273 | |
274 | void FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
275 | throw NotImplementedException("%s: Read (with location) is not implemented!" , GetName()); |
276 | } |
277 | |
278 | void FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) { |
279 | throw NotImplementedException("%s: Write (with location) is not implemented!" , GetName()); |
280 | } |
281 | |
282 | int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
283 | throw NotImplementedException("%s: Read is not implemented!" , GetName()); |
284 | } |
285 | |
286 | int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) { |
287 | throw NotImplementedException("%s: Write is not implemented!" , GetName()); |
288 | } |
289 | |
290 | int64_t FileSystem::GetFileSize(FileHandle &handle) { |
291 | throw NotImplementedException("%s: GetFileSize is not implemented!" , GetName()); |
292 | } |
293 | |
294 | time_t FileSystem::GetLastModifiedTime(FileHandle &handle) { |
295 | throw NotImplementedException("%s: GetLastModifiedTime is not implemented!" , GetName()); |
296 | } |
297 | |
298 | FileType FileSystem::GetFileType(FileHandle &handle) { |
299 | return FileType::FILE_TYPE_INVALID; |
300 | } |
301 | |
302 | void FileSystem::Truncate(FileHandle &handle, int64_t new_size) { |
303 | throw NotImplementedException("%s: Truncate is not implemented!" , GetName()); |
304 | } |
305 | |
306 | bool FileSystem::DirectoryExists(const string &directory) { |
307 | throw NotImplementedException("%s: DirectoryExists is not implemented!" , GetName()); |
308 | } |
309 | |
310 | void FileSystem::CreateDirectory(const string &directory) { |
311 | throw NotImplementedException("%s: CreateDirectory is not implemented!" , GetName()); |
312 | } |
313 | |
314 | void FileSystem::RemoveDirectory(const string &directory) { |
315 | throw NotImplementedException("%s: RemoveDirectory is not implemented!" , GetName()); |
316 | } |
317 | |
318 | bool FileSystem::ListFiles(const string &directory, const std::function<void(const string &, bool)> &callback, |
319 | FileOpener *opener) { |
320 | throw NotImplementedException("%s: ListFiles is not implemented!" , GetName()); |
321 | } |
322 | |
323 | void FileSystem::MoveFile(const string &source, const string &target) { |
324 | throw NotImplementedException("%s: MoveFile is not implemented!" , GetName()); |
325 | } |
326 | |
327 | bool FileSystem::FileExists(const string &filename) { |
328 | throw NotImplementedException("%s: FileExists is not implemented!" , GetName()); |
329 | } |
330 | |
331 | bool FileSystem::IsPipe(const string &filename) { |
332 | throw NotImplementedException("%s: IsPipe is not implemented!" , GetName()); |
333 | } |
334 | |
335 | void FileSystem::RemoveFile(const string &filename) { |
336 | throw NotImplementedException("%s: RemoveFile is not implemented!" , GetName()); |
337 | } |
338 | |
339 | void FileSystem::FileSync(FileHandle &handle) { |
340 | throw NotImplementedException("%s: FileSync is not implemented!" , GetName()); |
341 | } |
342 | |
343 | bool FileSystem::HasGlob(const string &str) { |
344 | for (idx_t i = 0; i < str.size(); i++) { |
345 | switch (str[i]) { |
346 | case '*': |
347 | case '?': |
348 | case '[': |
349 | return true; |
350 | default: |
351 | break; |
352 | } |
353 | } |
354 | return false; |
355 | } |
356 | |
357 | vector<string> FileSystem::Glob(const string &path, FileOpener *opener) { |
358 | throw NotImplementedException("%s: Glob is not implemented!" , GetName()); |
359 | } |
360 | |
361 | void FileSystem::RegisterSubSystem(unique_ptr<FileSystem> sub_fs) { |
362 | throw NotImplementedException("%s: Can't register a sub system on a non-virtual file system" , GetName()); |
363 | } |
364 | |
365 | void FileSystem::RegisterSubSystem(FileCompressionType compression_type, unique_ptr<FileSystem> sub_fs) { |
366 | throw NotImplementedException("%s: Can't register a sub system on a non-virtual file system" , GetName()); |
367 | } |
368 | |
369 | void FileSystem::UnregisterSubSystem(const string &name) { |
370 | throw NotImplementedException("%s: Can't unregister a sub system on a non-virtual file system" , GetName()); |
371 | } |
372 | |
373 | vector<string> FileSystem::ListSubSystems() { |
374 | throw NotImplementedException("%s: Can't list sub systems on a non-virtual file system" , GetName()); |
375 | } |
376 | |
377 | bool FileSystem::CanHandleFile(const string &fpath) { |
378 | throw NotImplementedException("%s: CanHandleFile is not implemented!" , GetName()); |
379 | } |
380 | |
381 | vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &context, FileGlobOptions options) { |
382 | auto result = Glob(path: pattern); |
383 | if (result.empty()) { |
384 | string required_extension; |
385 | if (FileSystem::IsRemoteFile(path: pattern)) { |
386 | required_extension = "httpfs" ; |
387 | } |
388 | if (!required_extension.empty() && !context.db->ExtensionIsLoaded(name: required_extension)) { |
389 | // an extension is required to read this file but it is not loaded - try to load it |
390 | ExtensionHelper::LoadExternalExtension(context, extension: required_extension); |
391 | // success! glob again |
392 | // check the extension is loaded just in case to prevent an infinite loop here |
393 | if (!context.db->ExtensionIsLoaded(name: required_extension)) { |
394 | throw InternalException("Extension load \"%s\" did not throw but somehow the extension was not loaded" , |
395 | required_extension); |
396 | } |
397 | return GlobFiles(pattern, context, options); |
398 | } |
399 | if (options == FileGlobOptions::DISALLOW_EMPTY) { |
400 | throw IOException("No files found that match the pattern \"%s\"" , pattern); |
401 | } |
402 | } |
403 | return result; |
404 | } |
405 | |
406 | void FileSystem::Seek(FileHandle &handle, idx_t location) { |
407 | throw NotImplementedException("%s: Seek is not implemented!" , GetName()); |
408 | } |
409 | |
410 | void FileSystem::Reset(FileHandle &handle) { |
411 | handle.Seek(location: 0); |
412 | } |
413 | |
414 | idx_t FileSystem::SeekPosition(FileHandle &handle) { |
415 | throw NotImplementedException("%s: SeekPosition is not implemented!" , GetName()); |
416 | } |
417 | |
418 | bool FileSystem::CanSeek() { |
419 | throw NotImplementedException("%s: CanSeek is not implemented!" , GetName()); |
420 | } |
421 | |
422 | unique_ptr<FileHandle> FileSystem::OpenCompressedFile(unique_ptr<FileHandle> handle, bool write) { |
423 | throw NotImplementedException("%s: OpenCompressedFile is not implemented!" , GetName()); |
424 | } |
425 | |
426 | bool FileSystem::OnDiskFile(FileHandle &handle) { |
427 | throw NotImplementedException("%s: OnDiskFile is not implemented!" , GetName()); |
428 | } |
429 | // LCOV_EXCL_STOP |
430 | |
431 | FileHandle::FileHandle(FileSystem &file_system, string path_p) : file_system(file_system), path(std::move(path_p)) { |
432 | } |
433 | |
434 | FileHandle::~FileHandle() { |
435 | } |
436 | |
437 | int64_t FileHandle::Read(void *buffer, idx_t nr_bytes) { |
438 | return file_system.Read(handle&: *this, buffer, nr_bytes); |
439 | } |
440 | |
441 | int64_t FileHandle::Write(void *buffer, idx_t nr_bytes) { |
442 | return file_system.Write(handle&: *this, buffer, nr_bytes); |
443 | } |
444 | |
445 | void FileHandle::Read(void *buffer, idx_t nr_bytes, idx_t location) { |
446 | file_system.Read(handle&: *this, buffer, nr_bytes, location); |
447 | } |
448 | |
449 | void FileHandle::Write(void *buffer, idx_t nr_bytes, idx_t location) { |
450 | file_system.Write(handle&: *this, buffer, nr_bytes, location); |
451 | } |
452 | |
453 | void FileHandle::Seek(idx_t location) { |
454 | file_system.Seek(handle&: *this, location); |
455 | } |
456 | |
457 | void FileHandle::Reset() { |
458 | file_system.Reset(handle&: *this); |
459 | } |
460 | |
461 | idx_t FileHandle::SeekPosition() { |
462 | return file_system.SeekPosition(handle&: *this); |
463 | } |
464 | |
465 | bool FileHandle::CanSeek() { |
466 | return file_system.CanSeek(); |
467 | } |
468 | |
469 | string FileHandle::ReadLine() { |
470 | string result; |
471 | char buffer[1]; |
472 | while (true) { |
473 | idx_t tuples_read = Read(buffer, nr_bytes: 1); |
474 | if (tuples_read == 0 || buffer[0] == '\n') { |
475 | return result; |
476 | } |
477 | if (buffer[0] != '\r') { |
478 | result += buffer[0]; |
479 | } |
480 | } |
481 | } |
482 | |
483 | bool FileHandle::OnDiskFile() { |
484 | return file_system.OnDiskFile(handle&: *this); |
485 | } |
486 | |
487 | idx_t FileHandle::GetFileSize() { |
488 | return file_system.GetFileSize(handle&: *this); |
489 | } |
490 | |
491 | void FileHandle::Sync() { |
492 | file_system.FileSync(handle&: *this); |
493 | } |
494 | |
495 | void FileHandle::Truncate(int64_t new_size) { |
496 | file_system.Truncate(handle&: *this, new_size); |
497 | } |
498 | |
499 | FileType FileHandle::GetType() { |
500 | return file_system.GetFileType(handle&: *this); |
501 | } |
502 | |
503 | bool FileSystem::IsRemoteFile(const string &path) { |
504 | const string prefixes[] = {"http://" , "https://" , "s3://" }; |
505 | for (auto &prefix : prefixes) { |
506 | if (StringUtil::StartsWith(str: path, prefix)) { |
507 | return true; |
508 | } |
509 | } |
510 | return false; |
511 | } |
512 | |
513 | } // namespace duckdb |
514 | |