1#include "duckdb/common/file_system.hpp"
2
3#include "duckdb/common/checksum.hpp"
4#include "duckdb/common/exception.hpp"
5#include "duckdb/common/file_opener.hpp"
6#include "duckdb/common/helper.hpp"
7#include "duckdb/common/string_util.hpp"
8#include "duckdb/common/windows.hpp"
9#include "duckdb/function/scalar/string_functions.hpp"
10#include "duckdb/main/client_context.hpp"
11#include "duckdb/main/client_data.hpp"
12#include "duckdb/main/database.hpp"
13#include "duckdb/main/extension_helper.hpp"
14#include "duckdb/common/windows_util.hpp"
15
16#include <cstdint>
17#include <cstdio>
18
19#ifndef _WIN32
20#include <dirent.h>
21#include <fcntl.h>
22#include <string.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25#include <unistd.h>
26#else
27#include <string>
28#include <sysinfoapi.h>
29
30#ifdef __MINGW32__
31// need to manually define this for mingw
32extern "C" WINBASEAPI BOOL WINAPI GetPhysicallyInstalledSystemMemory(PULONGLONG);
33#endif
34
35#undef FILE_CREATE // woo mingw
36#endif
37
38namespace duckdb {
39
40FileSystem::~FileSystem() {
41}
42
43FileSystem &FileSystem::GetFileSystem(ClientContext &context) {
44 auto &client_data = ClientData::Get(context);
45 return *client_data.client_file_system;
46}
47
// Returns true when `path` begins with `sub_path`.
// An empty `sub_path` matches every path, including the empty one.
bool PathMatched(const string &path, const string &sub_path) {
	return path.compare(0, sub_path.size(), sub_path) == 0;
}
54
55#ifndef _WIN32
56
57string FileSystem::GetEnvVariable(const string &name) {
58 const char *env = getenv(name: name.c_str());
59 if (!env) {
60 return string();
61 }
62 return env;
63}
64
65bool FileSystem::IsPathAbsolute(const string &path) {
66 auto path_separator = FileSystem::PathSeparator();
67 return PathMatched(path, sub_path: path_separator);
68}
69
70string FileSystem::PathSeparator() {
71 return "/";
72}
73
74void FileSystem::SetWorkingDirectory(const string &path) {
75 if (chdir(path: path.c_str()) != 0) {
76 throw IOException("Could not change working directory!");
77 }
78}
79
80idx_t FileSystem::GetAvailableMemory() {
81 errno = 0;
82 idx_t max_memory = MinValue<idx_t>(a: (idx_t)sysconf(_SC_PHYS_PAGES) * (idx_t)sysconf(_SC_PAGESIZE), UINTPTR_MAX);
83 if (errno != 0) {
84 return DConstants::INVALID_INDEX;
85 }
86 return max_memory;
87}
88
89string FileSystem::GetWorkingDirectory() {
90 auto buffer = make_unsafe_uniq_array<char>(PATH_MAX);
91 char *ret = getcwd(buf: buffer.get(), PATH_MAX);
92 if (!ret) {
93 throw IOException("Could not get working directory!");
94 }
95 return string(buffer.get());
96}
97
98string FileSystem::NormalizeAbsolutePath(const string &path) {
99 D_ASSERT(IsPathAbsolute(path));
100 return path;
101}
102
103#else
104
105string FileSystem::GetEnvVariable(const string &env) {
106 // first convert the environment variable name to the correct encoding
107 auto env_w = WindowsUtil::UTF8ToUnicode(env.c_str());
108 // use _wgetenv to get the value
109 auto res_w = _wgetenv(env_w.c_str());
110 if (!res_w) {
111 // no environment variable of this name found
112 return string();
113 }
114 return WindowsUtil::UnicodeToUTF8(res_w);
115}
116
// Returns true when `path` has at least two characters, the first is a slash or backslash,
// and the second is neither (i.e. the path starts with exactly one separator).
static bool StartsWithSingleBackslash(const string &path) {
	const auto is_slash = [](char c) { return c == '/' || c == '\\'; };
	return path.size() >= 2 && is_slash(path[0]) && !is_slash(path[1]);
}
129
130bool FileSystem::IsPathAbsolute(const string &path) {
131 // 1) A single backslash or forward-slash
132 if (StartsWithSingleBackslash(path)) {
133 return true;
134 }
135 // 2) A disk designator with a backslash (e.g., C:\ or C:/)
136 auto path_aux = path;
137 path_aux.erase(0, 1);
138 if (PathMatched(path_aux, ":\\") || PathMatched(path_aux, ":/")) {
139 return true;
140 }
141 return false;
142}
143
144string FileSystem::NormalizeAbsolutePath(const string &path) {
145 D_ASSERT(IsPathAbsolute(path));
146 auto result = StringUtil::Lower(FileSystem::ConvertSeparators(path));
147 if (StartsWithSingleBackslash(result)) {
148 // Path starts with a single backslash or forward slash
149 // prepend drive letter
150 return GetWorkingDirectory().substr(0, 2) + result;
151 }
152 return result;
153}
154
155string FileSystem::PathSeparator() {
156 return "\\";
157}
158
159void FileSystem::SetWorkingDirectory(const string &path) {
160 auto unicode_path = WindowsUtil::UTF8ToUnicode(path.c_str());
161 if (!SetCurrentDirectoryW(unicode_path.c_str())) {
162 throw IOException("Could not change working directory to \"%s\"", path);
163 }
164}
165
166idx_t FileSystem::GetAvailableMemory() {
167 ULONGLONG available_memory_kb;
168 if (GetPhysicallyInstalledSystemMemory(&available_memory_kb)) {
169 return MinValue<idx_t>(available_memory_kb * 1000, UINTPTR_MAX);
170 }
171 // fallback: try GlobalMemoryStatusEx
172 MEMORYSTATUSEX mem_state;
173 mem_state.dwLength = sizeof(MEMORYSTATUSEX);
174
175 if (GlobalMemoryStatusEx(&mem_state)) {
176 return MinValue<idx_t>(mem_state.ullTotalPhys, UINTPTR_MAX);
177 }
178 return DConstants::INVALID_INDEX;
179}
180
181string FileSystem::GetWorkingDirectory() {
182 idx_t count = GetCurrentDirectoryW(0, nullptr);
183 if (count == 0) {
184 throw IOException("Could not get working directory!");
185 }
186 auto buffer = make_unsafe_uniq_array<wchar_t>(count);
187 idx_t ret = GetCurrentDirectoryW(count, buffer.get());
188 if (count != ret + 1) {
189 throw IOException("Could not get working directory!");
190 }
191 return WindowsUtil::UnicodeToUTF8(buffer.get());
192}
193
194#endif
195
196string FileSystem::JoinPath(const string &a, const string &b) {
197 // FIXME: sanitize paths
198 return a + PathSeparator() + b;
199}
200
201string FileSystem::ConvertSeparators(const string &path) {
202 auto separator_str = PathSeparator();
203 char separator = separator_str[0];
204 if (separator == '/') {
205 // on unix-based systems we only accept / as a separator
206 return path;
207 }
208 // on windows-based systems we accept both
209 return StringUtil::Replace(source: path, from: "/", to: separator_str);
210}
211
212string FileSystem::ExtractName(const string &path) {
213 if (path.empty()) {
214 return string();
215 }
216 auto normalized_path = ConvertSeparators(path);
217 auto sep = PathSeparator();
218 auto splits = StringUtil::Split(input: normalized_path, split: sep);
219 D_ASSERT(!splits.empty());
220 return splits.back();
221}
222
223string FileSystem::ExtractBaseName(const string &path) {
224 if (path.empty()) {
225 return string();
226 }
227 auto vec = StringUtil::Split(input: ExtractName(path), split: ".");
228 D_ASSERT(!vec.empty());
229 return vec[0];
230}
231
232string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) {
233 // read the home_directory setting first, if it is set
234 if (opener) {
235 Value result;
236 if (opener->TryGetCurrentSetting(key: "home_directory", result)) {
237 if (!result.IsNull() && !result.ToString().empty()) {
238 return result.ToString();
239 }
240 }
241 }
242 // fallback to the default home directories for the specified system
243#ifdef DUCKDB_WINDOWS
244 return FileSystem::GetEnvVariable("USERPROFILE");
245#else
246 return FileSystem::GetEnvVariable(name: "HOME");
247#endif
248}
249
250string FileSystem::GetHomeDirectory() {
251 return GetHomeDirectory(opener: nullptr);
252}
253
254string FileSystem::ExpandPath(const string &path, optional_ptr<FileOpener> opener) {
255 if (path.empty()) {
256 return path;
257 }
258 if (path[0] == '~') {
259 return GetHomeDirectory(opener) + path.substr(pos: 1);
260 }
261 return path;
262}
263
264string FileSystem::ExpandPath(const string &path) {
265 return FileSystem::ExpandPath(path, opener: nullptr);
266}
267
268// LCOV_EXCL_START
269unique_ptr<FileHandle> FileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock,
270 FileCompressionType compression, FileOpener *opener) {
271 throw NotImplementedException("%s: OpenFile is not implemented!", GetName());
272}
273
274void FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
275 throw NotImplementedException("%s: Read (with location) is not implemented!", GetName());
276}
277
278void FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) {
279 throw NotImplementedException("%s: Write (with location) is not implemented!", GetName());
280}
281
282int64_t FileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) {
283 throw NotImplementedException("%s: Read is not implemented!", GetName());
284}
285
286int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) {
287 throw NotImplementedException("%s: Write is not implemented!", GetName());
288}
289
290int64_t FileSystem::GetFileSize(FileHandle &handle) {
291 throw NotImplementedException("%s: GetFileSize is not implemented!", GetName());
292}
293
294time_t FileSystem::GetLastModifiedTime(FileHandle &handle) {
295 throw NotImplementedException("%s: GetLastModifiedTime is not implemented!", GetName());
296}
297
298FileType FileSystem::GetFileType(FileHandle &handle) {
299 return FileType::FILE_TYPE_INVALID;
300}
301
302void FileSystem::Truncate(FileHandle &handle, int64_t new_size) {
303 throw NotImplementedException("%s: Truncate is not implemented!", GetName());
304}
305
306bool FileSystem::DirectoryExists(const string &directory) {
307 throw NotImplementedException("%s: DirectoryExists is not implemented!", GetName());
308}
309
310void FileSystem::CreateDirectory(const string &directory) {
311 throw NotImplementedException("%s: CreateDirectory is not implemented!", GetName());
312}
313
314void FileSystem::RemoveDirectory(const string &directory) {
315 throw NotImplementedException("%s: RemoveDirectory is not implemented!", GetName());
316}
317
318bool FileSystem::ListFiles(const string &directory, const std::function<void(const string &, bool)> &callback,
319 FileOpener *opener) {
320 throw NotImplementedException("%s: ListFiles is not implemented!", GetName());
321}
322
323void FileSystem::MoveFile(const string &source, const string &target) {
324 throw NotImplementedException("%s: MoveFile is not implemented!", GetName());
325}
326
327bool FileSystem::FileExists(const string &filename) {
328 throw NotImplementedException("%s: FileExists is not implemented!", GetName());
329}
330
331bool FileSystem::IsPipe(const string &filename) {
332 throw NotImplementedException("%s: IsPipe is not implemented!", GetName());
333}
334
335void FileSystem::RemoveFile(const string &filename) {
336 throw NotImplementedException("%s: RemoveFile is not implemented!", GetName());
337}
338
339void FileSystem::FileSync(FileHandle &handle) {
340 throw NotImplementedException("%s: FileSync is not implemented!", GetName());
341}
342
343bool FileSystem::HasGlob(const string &str) {
344 for (idx_t i = 0; i < str.size(); i++) {
345 switch (str[i]) {
346 case '*':
347 case '?':
348 case '[':
349 return true;
350 default:
351 break;
352 }
353 }
354 return false;
355}
356
357vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
358 throw NotImplementedException("%s: Glob is not implemented!", GetName());
359}
360
361void FileSystem::RegisterSubSystem(unique_ptr<FileSystem> sub_fs) {
362 throw NotImplementedException("%s: Can't register a sub system on a non-virtual file system", GetName());
363}
364
365void FileSystem::RegisterSubSystem(FileCompressionType compression_type, unique_ptr<FileSystem> sub_fs) {
366 throw NotImplementedException("%s: Can't register a sub system on a non-virtual file system", GetName());
367}
368
369void FileSystem::UnregisterSubSystem(const string &name) {
370 throw NotImplementedException("%s: Can't unregister a sub system on a non-virtual file system", GetName());
371}
372
373vector<string> FileSystem::ListSubSystems() {
374 throw NotImplementedException("%s: Can't list sub systems on a non-virtual file system", GetName());
375}
376
377bool FileSystem::CanHandleFile(const string &fpath) {
378 throw NotImplementedException("%s: CanHandleFile is not implemented!", GetName());
379}
380
381vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &context, FileGlobOptions options) {
382 auto result = Glob(path: pattern);
383 if (result.empty()) {
384 string required_extension;
385 if (FileSystem::IsRemoteFile(path: pattern)) {
386 required_extension = "httpfs";
387 }
388 if (!required_extension.empty() && !context.db->ExtensionIsLoaded(name: required_extension)) {
389 // an extension is required to read this file but it is not loaded - try to load it
390 ExtensionHelper::LoadExternalExtension(context, extension: required_extension);
391 // success! glob again
392 // check the extension is loaded just in case to prevent an infinite loop here
393 if (!context.db->ExtensionIsLoaded(name: required_extension)) {
394 throw InternalException("Extension load \"%s\" did not throw but somehow the extension was not loaded",
395 required_extension);
396 }
397 return GlobFiles(pattern, context, options);
398 }
399 if (options == FileGlobOptions::DISALLOW_EMPTY) {
400 throw IOException("No files found that match the pattern \"%s\"", pattern);
401 }
402 }
403 return result;
404}
405
406void FileSystem::Seek(FileHandle &handle, idx_t location) {
407 throw NotImplementedException("%s: Seek is not implemented!", GetName());
408}
409
410void FileSystem::Reset(FileHandle &handle) {
411 handle.Seek(location: 0);
412}
413
414idx_t FileSystem::SeekPosition(FileHandle &handle) {
415 throw NotImplementedException("%s: SeekPosition is not implemented!", GetName());
416}
417
418bool FileSystem::CanSeek() {
419 throw NotImplementedException("%s: CanSeek is not implemented!", GetName());
420}
421
422unique_ptr<FileHandle> FileSystem::OpenCompressedFile(unique_ptr<FileHandle> handle, bool write) {
423 throw NotImplementedException("%s: OpenCompressedFile is not implemented!", GetName());
424}
425
426bool FileSystem::OnDiskFile(FileHandle &handle) {
427 throw NotImplementedException("%s: OnDiskFile is not implemented!", GetName());
428}
429// LCOV_EXCL_STOP
430
431FileHandle::FileHandle(FileSystem &file_system, string path_p) : file_system(file_system), path(std::move(path_p)) {
432}
433
434FileHandle::~FileHandle() {
435}
436
437int64_t FileHandle::Read(void *buffer, idx_t nr_bytes) {
438 return file_system.Read(handle&: *this, buffer, nr_bytes);
439}
440
441int64_t FileHandle::Write(void *buffer, idx_t nr_bytes) {
442 return file_system.Write(handle&: *this, buffer, nr_bytes);
443}
444
445void FileHandle::Read(void *buffer, idx_t nr_bytes, idx_t location) {
446 file_system.Read(handle&: *this, buffer, nr_bytes, location);
447}
448
449void FileHandle::Write(void *buffer, idx_t nr_bytes, idx_t location) {
450 file_system.Write(handle&: *this, buffer, nr_bytes, location);
451}
452
453void FileHandle::Seek(idx_t location) {
454 file_system.Seek(handle&: *this, location);
455}
456
457void FileHandle::Reset() {
458 file_system.Reset(handle&: *this);
459}
460
461idx_t FileHandle::SeekPosition() {
462 return file_system.SeekPosition(handle&: *this);
463}
464
465bool FileHandle::CanSeek() {
466 return file_system.CanSeek();
467}
468
469string FileHandle::ReadLine() {
470 string result;
471 char buffer[1];
472 while (true) {
473 idx_t tuples_read = Read(buffer, nr_bytes: 1);
474 if (tuples_read == 0 || buffer[0] == '\n') {
475 return result;
476 }
477 if (buffer[0] != '\r') {
478 result += buffer[0];
479 }
480 }
481}
482
483bool FileHandle::OnDiskFile() {
484 return file_system.OnDiskFile(handle&: *this);
485}
486
487idx_t FileHandle::GetFileSize() {
488 return file_system.GetFileSize(handle&: *this);
489}
490
491void FileHandle::Sync() {
492 file_system.FileSync(handle&: *this);
493}
494
495void FileHandle::Truncate(int64_t new_size) {
496 file_system.Truncate(handle&: *this, new_size);
497}
498
499FileType FileHandle::GetType() {
500 return file_system.GetFileType(handle&: *this);
501}
502
503bool FileSystem::IsRemoteFile(const string &path) {
504 const string prefixes[] = {"http://", "https://", "s3://"};
505 for (auto &prefix : prefixes) {
506 if (StringUtil::StartsWith(str: path, prefix)) {
507 return true;
508 }
509 }
510 return false;
511}
512
513} // namespace duckdb
514