1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/common/file_system.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/constants.hpp"
12#include "duckdb/common/enums/file_compression_type.hpp"
13#include "duckdb/common/exception.hpp"
14#include "duckdb/common/file_buffer.hpp"
15#include "duckdb/common/unordered_map.hpp"
16#include "duckdb/common/vector.hpp"
17#include "duckdb/common/enums/file_glob_options.hpp"
18#include "duckdb/common/optional_ptr.hpp"
19#include <functional>
20
21#undef CreateDirectory
22#undef MoveFile
23#undef RemoveDirectory
24
25namespace duckdb {
26class AttachedDatabase;
27class ClientContext;
28class DatabaseInstance;
29class FileOpener;
30class FileSystem;
31
//! The kind of file system object a file handle refers to.
//! The explicit values match the implicit numbering of the declaration order.
enum class FileType {
	FILE_TYPE_REGULAR = 0,  // regular file
	FILE_TYPE_DIR = 1,      // directory
	FILE_TYPE_FIFO = 2,     // FIFO named pipe
	FILE_TYPE_SOCKET = 3,   // socket
	FILE_TYPE_LINK = 4,     // symbolic link
	FILE_TYPE_BLOCKDEV = 5, // block device
	FILE_TYPE_CHARDEV = 6,  // character device
	FILE_TYPE_INVALID = 7,  // unknown or invalid file handle
};
50
51struct FileHandle {
52public:
53 DUCKDB_API FileHandle(FileSystem &file_system, string path);
54 FileHandle(const FileHandle &) = delete;
55 DUCKDB_API virtual ~FileHandle();
56
57 DUCKDB_API int64_t Read(void *buffer, idx_t nr_bytes);
58 DUCKDB_API int64_t Write(void *buffer, idx_t nr_bytes);
59 DUCKDB_API void Read(void *buffer, idx_t nr_bytes, idx_t location);
60 DUCKDB_API void Write(void *buffer, idx_t nr_bytes, idx_t location);
61 DUCKDB_API void Seek(idx_t location);
62 DUCKDB_API void Reset();
63 DUCKDB_API idx_t SeekPosition();
64 DUCKDB_API void Sync();
65 DUCKDB_API void Truncate(int64_t new_size);
66 DUCKDB_API string ReadLine();
67
68 DUCKDB_API bool CanSeek();
69 DUCKDB_API bool OnDiskFile();
70 DUCKDB_API idx_t GetFileSize();
71 DUCKDB_API FileType GetType();
72
73 //! Closes the file handle.
74 DUCKDB_API virtual void Close() = 0;
75
76 string GetPath() const {
77 return path;
78 }
79
80 template <class TARGET>
81 TARGET &Cast() {
82 D_ASSERT(dynamic_cast<TARGET *>(this));
83 return reinterpret_cast<TARGET &>(*this);
84 }
85 template <class TARGET>
86 const TARGET &Cast() const {
87 D_ASSERT(dynamic_cast<const TARGET *>(this));
88 return reinterpret_cast<const TARGET &>(*this);
89 }
90
91public:
92 FileSystem &file_system;
93 string path;
94};
95
//! The type of lock to take on a file when it is opened
enum class FileLockType : uint8_t {
	//! Do not lock the file
	NO_LOCK = 0,
	//! Take a read lock on the file
	READ_LOCK = 1,
	//! Take a write lock on the file
	WRITE_LOCK = 2
};
97
//! Bit flags that describe how a file should be opened. Every flag occupies its own bit of a uint8_t, so flags can
//! be combined with bitwise OR.
class FileFlags {
public:
	//! Bit 0: open the file with read access
	static constexpr uint8_t FILE_FLAGS_READ = 0x01;
	//! Bit 1: open the file with write access
	static constexpr uint8_t FILE_FLAGS_WRITE = 0x02;
	//! Bit 2: use direct IO when reading from/writing to the file
	static constexpr uint8_t FILE_FLAGS_DIRECT_IO = 0x04;
	//! Bit 3: create the file if it does not exist yet; can only be used together with WRITE
	static constexpr uint8_t FILE_FLAGS_FILE_CREATE = 0x08;
	//! Bit 4: always create a new file, truncating the file if it already exists; cannot be used together with CREATE
	static constexpr uint8_t FILE_FLAGS_FILE_CREATE_NEW = 0x10;
	//! Bit 5: open the file in append mode
	static constexpr uint8_t FILE_FLAGS_APPEND = 0x20;
};
113
114class FileSystem {
115public:
116 DUCKDB_API virtual ~FileSystem();
117
118public:
119 DUCKDB_API static constexpr FileLockType DEFAULT_LOCK = FileLockType::NO_LOCK;
120 DUCKDB_API static constexpr FileCompressionType DEFAULT_COMPRESSION = FileCompressionType::UNCOMPRESSED;
121 DUCKDB_API static FileSystem &GetFileSystem(ClientContext &context);
122 DUCKDB_API static FileSystem &GetFileSystem(DatabaseInstance &db);
123 DUCKDB_API static FileSystem &Get(AttachedDatabase &db);
124
125 DUCKDB_API virtual unique_ptr<FileHandle> OpenFile(const string &path, uint8_t flags,
126 FileLockType lock = DEFAULT_LOCK,
127 FileCompressionType compression = DEFAULT_COMPRESSION,
128 FileOpener *opener = nullptr);
129
130 //! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is
131 //! equivalent to calling SetFilePointer(location) followed by calling Read().
132 DUCKDB_API virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location);
133 //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is
134 //! equivalent to calling SetFilePointer(location) followed by calling Write().
135 DUCKDB_API virtual void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location);
136 //! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the
137 //! amount of bytes read.
138 DUCKDB_API virtual int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes);
139 //! Write nr_bytes from the buffer into the file, moving the file pointer forward by nr_bytes.
140 DUCKDB_API virtual int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes);
141
142 //! Returns the file size of a file handle, returns -1 on error
143 DUCKDB_API virtual int64_t GetFileSize(FileHandle &handle);
144 //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
145 DUCKDB_API virtual time_t GetLastModifiedTime(FileHandle &handle);
146 //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error
147 DUCKDB_API virtual FileType GetFileType(FileHandle &handle);
148 //! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of
149 //! the file
150 DUCKDB_API virtual void Truncate(FileHandle &handle, int64_t new_size);
151
152 //! Check if a directory exists
153 DUCKDB_API virtual bool DirectoryExists(const string &directory);
154 //! Create a directory if it does not exist
155 DUCKDB_API virtual void CreateDirectory(const string &directory);
156 //! Recursively remove a directory and all files in it
157 DUCKDB_API virtual void RemoveDirectory(const string &directory);
158 //! List files in a directory, invoking the callback method for each one with (filename, is_dir)
159 DUCKDB_API virtual bool ListFiles(const string &directory,
160 const std::function<void(const string &, bool)> &callback,
161 FileOpener *opener = nullptr);
162
163 //! Move a file from source path to the target, StorageManager relies on this being an atomic action for ACID
164 //! properties
165 DUCKDB_API virtual void MoveFile(const string &source, const string &target);
166 //! Check if a file exists
167 DUCKDB_API virtual bool FileExists(const string &filename);
168 //! Check if path is pipe
169 DUCKDB_API virtual bool IsPipe(const string &filename);
170 //! Remove a file from disk
171 DUCKDB_API virtual void RemoveFile(const string &filename);
172 //! Sync a file handle to disk
173 DUCKDB_API virtual void FileSync(FileHandle &handle);
174 //! Sets the working directory
175 DUCKDB_API static void SetWorkingDirectory(const string &path);
176 //! Gets the working directory
177 DUCKDB_API static string GetWorkingDirectory();
178 //! Gets the users home directory
179 DUCKDB_API static string GetHomeDirectory(optional_ptr<FileOpener> opener);
180 //! Gets the users home directory
181 DUCKDB_API virtual string GetHomeDirectory();
182 //! Expands a given path, including e.g. expanding the home directory of the user
183 DUCKDB_API static string ExpandPath(const string &path, optional_ptr<FileOpener> opener);
184 //! Expands a given path, including e.g. expanding the home directory of the user
185 DUCKDB_API virtual string ExpandPath(const string &path);
186 //! Returns the system-available memory in bytes. Returns DConstants::INVALID_INDEX if the system function fails.
187 DUCKDB_API static idx_t GetAvailableMemory();
188 //! Path separator for the current file system
189 DUCKDB_API static string PathSeparator();
190 //! Checks if path is starts with separator (i.e., '/' on UNIX '\\' on Windows)
191 DUCKDB_API static bool IsPathAbsolute(const string &path);
192 //! Normalize an absolute path - the goal of normalizing is converting "\test.db" and "C:/test.db" into "C:\test.db"
193 //! so that the database system cache can correctly
194 DUCKDB_API static string NormalizeAbsolutePath(const string &path);
195 //! Join two paths together
196 DUCKDB_API static string JoinPath(const string &a, const string &path);
197 //! Convert separators in a path to the local separators (e.g. convert "/" into \\ on windows)
198 DUCKDB_API static string ConvertSeparators(const string &path);
199 //! Extract the base name of a file (e.g. if the input is lib/example.dll the base name is 'example')
200 DUCKDB_API static string ExtractBaseName(const string &path);
201 //! Extract the name of a file (e.g if the input is lib/example.dll the name is 'example.dll')
202 DUCKDB_API static string ExtractName(const string &path);
203
204 //! Returns the value of an environment variable - or the empty string if it is not set
205 DUCKDB_API static string GetEnvVariable(const string &name);
206
207 //! Whether there is a glob in the string
208 DUCKDB_API static bool HasGlob(const string &str);
209 //! Runs a glob on the file system, returning a list of matching files
210 DUCKDB_API virtual vector<string> Glob(const string &path, FileOpener *opener = nullptr);
211 DUCKDB_API vector<string> GlobFiles(const string &path, ClientContext &context,
212 FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY);
213
214 //! registers a sub-file system to handle certain file name prefixes, e.g. http:// etc.
215 DUCKDB_API virtual void RegisterSubSystem(unique_ptr<FileSystem> sub_fs);
216 DUCKDB_API virtual void RegisterSubSystem(FileCompressionType compression_type, unique_ptr<FileSystem> fs);
217
218 //! Unregister a sub-filesystem by name
219 DUCKDB_API virtual void UnregisterSubSystem(const string &name);
220
221 //! List registered sub-filesystems, including builtin ones
222 DUCKDB_API virtual vector<string> ListSubSystems();
223
224 //! Whether or not a sub-system can handle a specific file path
225 DUCKDB_API virtual bool CanHandleFile(const string &fpath);
226
227 //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
228 DUCKDB_API virtual void Seek(FileHandle &handle, idx_t location);
229 //! Reset a file to the beginning (equivalent to Seek(handle, 0) for simple files)
230 DUCKDB_API virtual void Reset(FileHandle &handle);
231 DUCKDB_API virtual idx_t SeekPosition(FileHandle &handle);
232
233 //! Whether or not we can seek into the file
234 DUCKDB_API virtual bool CanSeek();
235 //! Whether or not the FS handles plain files on disk. This is relevant for certain optimizations, as random reads
236 //! in a file on-disk are much cheaper than e.g. random reads in a file over the network
237 DUCKDB_API virtual bool OnDiskFile(FileHandle &handle);
238
239 DUCKDB_API virtual unique_ptr<FileHandle> OpenCompressedFile(unique_ptr<FileHandle> handle, bool write);
240
241 //! Create a LocalFileSystem.
242 DUCKDB_API static unique_ptr<FileSystem> CreateLocal();
243
244 //! Return the name of the filesytem. Used for forming diagnosis messages.
245 DUCKDB_API virtual std::string GetName() const = 0;
246
247 //! Whether or not a file is remote or local, based only on file path
248 DUCKDB_API static bool IsRemoteFile(const string &path);
249};
250
251} // namespace duckdb
252