1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/common/file_system.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/constants.hpp" |
12 | #include "duckdb/common/enums/file_compression_type.hpp" |
13 | #include "duckdb/common/exception.hpp" |
14 | #include "duckdb/common/file_buffer.hpp" |
15 | #include "duckdb/common/unordered_map.hpp" |
16 | #include "duckdb/common/vector.hpp" |
17 | #include "duckdb/common/enums/file_glob_options.hpp" |
18 | #include "duckdb/common/optional_ptr.hpp" |
19 | #include <functional> |
20 | |
// NOTE(review): FileSystem declares methods named CreateDirectory, MoveFile and RemoveDirectory.
// These #undefs presumably guard against platform headers (e.g. <windows.h>) that define the same
// names as macros, which would otherwise rewrite the method names - confirm.
#undef CreateDirectory
#undef MoveFile
#undef RemoveDirectory
24 | |
25 | namespace duckdb { |
// Forward declarations: within this header these types are only used through references, pointers,
// optional_ptr or unique_ptr, so their full definitions are not required here.
class AttachedDatabase;
class ClientContext;
class DatabaseInstance;
class FileOpener;
class FileSystem;
31 | |
//! The kind of file system object a file handle refers to.
//! The enumerator values are spelled out explicitly; they match the implicit numbering (0..7).
enum class FileType {
	//! An ordinary (regular) file
	FILE_TYPE_REGULAR = 0,
	//! A directory
	FILE_TYPE_DIR = 1,
	//! A named pipe (FIFO)
	FILE_TYPE_FIFO = 2,
	//! A socket
	FILE_TYPE_SOCKET = 3,
	//! A symbolic link
	FILE_TYPE_LINK = 4,
	//! A block device
	FILE_TYPE_BLOCKDEV = 5,
	//! A character device
	FILE_TYPE_CHARDEV = 6,
	//! The file handle is invalid or its type is unknown
	FILE_TYPE_INVALID = 7,
};
50 | |
51 | struct FileHandle { |
52 | public: |
53 | DUCKDB_API FileHandle(FileSystem &file_system, string path); |
54 | FileHandle(const FileHandle &) = delete; |
55 | DUCKDB_API virtual ~FileHandle(); |
56 | |
57 | DUCKDB_API int64_t Read(void *buffer, idx_t nr_bytes); |
58 | DUCKDB_API int64_t Write(void *buffer, idx_t nr_bytes); |
59 | DUCKDB_API void Read(void *buffer, idx_t nr_bytes, idx_t location); |
60 | DUCKDB_API void Write(void *buffer, idx_t nr_bytes, idx_t location); |
61 | DUCKDB_API void Seek(idx_t location); |
62 | DUCKDB_API void Reset(); |
63 | DUCKDB_API idx_t SeekPosition(); |
64 | DUCKDB_API void Sync(); |
65 | DUCKDB_API void Truncate(int64_t new_size); |
66 | DUCKDB_API string ReadLine(); |
67 | |
68 | DUCKDB_API bool CanSeek(); |
69 | DUCKDB_API bool OnDiskFile(); |
70 | DUCKDB_API idx_t GetFileSize(); |
71 | DUCKDB_API FileType GetType(); |
72 | |
73 | //! Closes the file handle. |
74 | DUCKDB_API virtual void Close() = 0; |
75 | |
76 | string GetPath() const { |
77 | return path; |
78 | } |
79 | |
80 | template <class TARGET> |
81 | TARGET &Cast() { |
82 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
83 | return reinterpret_cast<TARGET &>(*this); |
84 | } |
85 | template <class TARGET> |
86 | const TARGET &Cast() const { |
87 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
88 | return reinterpret_cast<const TARGET &>(*this); |
89 | } |
90 | |
91 | public: |
92 | FileSystem &file_system; |
93 | string path; |
94 | }; |
95 | |
//! The kind of lock requested when opening a file
enum class FileLockType : uint8_t {
	//! Do not lock the file
	NO_LOCK = 0,
	//! Request a read lock
	READ_LOCK = 1,
	//! Request a write lock
	WRITE_LOCK = 2
};
97 | |
//! Flag constants for opening files. Every flag occupies its own bit of a uint8_t.
class FileFlags {
public:
	//! Read access to the file
	static constexpr uint8_t FILE_FLAGS_READ = 0x01;
	//! Write access to the file
	static constexpr uint8_t FILE_FLAGS_WRITE = 0x02;
	//! Reads and writes on the file use direct IO
	static constexpr uint8_t FILE_FLAGS_DIRECT_IO = 0x04;
	//! The file is created if it does not exist yet; only valid in combination with WRITE
	static constexpr uint8_t FILE_FLAGS_FILE_CREATE = 0x08;
	//! A new file is always created; an existing file is truncated. Must not be combined with CREATE.
	static constexpr uint8_t FILE_FLAGS_FILE_CREATE_NEW = 0x10;
	//! The file is opened in append mode
	static constexpr uint8_t FILE_FLAGS_APPEND = 0x20;
};
113 | |
114 | class FileSystem { |
115 | public: |
116 | DUCKDB_API virtual ~FileSystem(); |
117 | |
118 | public: |
119 | DUCKDB_API static constexpr FileLockType DEFAULT_LOCK = FileLockType::NO_LOCK; |
120 | DUCKDB_API static constexpr FileCompressionType DEFAULT_COMPRESSION = FileCompressionType::UNCOMPRESSED; |
121 | DUCKDB_API static FileSystem &GetFileSystem(ClientContext &context); |
122 | DUCKDB_API static FileSystem &GetFileSystem(DatabaseInstance &db); |
123 | DUCKDB_API static FileSystem &Get(AttachedDatabase &db); |
124 | |
125 | DUCKDB_API virtual unique_ptr<FileHandle> OpenFile(const string &path, uint8_t flags, |
126 | FileLockType lock = DEFAULT_LOCK, |
127 | FileCompressionType compression = DEFAULT_COMPRESSION, |
128 | FileOpener *opener = nullptr); |
129 | |
130 | //! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is |
131 | //! equivalent to calling SetFilePointer(location) followed by calling Read(). |
132 | DUCKDB_API virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); |
133 | //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is |
134 | //! equivalent to calling SetFilePointer(location) followed by calling Write(). |
135 | DUCKDB_API virtual void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); |
136 | //! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the |
137 | //! amount of bytes read. |
138 | DUCKDB_API virtual int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes); |
139 | //! Write nr_bytes from the buffer into the file, moving the file pointer forward by nr_bytes. |
140 | DUCKDB_API virtual int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes); |
141 | |
142 | //! Returns the file size of a file handle, returns -1 on error |
143 | DUCKDB_API virtual int64_t GetFileSize(FileHandle &handle); |
144 | //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error |
145 | DUCKDB_API virtual time_t GetLastModifiedTime(FileHandle &handle); |
146 | //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error |
147 | DUCKDB_API virtual FileType GetFileType(FileHandle &handle); |
148 | //! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of |
149 | //! the file |
150 | DUCKDB_API virtual void Truncate(FileHandle &handle, int64_t new_size); |
151 | |
152 | //! Check if a directory exists |
153 | DUCKDB_API virtual bool DirectoryExists(const string &directory); |
154 | //! Create a directory if it does not exist |
155 | DUCKDB_API virtual void CreateDirectory(const string &directory); |
156 | //! Recursively remove a directory and all files in it |
157 | DUCKDB_API virtual void RemoveDirectory(const string &directory); |
158 | //! List files in a directory, invoking the callback method for each one with (filename, is_dir) |
159 | DUCKDB_API virtual bool ListFiles(const string &directory, |
160 | const std::function<void(const string &, bool)> &callback, |
161 | FileOpener *opener = nullptr); |
162 | |
163 | //! Move a file from source path to the target, StorageManager relies on this being an atomic action for ACID |
164 | //! properties |
165 | DUCKDB_API virtual void MoveFile(const string &source, const string &target); |
166 | //! Check if a file exists |
167 | DUCKDB_API virtual bool FileExists(const string &filename); |
168 | //! Check if path is pipe |
169 | DUCKDB_API virtual bool IsPipe(const string &filename); |
170 | //! Remove a file from disk |
171 | DUCKDB_API virtual void RemoveFile(const string &filename); |
172 | //! Sync a file handle to disk |
173 | DUCKDB_API virtual void FileSync(FileHandle &handle); |
174 | //! Sets the working directory |
175 | DUCKDB_API static void SetWorkingDirectory(const string &path); |
176 | //! Gets the working directory |
177 | DUCKDB_API static string GetWorkingDirectory(); |
178 | //! Gets the users home directory |
179 | DUCKDB_API static string GetHomeDirectory(optional_ptr<FileOpener> opener); |
180 | //! Gets the users home directory |
181 | DUCKDB_API virtual string GetHomeDirectory(); |
182 | //! Expands a given path, including e.g. expanding the home directory of the user |
183 | DUCKDB_API static string ExpandPath(const string &path, optional_ptr<FileOpener> opener); |
184 | //! Expands a given path, including e.g. expanding the home directory of the user |
185 | DUCKDB_API virtual string ExpandPath(const string &path); |
186 | //! Returns the system-available memory in bytes. Returns DConstants::INVALID_INDEX if the system function fails. |
187 | DUCKDB_API static idx_t GetAvailableMemory(); |
188 | //! Path separator for the current file system |
189 | DUCKDB_API static string PathSeparator(); |
190 | //! Checks if path is starts with separator (i.e., '/' on UNIX '\\' on Windows) |
191 | DUCKDB_API static bool IsPathAbsolute(const string &path); |
192 | //! Normalize an absolute path - the goal of normalizing is converting "\test.db" and "C:/test.db" into "C:\test.db" |
193 | //! so that the database system cache can correctly |
194 | DUCKDB_API static string NormalizeAbsolutePath(const string &path); |
195 | //! Join two paths together |
196 | DUCKDB_API static string JoinPath(const string &a, const string &path); |
197 | //! Convert separators in a path to the local separators (e.g. convert "/" into \\ on windows) |
198 | DUCKDB_API static string ConvertSeparators(const string &path); |
199 | //! Extract the base name of a file (e.g. if the input is lib/example.dll the base name is 'example') |
200 | DUCKDB_API static string (const string &path); |
201 | //! Extract the name of a file (e.g if the input is lib/example.dll the name is 'example.dll') |
202 | DUCKDB_API static string (const string &path); |
203 | |
204 | //! Returns the value of an environment variable - or the empty string if it is not set |
205 | DUCKDB_API static string GetEnvVariable(const string &name); |
206 | |
207 | //! Whether there is a glob in the string |
208 | DUCKDB_API static bool HasGlob(const string &str); |
209 | //! Runs a glob on the file system, returning a list of matching files |
210 | DUCKDB_API virtual vector<string> Glob(const string &path, FileOpener *opener = nullptr); |
211 | DUCKDB_API vector<string> GlobFiles(const string &path, ClientContext &context, |
212 | FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY); |
213 | |
214 | //! registers a sub-file system to handle certain file name prefixes, e.g. http:// etc. |
215 | DUCKDB_API virtual void RegisterSubSystem(unique_ptr<FileSystem> sub_fs); |
216 | DUCKDB_API virtual void RegisterSubSystem(FileCompressionType compression_type, unique_ptr<FileSystem> fs); |
217 | |
218 | //! Unregister a sub-filesystem by name |
219 | DUCKDB_API virtual void UnregisterSubSystem(const string &name); |
220 | |
221 | //! List registered sub-filesystems, including builtin ones |
222 | DUCKDB_API virtual vector<string> ListSubSystems(); |
223 | |
224 | //! Whether or not a sub-system can handle a specific file path |
225 | DUCKDB_API virtual bool CanHandleFile(const string &fpath); |
226 | |
227 | //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location |
228 | DUCKDB_API virtual void Seek(FileHandle &handle, idx_t location); |
229 | //! Reset a file to the beginning (equivalent to Seek(handle, 0) for simple files) |
230 | DUCKDB_API virtual void Reset(FileHandle &handle); |
231 | DUCKDB_API virtual idx_t SeekPosition(FileHandle &handle); |
232 | |
233 | //! Whether or not we can seek into the file |
234 | DUCKDB_API virtual bool CanSeek(); |
235 | //! Whether or not the FS handles plain files on disk. This is relevant for certain optimizations, as random reads |
236 | //! in a file on-disk are much cheaper than e.g. random reads in a file over the network |
237 | DUCKDB_API virtual bool OnDiskFile(FileHandle &handle); |
238 | |
239 | DUCKDB_API virtual unique_ptr<FileHandle> OpenCompressedFile(unique_ptr<FileHandle> handle, bool write); |
240 | |
241 | //! Create a LocalFileSystem. |
242 | DUCKDB_API static unique_ptr<FileSystem> CreateLocal(); |
243 | |
244 | //! Return the name of the filesytem. Used for forming diagnosis messages. |
245 | DUCKDB_API virtual std::string GetName() const = 0; |
246 | |
247 | //! Whether or not a file is remote or local, based only on file path |
248 | DUCKDB_API static bool IsRemoteFile(const string &path); |
249 | }; |
250 | |
251 | } // namespace duckdb |
252 | |