| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/common/file_system.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "duckdb/common/constants.hpp" |
| 12 | #include "duckdb/common/enums/file_compression_type.hpp" |
| 13 | #include "duckdb/common/exception.hpp" |
| 14 | #include "duckdb/common/file_buffer.hpp" |
| 15 | #include "duckdb/common/unordered_map.hpp" |
| 16 | #include "duckdb/common/vector.hpp" |
| 17 | #include "duckdb/common/enums/file_glob_options.hpp" |
| 18 | #include "duckdb/common/optional_ptr.hpp" |
| 19 | #include <functional> |
| 20 | |
| 21 | #undef CreateDirectory |
| 22 | #undef MoveFile |
| 23 | #undef RemoveDirectory |
| 24 | |
| 25 | namespace duckdb { |
// Forward declarations. These types appear in this header only behind references,
// pointers or pointer wrappers (or, for FileSystem, before the class is defined),
// so a declaration is sufficient here.
class AttachedDatabase; // parameter of FileSystem::Get
class ClientContext;    // parameter of FileSystem::GetFileSystem and FileSystem::GlobFiles
class DatabaseInstance; // parameter of FileSystem::GetFileSystem
class FileOpener;       // optional opener argument of OpenFile, ListFiles, Glob, ...
class FileSystem;       // referenced by FileHandle ahead of its definition
| 31 | |
//! Kind of file system object behind a file handle (returned by FileSystem::GetFileType).
//! The numeric values are written out explicitly; they are the same values the
//! enumerators receive from implicit numbering.
enum class FileType {
	FILE_TYPE_REGULAR = 0,  //!< Regular file
	FILE_TYPE_DIR = 1,      //!< Directory
	FILE_TYPE_FIFO = 2,     //!< FIFO named pipe
	FILE_TYPE_SOCKET = 3,   //!< Socket
	FILE_TYPE_LINK = 4,     //!< Symbolic link
	FILE_TYPE_BLOCKDEV = 5, //!< Block device
	FILE_TYPE_CHARDEV = 6,  //!< Character device
	FILE_TYPE_INVALID = 7,  //!< Unknown or invalid file handle
};
| 50 | |
| 51 | struct FileHandle { |
| 52 | public: |
| 53 | DUCKDB_API FileHandle(FileSystem &file_system, string path); |
| 54 | FileHandle(const FileHandle &) = delete; |
| 55 | DUCKDB_API virtual ~FileHandle(); |
| 56 | |
| 57 | DUCKDB_API int64_t Read(void *buffer, idx_t nr_bytes); |
| 58 | DUCKDB_API int64_t Write(void *buffer, idx_t nr_bytes); |
| 59 | DUCKDB_API void Read(void *buffer, idx_t nr_bytes, idx_t location); |
| 60 | DUCKDB_API void Write(void *buffer, idx_t nr_bytes, idx_t location); |
| 61 | DUCKDB_API void Seek(idx_t location); |
| 62 | DUCKDB_API void Reset(); |
| 63 | DUCKDB_API idx_t SeekPosition(); |
| 64 | DUCKDB_API void Sync(); |
| 65 | DUCKDB_API void Truncate(int64_t new_size); |
| 66 | DUCKDB_API string ReadLine(); |
| 67 | |
| 68 | DUCKDB_API bool CanSeek(); |
| 69 | DUCKDB_API bool OnDiskFile(); |
| 70 | DUCKDB_API idx_t GetFileSize(); |
| 71 | DUCKDB_API FileType GetType(); |
| 72 | |
| 73 | //! Closes the file handle. |
| 74 | DUCKDB_API virtual void Close() = 0; |
| 75 | |
| 76 | string GetPath() const { |
| 77 | return path; |
| 78 | } |
| 79 | |
| 80 | template <class TARGET> |
| 81 | TARGET &Cast() { |
| 82 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
| 83 | return reinterpret_cast<TARGET &>(*this); |
| 84 | } |
| 85 | template <class TARGET> |
| 86 | const TARGET &Cast() const { |
| 87 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
| 88 | return reinterpret_cast<const TARGET &>(*this); |
| 89 | } |
| 90 | |
| 91 | public: |
| 92 | FileSystem &file_system; |
| 93 | string path; |
| 94 | }; |
| 95 | |
//! Lock type that can be requested when opening a file (see the `lock` parameter of
//! FileSystem::OpenFile). Stored in a single byte.
enum class FileLockType : uint8_t {
	NO_LOCK = 0,
	READ_LOCK = 1,
	WRITE_LOCK = 2
};
| 97 | |
//! Bit flags describing how a file should be opened. Each constant occupies its own bit
//! of a uint8_t, so several flags can be OR-ed together into one value.
class FileFlags {
public:
	//! Bit 0: open file with read access
	static constexpr uint8_t FILE_FLAGS_READ = 0x01;
	//! Bit 1: open file with write access
	static constexpr uint8_t FILE_FLAGS_WRITE = 0x02;
	//! Bit 2: use direct IO when reading/writing to the file
	static constexpr uint8_t FILE_FLAGS_DIRECT_IO = 0x04;
	//! Bit 3: create the file if it does not exist; can only be used together with WRITE
	static constexpr uint8_t FILE_FLAGS_FILE_CREATE = 0x08;
	//! Bit 4: always create a new file; an existing file is truncated. Cannot be used together with CREATE.
	static constexpr uint8_t FILE_FLAGS_FILE_CREATE_NEW = 0x10;
	//! Bit 5: open file in append mode
	static constexpr uint8_t FILE_FLAGS_APPEND = 0x20;
};
| 113 | |
| 114 | class FileSystem { |
| 115 | public: |
| 116 | DUCKDB_API virtual ~FileSystem(); |
| 117 | |
| 118 | public: |
| 119 | DUCKDB_API static constexpr FileLockType DEFAULT_LOCK = FileLockType::NO_LOCK; |
| 120 | DUCKDB_API static constexpr FileCompressionType DEFAULT_COMPRESSION = FileCompressionType::UNCOMPRESSED; |
| 121 | DUCKDB_API static FileSystem &GetFileSystem(ClientContext &context); |
| 122 | DUCKDB_API static FileSystem &GetFileSystem(DatabaseInstance &db); |
| 123 | DUCKDB_API static FileSystem &Get(AttachedDatabase &db); |
| 124 | |
| 125 | DUCKDB_API virtual unique_ptr<FileHandle> OpenFile(const string &path, uint8_t flags, |
| 126 | FileLockType lock = DEFAULT_LOCK, |
| 127 | FileCompressionType compression = DEFAULT_COMPRESSION, |
| 128 | FileOpener *opener = nullptr); |
| 129 | |
| 130 | //! Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is |
| 131 | //! equivalent to calling SetFilePointer(location) followed by calling Read(). |
| 132 | DUCKDB_API virtual void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); |
| 133 | //! Write exactly nr_bytes to the specified location in the file. Fails if nr_bytes could not be written. This is |
| 134 | //! equivalent to calling SetFilePointer(location) followed by calling Write(). |
| 135 | DUCKDB_API virtual void Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location); |
| 136 | //! Read nr_bytes from the specified file into the buffer, moving the file pointer forward by nr_bytes. Returns the |
| 137 | //! amount of bytes read. |
| 138 | DUCKDB_API virtual int64_t Read(FileHandle &handle, void *buffer, int64_t nr_bytes); |
| 139 | //! Write nr_bytes from the buffer into the file, moving the file pointer forward by nr_bytes. |
| 140 | DUCKDB_API virtual int64_t Write(FileHandle &handle, void *buffer, int64_t nr_bytes); |
| 141 | |
| 142 | //! Returns the file size of a file handle, returns -1 on error |
| 143 | DUCKDB_API virtual int64_t GetFileSize(FileHandle &handle); |
| 144 | //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error |
| 145 | DUCKDB_API virtual time_t GetLastModifiedTime(FileHandle &handle); |
| 146 | //! Returns the file last modified time of a file handle, returns timespec with zero on all attributes on error |
| 147 | DUCKDB_API virtual FileType GetFileType(FileHandle &handle); |
| 148 | //! Truncate a file to a maximum size of new_size, new_size should be smaller than or equal to the current size of |
| 149 | //! the file |
| 150 | DUCKDB_API virtual void Truncate(FileHandle &handle, int64_t new_size); |
| 151 | |
| 152 | //! Check if a directory exists |
| 153 | DUCKDB_API virtual bool DirectoryExists(const string &directory); |
| 154 | //! Create a directory if it does not exist |
| 155 | DUCKDB_API virtual void CreateDirectory(const string &directory); |
| 156 | //! Recursively remove a directory and all files in it |
| 157 | DUCKDB_API virtual void RemoveDirectory(const string &directory); |
| 158 | //! List files in a directory, invoking the callback method for each one with (filename, is_dir) |
| 159 | DUCKDB_API virtual bool ListFiles(const string &directory, |
| 160 | const std::function<void(const string &, bool)> &callback, |
| 161 | FileOpener *opener = nullptr); |
| 162 | |
| 163 | //! Move a file from source path to the target, StorageManager relies on this being an atomic action for ACID |
| 164 | //! properties |
| 165 | DUCKDB_API virtual void MoveFile(const string &source, const string &target); |
| 166 | //! Check if a file exists |
| 167 | DUCKDB_API virtual bool FileExists(const string &filename); |
| 168 | //! Check if path is pipe |
| 169 | DUCKDB_API virtual bool IsPipe(const string &filename); |
| 170 | //! Remove a file from disk |
| 171 | DUCKDB_API virtual void RemoveFile(const string &filename); |
| 172 | //! Sync a file handle to disk |
| 173 | DUCKDB_API virtual void FileSync(FileHandle &handle); |
| 174 | //! Sets the working directory |
| 175 | DUCKDB_API static void SetWorkingDirectory(const string &path); |
| 176 | //! Gets the working directory |
| 177 | DUCKDB_API static string GetWorkingDirectory(); |
| 178 | //! Gets the users home directory |
| 179 | DUCKDB_API static string GetHomeDirectory(optional_ptr<FileOpener> opener); |
| 180 | //! Gets the users home directory |
| 181 | DUCKDB_API virtual string GetHomeDirectory(); |
| 182 | //! Expands a given path, including e.g. expanding the home directory of the user |
| 183 | DUCKDB_API static string ExpandPath(const string &path, optional_ptr<FileOpener> opener); |
| 184 | //! Expands a given path, including e.g. expanding the home directory of the user |
| 185 | DUCKDB_API virtual string ExpandPath(const string &path); |
| 186 | //! Returns the system-available memory in bytes. Returns DConstants::INVALID_INDEX if the system function fails. |
| 187 | DUCKDB_API static idx_t GetAvailableMemory(); |
| 188 | //! Path separator for the current file system |
| 189 | DUCKDB_API static string PathSeparator(); |
| 190 | //! Checks if path is starts with separator (i.e., '/' on UNIX '\\' on Windows) |
| 191 | DUCKDB_API static bool IsPathAbsolute(const string &path); |
| 192 | //! Normalize an absolute path - the goal of normalizing is converting "\test.db" and "C:/test.db" into "C:\test.db" |
| 193 | //! so that the database system cache can correctly |
| 194 | DUCKDB_API static string NormalizeAbsolutePath(const string &path); |
| 195 | //! Join two paths together |
| 196 | DUCKDB_API static string JoinPath(const string &a, const string &path); |
| 197 | //! Convert separators in a path to the local separators (e.g. convert "/" into \\ on windows) |
| 198 | DUCKDB_API static string ConvertSeparators(const string &path); |
| 199 | //! Extract the base name of a file (e.g. if the input is lib/example.dll the base name is 'example') |
| 200 | DUCKDB_API static string (const string &path); |
| 201 | //! Extract the name of a file (e.g if the input is lib/example.dll the name is 'example.dll') |
| 202 | DUCKDB_API static string (const string &path); |
| 203 | |
| 204 | //! Returns the value of an environment variable - or the empty string if it is not set |
| 205 | DUCKDB_API static string GetEnvVariable(const string &name); |
| 206 | |
| 207 | //! Whether there is a glob in the string |
| 208 | DUCKDB_API static bool HasGlob(const string &str); |
| 209 | //! Runs a glob on the file system, returning a list of matching files |
| 210 | DUCKDB_API virtual vector<string> Glob(const string &path, FileOpener *opener = nullptr); |
| 211 | DUCKDB_API vector<string> GlobFiles(const string &path, ClientContext &context, |
| 212 | FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY); |
| 213 | |
| 214 | //! registers a sub-file system to handle certain file name prefixes, e.g. http:// etc. |
| 215 | DUCKDB_API virtual void RegisterSubSystem(unique_ptr<FileSystem> sub_fs); |
| 216 | DUCKDB_API virtual void RegisterSubSystem(FileCompressionType compression_type, unique_ptr<FileSystem> fs); |
| 217 | |
| 218 | //! Unregister a sub-filesystem by name |
| 219 | DUCKDB_API virtual void UnregisterSubSystem(const string &name); |
| 220 | |
| 221 | //! List registered sub-filesystems, including builtin ones |
| 222 | DUCKDB_API virtual vector<string> ListSubSystems(); |
| 223 | |
| 224 | //! Whether or not a sub-system can handle a specific file path |
| 225 | DUCKDB_API virtual bool CanHandleFile(const string &fpath); |
| 226 | |
| 227 | //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location |
| 228 | DUCKDB_API virtual void Seek(FileHandle &handle, idx_t location); |
| 229 | //! Reset a file to the beginning (equivalent to Seek(handle, 0) for simple files) |
| 230 | DUCKDB_API virtual void Reset(FileHandle &handle); |
| 231 | DUCKDB_API virtual idx_t SeekPosition(FileHandle &handle); |
| 232 | |
| 233 | //! Whether or not we can seek into the file |
| 234 | DUCKDB_API virtual bool CanSeek(); |
| 235 | //! Whether or not the FS handles plain files on disk. This is relevant for certain optimizations, as random reads |
| 236 | //! in a file on-disk are much cheaper than e.g. random reads in a file over the network |
| 237 | DUCKDB_API virtual bool OnDiskFile(FileHandle &handle); |
| 238 | |
| 239 | DUCKDB_API virtual unique_ptr<FileHandle> OpenCompressedFile(unique_ptr<FileHandle> handle, bool write); |
| 240 | |
| 241 | //! Create a LocalFileSystem. |
| 242 | DUCKDB_API static unique_ptr<FileSystem> CreateLocal(); |
| 243 | |
| 244 | //! Return the name of the filesytem. Used for forming diagnosis messages. |
| 245 | DUCKDB_API virtual std::string GetName() const = 0; |
| 246 | |
| 247 | //! Whether or not a file is remote or local, based only on file path |
| 248 | DUCKDB_API static bool IsRemoteFile(const string &path); |
| 249 | }; |
| 250 | |
| 251 | } // namespace duckdb |
| 252 | |