| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Names.h> |
| 4 | #include <Core/QueryProcessingStage.h> |
| 5 | #include <DataStreams/IBlockStream_fwd.h> |
| 6 | #include <Databases/IDatabase.h> |
| 7 | #include <Interpreters/CancellationCode.h> |
| 8 | #include <IO/CompressionMethod.h> |
| 9 | #include <Storages/IStorage_fwd.h> |
| 10 | #include <Storages/SelectQueryInfo.h> |
| 11 | #include <Storages/TableStructureLockHolder.h> |
| 12 | #include <Storages/CheckResults.h> |
| 13 | #include <Storages/ColumnsDescription.h> |
| 14 | #include <Storages/IndicesDescription.h> |
| 15 | #include <Storages/ConstraintsDescription.h> |
| 16 | #include <Storages/StorageInMemoryMetadata.h> |
| 17 | #include <Common/ActionLock.h> |
| 18 | #include <Common/Exception.h> |
| 19 | #include <Common/RWLock.h> |
| 20 | #include <Common/TypePromotion.h> |
| 21 | |
| 22 | #include <optional> |
| 23 | #include <shared_mutex> |
| 24 | |
| 25 | |
| 26 | namespace DB |
| 27 | { |
| 28 | |
/// Only declared here (`extern`); the value itself is defined in another translation unit.
/// It is the error code thrown by the default "not supported" method bodies of IStorage.
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

class Context;

/// Type of the identifier passed to IStorage::getActionLock() to select the action to block.
using StorageActionBlockType = size_t;

class ASTCreateQuery;

struct Settings;
struct SettingChange;
using SettingsChanges = std::vector<SettingChange>;

/// Command lists accepted by IStorage::alter(), IStorage::mutate() and IStorage::alterPartition().
class AlterCommands;
class MutationCommands;
class PartitionCommands;

class IProcessor;
using ProcessorPtr = std::shared_ptr<IProcessor>;
using Processors = std::vector<ProcessorPtr>;

/// Element type of the result of IStorage::readWithProcessors().
class Pipe;
using Pipes = std::vector<Pipe>;
| 54 | |
/// Size counters for one column: marks, compressed data and uncompressed data.
/// All counters start at zero.
/// NOTE(review): units are not stated in this header (presumably bytes) — confirm.
struct ColumnSize
{
    size_t marks = 0;
    size_t data_compressed = 0;
    size_t data_uncompressed = 0;

    /// Accumulates the counters of `other` into this object, field by field.
    /// No overflow check is performed.
    void add(const ColumnSize & other)
    {
        marks += other.marks;
        data_compressed += other.data_compressed;
        data_uncompressed += other.data_uncompressed;
    }
};
| 68 | |
| 69 | /** Storage. Describes the table. Responsible for |
| 70 | * - storage of the table data; |
| 71 | * - the definition in which files (or not in files) the data is stored; |
| 72 | * - data lookups and appends; |
| 73 | * - data storage structure (compression, etc.) |
| 74 | * - concurrent access to data (locks, etc.) |
| 75 | */ |
| 76 | class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromotion<IStorage> |
| 77 | { |
| 78 | public: |
| 79 | IStorage() = default; |
| 80 | explicit IStorage(ColumnsDescription virtuals_); |
| 81 | |
| 82 | virtual ~IStorage() = default; |
| 83 | IStorage(const IStorage &) = delete; |
| 84 | IStorage & operator=(const IStorage &) = delete; |
| 85 | |
| 86 | /// The main name of the table type (for example, StorageMergeTree). |
| 87 | virtual std::string getName() const = 0; |
| 88 | |
| 89 | /// The name of the table. |
| 90 | virtual std::string getTableName() const = 0; |
| 91 | virtual std::string getDatabaseName() const { return {}; } |
| 92 | |
| 93 | /// Returns true if the storage receives data from a remote server or servers. |
| 94 | virtual bool isRemote() const { return false; } |
| 95 | |
| 96 | /// Returns true if the storage supports queries with the SAMPLE section. |
| 97 | virtual bool supportsSampling() const { return false; } |
| 98 | |
| 99 | /// Returns true if the storage supports queries with the FINAL section. |
| 100 | virtual bool supportsFinal() const { return false; } |
| 101 | |
| 102 | /// Returns true if the storage supports queries with the PREWHERE section. |
| 103 | virtual bool supportsPrewhere() const { return false; } |
| 104 | |
| 105 | /// Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas. |
| 106 | virtual bool supportsReplication() const { return false; } |
| 107 | |
| 108 | /// Returns true if the storage supports deduplication of inserted data blocks. |
| 109 | virtual bool supportsDeduplication() const { return false; } |
| 110 | |
| 111 | /// Returns true if the storage supports settings. |
| 112 | virtual bool supportsSettings() const { return false; } |
| 113 | |
| 114 | /// Returns true if the blocks shouldn't be pushed to associated views on insert. |
| 115 | virtual bool noPushingToViews() const { return false; } |
| 116 | |
| 117 | /// Optional size information of each physical column. |
| 118 | /// Currently it's only used by the MergeTree family for query optimizations. |
| 119 | using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>; |
| 120 | virtual ColumnSizeByName getColumnSizes() const { return {}; } |
| 121 | |
| 122 | public: /// thread-unsafe part. lockStructure must be acquired |
| 123 | virtual const ColumnsDescription & getColumns() const; /// returns combined set of columns |
| 124 | virtual void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. |
| 125 | const ColumnsDescription & getVirtuals() const; |
| 126 | const IndicesDescription & getIndices() const; |
| 127 | |
| 128 | const ConstraintsDescription & getConstraints() const; |
| 129 | void setConstraints(ConstraintsDescription constraints_); |
| 130 | |
| 131 | /// Returns storage metadata copy. Direct modification of |
| 132 | /// result structure doesn't affect storage. |
| 133 | virtual StorageInMemoryMetadata getInMemoryMetadata() const; |
| 134 | |
| 135 | /// NOTE: these methods should include virtual columns, |
| 136 | /// but should NOT include ALIAS columns (they are treated separately). |
| 137 | virtual NameAndTypePair getColumn(const String & column_name) const; |
| 138 | virtual bool hasColumn(const String & column_name) const; |
| 139 | |
| 140 | Block getSampleBlock() const; /// ordinary + materialized. |
| 141 | Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. |
| 142 | Block getSampleBlockNonMaterialized() const; /// ordinary. |
| 143 | Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. |
| 144 | |
| 145 | /// Verify that all the requested names are in the table and are set correctly: |
| 146 | /// list of names is not empty and the names do not repeat. |
| 147 | void check(const Names & column_names, bool include_virtuals = false) const; |
| 148 | |
| 149 | /// Check that all the requested names are in the table and have the correct types. |
| 150 | void check(const NamesAndTypesList & columns) const; |
| 151 | |
| 152 | /// Check that all names from the intersection of `names` and `columns` are in the table and have the same types. |
| 153 | void check(const NamesAndTypesList & columns, const Names & column_names) const; |
| 154 | |
| 155 | /// Check that the data block contains all the columns of the table with the correct types, |
| 156 | /// contains only the columns of the table, and all the columns are different. |
| 157 | /// If |need_all| is set, then checks that all the columns of the table are in the block. |
| 158 | void check(const Block & block, bool need_all = false) const; |
| 159 | |
| 160 | protected: /// still thread-unsafe part. |
| 161 | void setIndices(IndicesDescription indices_); |
| 162 | |
| 163 | /// Returns whether the column is virtual - by default all columns are real. |
| 164 | /// Initially reserved virtual column name may be shadowed by real column. |
| 165 | virtual bool isVirtualColumn(const String & column_name) const; |
| 166 | |
| 167 | |
| 168 | private: |
| 169 | ColumnsDescription columns; /// combined real and virtual columns |
| 170 | const ColumnsDescription virtuals = {}; |
| 171 | IndicesDescription indices; |
| 172 | ConstraintsDescription constraints; |
| 173 | |
| 174 | public: |
| 175 | /// Acquire this lock if you need the table structure to remain constant during the execution of |
| 176 | /// the query. If will_add_new_data is true, this means that the query will add new data to the table |
| 177 | /// (INSERT or a parts merge). |
| 178 | TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id); |
| 179 | |
| 180 | /// Acquire this lock at the start of ALTER to lock out other ALTERs and make sure that only you |
| 181 | /// can modify the table structure. It can later be upgraded to the exclusive lock. |
| 182 | TableStructureWriteLockHolder lockAlterIntention(const String & query_id); |
| 183 | |
| 184 | /// Upgrade alter intention lock and make sure that no new data is inserted into the table. |
| 185 | /// This is used by the ALTER MODIFY of the MergeTree storage to consistently determine |
| 186 | /// the set of parts that needs to be altered. |
| 187 | void lockNewDataStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id); |
| 188 | |
| 189 | /// Upgrade alter intention lock to the full exclusive structure lock. This is done by ALTER queries |
| 190 | /// to ensure that no other query uses the table structure and it can be safely changed. |
| 191 | void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id); |
| 192 | |
| 193 | /// Acquire the full exclusive lock immediately. No other queries can run concurrently. |
| 194 | TableStructureWriteLockHolder lockExclusively(const String & query_id); |
| 195 | |
| 196 | /** Returns stage to which query is going to be processed in read() function. |
| 197 | * (Normally, the function only reads the columns from the list, but in other cases, |
| 198 | * for example, the request can be partially processed on a remote server.) |
| 199 | */ |
| 200 | virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &) const { return QueryProcessingStage::FetchColumns; } |
| 201 | |
| 202 | /** Watch live changes to the table. |
| 203 | * Accepts a list of columns to read, as well as a description of the query, |
| 204 | * from which information can be extracted about how to retrieve data |
| 205 | * (indexes, locks, etc.) |
| 206 | * Returns a stream with which you can read data sequentially |
| 207 | * or multiple streams for parallel data reading. |
| 208 | * The `processed_stage` info is also written to what stage the request was processed. |
| 209 | * (Normally, the function only reads the columns from the list, but in other cases, |
| 210 | * for example, the request can be partially processed on a remote server.) |
| 211 | * |
| 212 | * context contains settings for one query. |
| 213 | * Usually Storage does not care about these settings, since they are used in the interpreter. |
| 214 | * But, for example, for distributed query processing, the settings are passed to the remote server. |
| 215 | * |
| 216 | * num_streams - a recommendation, how many streams to return, |
| 217 | * if the storage can return a different number of streams. |
| 218 | * |
| 219 | * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). |
| 220 | */ |
| 221 | virtual BlockInputStreams watch( |
| 222 | const Names & /*column_names*/, |
| 223 | const SelectQueryInfo & /*query_info*/, |
| 224 | const Context & /*context*/, |
| 225 | QueryProcessingStage::Enum & /*processed_stage*/, |
| 226 | size_t /*max_block_size*/, |
| 227 | unsigned /*num_streams*/) |
| 228 | { |
| 229 | throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 230 | } |
| 231 | |
| 232 | /** Read a set of columns from the table. |
| 233 | * Accepts a list of columns to read, as well as a description of the query, |
| 234 | * from which information can be extracted about how to retrieve data |
| 235 | * (indexes, locks, etc.) |
| 236 | * Returns a stream with which you can read data sequentially |
| 237 | * or multiple streams for parallel data reading. |
| 238 | * The `processed_stage` must be the result of getQueryProcessingStage() function. |
| 239 | * |
| 240 | * context contains settings for one query. |
| 241 | * Usually Storage does not care about these settings, since they are used in the interpreter. |
| 242 | * But, for example, for distributed query processing, the settings are passed to the remote server. |
| 243 | * |
| 244 | * num_streams - a recommendation, how many streams to return, |
| 245 | * if the storage can return a different number of streams. |
| 246 | * |
| 247 | * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). |
| 248 | * |
| 249 | * Default implementation calls `readWithProcessors` and wraps into TreeExecutor. |
| 250 | */ |
| 251 | virtual BlockInputStreams read( |
| 252 | const Names & /*column_names*/, |
| 253 | const SelectQueryInfo & /*query_info*/, |
| 254 | const Context & /*context*/, |
| 255 | QueryProcessingStage::Enum /*processed_stage*/, |
| 256 | size_t /*max_block_size*/, |
| 257 | unsigned /*num_streams*/); |
| 258 | |
| 259 | /** The same as read, but returns processors. |
| 260 | */ |
| 261 | virtual Pipes readWithProcessors( |
| 262 | const Names & /*column_names*/, |
| 263 | const SelectQueryInfo & /*query_info*/, |
| 264 | const Context & /*context*/, |
| 265 | QueryProcessingStage::Enum /*processed_stage*/, |
| 266 | size_t /*max_block_size*/, |
| 267 | unsigned /*num_streams*/) |
| 268 | { |
| 269 | throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 270 | } |
| 271 | |
| 272 | virtual bool supportProcessorsPipeline() const { return false; } |
| 273 | |
| 274 | /** Writes the data to a table. |
| 275 | * Receives a description of the query, which can contain information about the data write method. |
| 276 | * Returns an object by which you can write data sequentially. |
| 277 | * |
| 278 | * It is guaranteed that the table structure will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). |
| 279 | */ |
| 280 | virtual BlockOutputStreamPtr write( |
| 281 | const ASTPtr & /*query*/, |
| 282 | const Context & /*context*/) |
| 283 | { |
| 284 | throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 285 | } |
| 286 | |
| 287 | /** Delete the table data. Called before deleting the directory with the data. |
| 288 | * The method can be called only after detaching table from Context (when no queries are performed with table). |
| 289 | * The table is not usable during and after call to this method. |
| 290 | * If you do not need any action other than deleting the directory with data, you can leave this method blank. |
| 291 | */ |
| 292 | virtual void drop(TableStructureWriteLockHolder &) {} |
| 293 | |
| 294 | /** Clear the table data and leave it empty. |
| 295 | * Must be called under lockForAlter. |
| 296 | */ |
| 297 | virtual void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) |
| 298 | { |
| 299 | throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 300 | } |
| 301 | |
| 302 | /** Rename the table. |
| 303 | * Renaming a name in a file with metadata, the name in the list of tables in the RAM, is done separately. |
| 304 | * In this function, you need to rename the directory with the data, if any. |
| 305 | * Called when the table structure is locked for write. |
| 306 | */ |
| 307 | virtual void rename(const String & /*new_path_to_table_data*/, const String & /*new_database_name*/, const String & /*new_table_name*/, |
| 308 | TableStructureWriteLockHolder &) |
| 309 | { |
| 310 | throw Exception("Method rename is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 311 | } |
| 312 | |
| 313 | /** ALTER tables in the form of column changes that do not affect the change to Storage or its parameters. |
| 314 | * This method must fully execute the ALTER query, taking care of the locks itself. |
| 315 | * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. |
| 316 | */ |
| 317 | virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder); |
| 318 | |
| 319 | /** Checks that alter commands can be applied to storage. For example, columns can be modified, |
| 320 | * or primary key can be changes, etc. |
| 321 | */ |
| 322 | virtual void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings); |
| 323 | |
| 324 | /** ALTER tables with regard to its partitions. |
| 325 | * Should handle locks for each command on its own. |
| 326 | */ |
| 327 | virtual void alterPartition(const ASTPtr & /* query */, const PartitionCommands & /* commands */, const Context & /* context */) |
| 328 | { |
| 329 | throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 330 | } |
| 331 | |
| 332 | /** Perform any background work. For example, combining parts in a MergeTree type table. |
| 333 | * Returns whether any work has been done. |
| 334 | */ |
| 335 | virtual bool optimize(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*final*/, bool /*deduplicate*/, const Context & /*context*/) |
| 336 | { |
| 337 | throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 338 | } |
| 339 | |
| 340 | /// Mutate the table contents |
| 341 | virtual void mutate(const MutationCommands &, const Context &) |
| 342 | { |
| 343 | throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 344 | } |
| 345 | |
| 346 | /// Cancel a mutation. |
| 347 | virtual CancellationCode killMutation(const String & /*mutation_id*/) |
| 348 | { |
| 349 | throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
| 350 | } |
| 351 | |
| 352 | /** If the table have to do some complicated work on startup, |
| 353 | * that must be postponed after creation of table object |
| 354 | * (like launching some background threads), |
| 355 | * do it in this method. |
| 356 | * You should call this method after creation of object. |
| 357 | * By default, does nothing. |
| 358 | * Cannot be called simultaneously by multiple threads. |
| 359 | */ |
| 360 | virtual void startup() {} |
| 361 | |
| 362 | /** If the table have to do some complicated work when destroying an object - do it in advance. |
| 363 | * For example, if the table contains any threads for background work - ask them to complete and wait for completion. |
| 364 | * By default, does nothing. |
| 365 | * Can be called simultaneously from different threads, even after a call to drop(). |
| 366 | */ |
| 367 | virtual void shutdown() {} |
| 368 | |
| 369 | /// Asks table to stop executing some action identified by action_type |
| 370 | /// If table does not support such type of lock, and empty lock is returned |
| 371 | virtual ActionLock getActionLock(StorageActionBlockType /* action_type */) |
| 372 | { |
| 373 | return {}; |
| 374 | } |
| 375 | |
| 376 | std::atomic<bool> is_dropped{false}; |
| 377 | |
| 378 | /// Does table support index for IN sections |
| 379 | virtual bool supportsIndexForIn() const { return false; } |
| 380 | |
| 381 | /// Provides a hint that the storage engine may evaluate the IN-condition by using an index. |
| 382 | virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */) const { return false; } |
| 383 | |
| 384 | /// Checks validity of the data |
| 385 | virtual CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) { throw Exception("Check query is not supported for " + getName() + " storage" , ErrorCodes::NOT_IMPLEMENTED); } |
| 386 | |
| 387 | /// Checks that table could be dropped right now |
| 388 | /// Otherwise - throws an exception with detailed information. |
| 389 | /// We do not use mutex because it is not very important that the size could change during the operation. |
| 390 | virtual void checkTableCanBeDropped() const {} |
| 391 | |
| 392 | /// Checks that Partition could be dropped right now |
| 393 | /// Otherwise - throws an exception with detailed information. |
| 394 | /// We do not use mutex because it is not very important that the size could change during the operation. |
| 395 | virtual void checkPartitionCanBeDropped(const ASTPtr & /*partition*/) {} |
| 396 | |
| 397 | /** Notify engine about updated dependencies for this storage. */ |
| 398 | virtual void updateDependencies() {} |
| 399 | |
| 400 | /// Returns data paths if storage supports it, empty vector otherwise. |
| 401 | virtual Strings getDataPaths() const { return {}; } |
| 402 | |
| 403 | /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. |
| 404 | virtual ASTPtr getPartitionKeyAST() const { return nullptr; } |
| 405 | |
| 406 | /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. |
| 407 | virtual ASTPtr getSortingKeyAST() const { return nullptr; } |
| 408 | |
| 409 | /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. |
| 410 | virtual ASTPtr getPrimaryKeyAST() const { return nullptr; } |
| 411 | |
| 412 | /// Returns sampling expression AST for storage or nullptr if there is none. |
| 413 | virtual ASTPtr getSamplingKeyAST() const { return nullptr; } |
| 414 | |
| 415 | /// Returns additional columns that need to be read to calculate partition key. |
| 416 | virtual Names getColumnsRequiredForPartitionKey() const { return {}; } |
| 417 | |
| 418 | /// Returns additional columns that need to be read to calculate sorting key. |
| 419 | virtual Names getColumnsRequiredForSortingKey() const { return {}; } |
| 420 | |
| 421 | /// Returns additional columns that need to be read to calculate primary key. |
| 422 | virtual Names getColumnsRequiredForPrimaryKey() const { return {}; } |
| 423 | |
| 424 | /// Returns additional columns that need to be read to calculate sampling key. |
| 425 | virtual Names getColumnsRequiredForSampling() const { return {}; } |
| 426 | |
| 427 | /// Returns additional columns that need to be read for FINAL to work. |
| 428 | virtual Names getColumnsRequiredForFinal() const { return {}; } |
| 429 | |
| 430 | /// Returns names of primary key + secondary sorting columns |
| 431 | virtual Names getSortingKeyColumns() const { return {}; } |
| 432 | |
| 433 | /// Returns storage policy if storage supports it |
| 434 | virtual StoragePolicyPtr getStoragePolicy() const { return {}; } |
| 435 | |
| 436 | /** If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. |
| 437 | */ |
| 438 | virtual std::optional<UInt64> totalRows() const |
| 439 | { |
| 440 | return {}; |
| 441 | } |
| 442 | |
| 443 | static DB::CompressionMethod chooseCompressionMethod(const String & uri, const String & compression_method); |
| 444 | |
| 445 | private: |
| 446 | /// You always need to take the next three locks in this order. |
| 447 | |
| 448 | /// If you hold this lock exclusively, you can be sure that no other structure modifying queries |
| 449 | /// (e.g. ALTER, DROP) are concurrently executing. But queries that only read table structure |
| 450 | /// (e.g. SELECT, INSERT) can continue to execute. |
| 451 | mutable RWLock alter_intention_lock = RWLockImpl::create(); |
| 452 | |
| 453 | /// It is taken for share for the entire INSERT query and the entire merge of the parts (for MergeTree). |
| 454 | /// ALTER COLUMN queries acquire an exclusive lock to ensure that no new parts with the old structure |
| 455 | /// are added to the table and thus the set of parts to modify doesn't change. |
| 456 | mutable RWLock new_data_structure_lock = RWLockImpl::create(); |
| 457 | |
| 458 | /// Lock for the table column structure (names, types, etc.) and data path. |
| 459 | /// It is taken in exclusive mode by queries that modify them (e.g. RENAME, ALTER and DROP) |
| 460 | /// and in share mode by other queries. |
| 461 | mutable RWLock structure_lock = RWLockImpl::create(); |
| 462 | }; |
| 463 | |
| 464 | } |
| 465 | |