1 | #pragma once |
2 | |
3 | #include <Core/Names.h> |
4 | #include <Core/QueryProcessingStage.h> |
5 | #include <DataStreams/IBlockStream_fwd.h> |
6 | #include <Databases/IDatabase.h> |
7 | #include <Interpreters/CancellationCode.h> |
8 | #include <IO/CompressionMethod.h> |
9 | #include <Storages/IStorage_fwd.h> |
10 | #include <Storages/SelectQueryInfo.h> |
11 | #include <Storages/TableStructureLockHolder.h> |
12 | #include <Storages/CheckResults.h> |
13 | #include <Storages/ColumnsDescription.h> |
14 | #include <Storages/IndicesDescription.h> |
15 | #include <Storages/ConstraintsDescription.h> |
16 | #include <Storages/StorageInMemoryMetadata.h> |
17 | #include <Common/ActionLock.h> |
18 | #include <Common/Exception.h> |
19 | #include <Common/RWLock.h> |
20 | #include <Common/TypePromotion.h> |
21 | |
22 | #include <optional> |
23 | #include <shared_mutex> |
24 | |
25 | |
26 | namespace DB |
27 | { |
28 | |
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

/// Integer type used to identify an action kind (see IStorage::getActionLock).
using StorageActionBlockType = size_t;

/// Types that are only forward-declared in this header.
class Context;
class ASTCreateQuery;
class AlterCommands;
class MutationCommands;
class PartitionCommands;
class IProcessor;
class Pipe;

struct Settings;
struct SettingChange;

/// Container and pointer aliases built on top of the forward declarations above.
using SettingsChanges = std::vector<SettingChange>;
using ProcessorPtr = std::shared_ptr<IProcessor>;
using Processors = std::vector<ProcessorPtr>;
using Pipes = std::vector<Pipe>;
54 | |
/// Size counters kept for one column (used as the mapped type of IStorage::ColumnSizeByName).
/// All counters start at zero.
struct ColumnSize
{
    size_t marks = 0;
    size_t data_compressed = 0;
    size_t data_uncompressed = 0;

    /// Accumulates the counters of `rhs` into this object, field by field.
    void add(const ColumnSize & rhs)
    {
        marks = marks + rhs.marks;
        data_compressed = data_compressed + rhs.data_compressed;
        data_uncompressed = data_uncompressed + rhs.data_uncompressed;
    }
};
68 | |
69 | /** Storage. Describes the table. Responsible for |
70 | * - storage of the table data; |
71 | * - the definition in which files (or not in files) the data is stored; |
72 | * - data lookups and appends; |
73 | * - data storage structure (compression, etc.) |
74 | * - concurrent access to data (locks, etc.) |
75 | */ |
76 | class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromotion<IStorage> |
77 | { |
78 | public: |
79 | IStorage() = default; |
80 | explicit IStorage(ColumnsDescription virtuals_); |
81 | |
82 | virtual ~IStorage() = default; |
83 | IStorage(const IStorage &) = delete; |
84 | IStorage & operator=(const IStorage &) = delete; |
85 | |
86 | /// The main name of the table type (for example, StorageMergeTree). |
87 | virtual std::string getName() const = 0; |
88 | |
89 | /// The name of the table. |
90 | virtual std::string getTableName() const = 0; |
91 | virtual std::string getDatabaseName() const { return {}; } |
92 | |
93 | /// Returns true if the storage receives data from a remote server or servers. |
94 | virtual bool isRemote() const { return false; } |
95 | |
96 | /// Returns true if the storage supports queries with the SAMPLE section. |
97 | virtual bool supportsSampling() const { return false; } |
98 | |
99 | /// Returns true if the storage supports queries with the FINAL section. |
100 | virtual bool supportsFinal() const { return false; } |
101 | |
102 | /// Returns true if the storage supports queries with the PREWHERE section. |
103 | virtual bool supportsPrewhere() const { return false; } |
104 | |
105 | /// Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas. |
106 | virtual bool supportsReplication() const { return false; } |
107 | |
108 | /// Returns true if the storage supports deduplication of inserted data blocks. |
109 | virtual bool supportsDeduplication() const { return false; } |
110 | |
111 | /// Returns true if the storage supports settings. |
112 | virtual bool supportsSettings() const { return false; } |
113 | |
114 | /// Returns true if the blocks shouldn't be pushed to associated views on insert. |
115 | virtual bool noPushingToViews() const { return false; } |
116 | |
117 | /// Optional size information of each physical column. |
118 | /// Currently it's only used by the MergeTree family for query optimizations. |
119 | using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>; |
120 | virtual ColumnSizeByName getColumnSizes() const { return {}; } |
121 | |
122 | public: /// thread-unsafe part. lockStructure must be acquired |
123 | virtual const ColumnsDescription & getColumns() const; /// returns combined set of columns |
124 | virtual void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. |
125 | const ColumnsDescription & getVirtuals() const; |
126 | const IndicesDescription & getIndices() const; |
127 | |
128 | const ConstraintsDescription & getConstraints() const; |
129 | void setConstraints(ConstraintsDescription constraints_); |
130 | |
131 | /// Returns storage metadata copy. Direct modification of |
132 | /// result structure doesn't affect storage. |
133 | virtual StorageInMemoryMetadata getInMemoryMetadata() const; |
134 | |
135 | /// NOTE: these methods should include virtual columns, |
136 | /// but should NOT include ALIAS columns (they are treated separately). |
137 | virtual NameAndTypePair getColumn(const String & column_name) const; |
138 | virtual bool hasColumn(const String & column_name) const; |
139 | |
140 | Block getSampleBlock() const; /// ordinary + materialized. |
141 | Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. |
142 | Block getSampleBlockNonMaterialized() const; /// ordinary. |
143 | Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. |
144 | |
145 | /// Verify that all the requested names are in the table and are set correctly: |
146 | /// list of names is not empty and the names do not repeat. |
147 | void check(const Names & column_names, bool include_virtuals = false) const; |
148 | |
149 | /// Check that all the requested names are in the table and have the correct types. |
150 | void check(const NamesAndTypesList & columns) const; |
151 | |
152 | /// Check that all names from the intersection of `names` and `columns` are in the table and have the same types. |
153 | void check(const NamesAndTypesList & columns, const Names & column_names) const; |
154 | |
155 | /// Check that the data block contains all the columns of the table with the correct types, |
156 | /// contains only the columns of the table, and all the columns are different. |
157 | /// If |need_all| is set, then checks that all the columns of the table are in the block. |
158 | void check(const Block & block, bool need_all = false) const; |
159 | |
160 | protected: /// still thread-unsafe part. |
161 | void setIndices(IndicesDescription indices_); |
162 | |
163 | /// Returns whether the column is virtual - by default all columns are real. |
164 | /// Initially reserved virtual column name may be shadowed by real column. |
165 | virtual bool isVirtualColumn(const String & column_name) const; |
166 | |
167 | |
168 | private: |
169 | ColumnsDescription columns; /// combined real and virtual columns |
170 | const ColumnsDescription virtuals = {}; |
171 | IndicesDescription indices; |
172 | ConstraintsDescription constraints; |
173 | |
174 | public: |
175 | /// Acquire this lock if you need the table structure to remain constant during the execution of |
176 | /// the query. If will_add_new_data is true, this means that the query will add new data to the table |
177 | /// (INSERT or a parts merge). |
178 | TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id); |
179 | |
180 | /// Acquire this lock at the start of ALTER to lock out other ALTERs and make sure that only you |
181 | /// can modify the table structure. It can later be upgraded to the exclusive lock. |
182 | TableStructureWriteLockHolder lockAlterIntention(const String & query_id); |
183 | |
184 | /// Upgrade alter intention lock and make sure that no new data is inserted into the table. |
185 | /// This is used by the ALTER MODIFY of the MergeTree storage to consistently determine |
186 | /// the set of parts that needs to be altered. |
187 | void lockNewDataStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id); |
188 | |
189 | /// Upgrade alter intention lock to the full exclusive structure lock. This is done by ALTER queries |
190 | /// to ensure that no other query uses the table structure and it can be safely changed. |
191 | void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id); |
192 | |
193 | /// Acquire the full exclusive lock immediately. No other queries can run concurrently. |
194 | TableStructureWriteLockHolder lockExclusively(const String & query_id); |
195 | |
196 | /** Returns stage to which query is going to be processed in read() function. |
197 | * (Normally, the function only reads the columns from the list, but in other cases, |
198 | * for example, the request can be partially processed on a remote server.) |
199 | */ |
200 | virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &) const { return QueryProcessingStage::FetchColumns; } |
201 | |
202 | /** Watch live changes to the table. |
203 | * Accepts a list of columns to read, as well as a description of the query, |
204 | * from which information can be extracted about how to retrieve data |
205 | * (indexes, locks, etc.) |
206 | * Returns a stream with which you can read data sequentially |
207 | * or multiple streams for parallel data reading. |
208 | * The `processed_stage` info is also written to what stage the request was processed. |
209 | * (Normally, the function only reads the columns from the list, but in other cases, |
210 | * for example, the request can be partially processed on a remote server.) |
211 | * |
212 | * context contains settings for one query. |
213 | * Usually Storage does not care about these settings, since they are used in the interpreter. |
214 | * But, for example, for distributed query processing, the settings are passed to the remote server. |
215 | * |
216 | * num_streams - a recommendation, how many streams to return, |
217 | * if the storage can return a different number of streams. |
218 | * |
219 | * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). |
220 | */ |
221 | virtual BlockInputStreams watch( |
222 | const Names & /*column_names*/, |
223 | const SelectQueryInfo & /*query_info*/, |
224 | const Context & /*context*/, |
225 | QueryProcessingStage::Enum & /*processed_stage*/, |
226 | size_t /*max_block_size*/, |
227 | unsigned /*num_streams*/) |
228 | { |
229 | throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
230 | } |
231 | |
232 | /** Read a set of columns from the table. |
233 | * Accepts a list of columns to read, as well as a description of the query, |
234 | * from which information can be extracted about how to retrieve data |
235 | * (indexes, locks, etc.) |
236 | * Returns a stream with which you can read data sequentially |
237 | * or multiple streams for parallel data reading. |
238 | * The `processed_stage` must be the result of getQueryProcessingStage() function. |
239 | * |
240 | * context contains settings for one query. |
241 | * Usually Storage does not care about these settings, since they are used in the interpreter. |
242 | * But, for example, for distributed query processing, the settings are passed to the remote server. |
243 | * |
244 | * num_streams - a recommendation, how many streams to return, |
245 | * if the storage can return a different number of streams. |
246 | * |
247 | * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). |
248 | * |
249 | * Default implementation calls `readWithProcessors` and wraps into TreeExecutor. |
250 | */ |
251 | virtual BlockInputStreams read( |
252 | const Names & /*column_names*/, |
253 | const SelectQueryInfo & /*query_info*/, |
254 | const Context & /*context*/, |
255 | QueryProcessingStage::Enum /*processed_stage*/, |
256 | size_t /*max_block_size*/, |
257 | unsigned /*num_streams*/); |
258 | |
259 | /** The same as read, but returns processors. |
260 | */ |
261 | virtual Pipes readWithProcessors( |
262 | const Names & /*column_names*/, |
263 | const SelectQueryInfo & /*query_info*/, |
264 | const Context & /*context*/, |
265 | QueryProcessingStage::Enum /*processed_stage*/, |
266 | size_t /*max_block_size*/, |
267 | unsigned /*num_streams*/) |
268 | { |
269 | throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
270 | } |
271 | |
272 | virtual bool supportProcessorsPipeline() const { return false; } |
273 | |
274 | /** Writes the data to a table. |
275 | * Receives a description of the query, which can contain information about the data write method. |
276 | * Returns an object by which you can write data sequentially. |
277 | * |
278 | * It is guaranteed that the table structure will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP). |
279 | */ |
280 | virtual BlockOutputStreamPtr write( |
281 | const ASTPtr & /*query*/, |
282 | const Context & /*context*/) |
283 | { |
284 | throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
285 | } |
286 | |
287 | /** Delete the table data. Called before deleting the directory with the data. |
288 | * The method can be called only after detaching table from Context (when no queries are performed with table). |
289 | * The table is not usable during and after call to this method. |
290 | * If you do not need any action other than deleting the directory with data, you can leave this method blank. |
291 | */ |
292 | virtual void drop(TableStructureWriteLockHolder &) {} |
293 | |
294 | /** Clear the table data and leave it empty. |
295 | * Must be called under lockForAlter. |
296 | */ |
297 | virtual void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) |
298 | { |
299 | throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
300 | } |
301 | |
302 | /** Rename the table. |
303 | * Renaming a name in a file with metadata, the name in the list of tables in the RAM, is done separately. |
304 | * In this function, you need to rename the directory with the data, if any. |
305 | * Called when the table structure is locked for write. |
306 | */ |
307 | virtual void rename(const String & /*new_path_to_table_data*/, const String & /*new_database_name*/, const String & /*new_table_name*/, |
308 | TableStructureWriteLockHolder &) |
309 | { |
310 | throw Exception("Method rename is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
311 | } |
312 | |
313 | /** ALTER tables in the form of column changes that do not affect the change to Storage or its parameters. |
314 | * This method must fully execute the ALTER query, taking care of the locks itself. |
315 | * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. |
316 | */ |
317 | virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder); |
318 | |
319 | /** Checks that alter commands can be applied to storage. For example, columns can be modified, |
320 | * or primary key can be changes, etc. |
321 | */ |
322 | virtual void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings); |
323 | |
324 | /** ALTER tables with regard to its partitions. |
325 | * Should handle locks for each command on its own. |
326 | */ |
327 | virtual void alterPartition(const ASTPtr & /* query */, const PartitionCommands & /* commands */, const Context & /* context */) |
328 | { |
329 | throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
330 | } |
331 | |
332 | /** Perform any background work. For example, combining parts in a MergeTree type table. |
333 | * Returns whether any work has been done. |
334 | */ |
335 | virtual bool optimize(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*final*/, bool /*deduplicate*/, const Context & /*context*/) |
336 | { |
337 | throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
338 | } |
339 | |
340 | /// Mutate the table contents |
341 | virtual void mutate(const MutationCommands &, const Context &) |
342 | { |
343 | throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
344 | } |
345 | |
346 | /// Cancel a mutation. |
347 | virtual CancellationCode killMutation(const String & /*mutation_id*/) |
348 | { |
349 | throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); |
350 | } |
351 | |
352 | /** If the table have to do some complicated work on startup, |
353 | * that must be postponed after creation of table object |
354 | * (like launching some background threads), |
355 | * do it in this method. |
356 | * You should call this method after creation of object. |
357 | * By default, does nothing. |
358 | * Cannot be called simultaneously by multiple threads. |
359 | */ |
360 | virtual void startup() {} |
361 | |
362 | /** If the table have to do some complicated work when destroying an object - do it in advance. |
363 | * For example, if the table contains any threads for background work - ask them to complete and wait for completion. |
364 | * By default, does nothing. |
365 | * Can be called simultaneously from different threads, even after a call to drop(). |
366 | */ |
367 | virtual void shutdown() {} |
368 | |
369 | /// Asks table to stop executing some action identified by action_type |
370 | /// If table does not support such type of lock, and empty lock is returned |
371 | virtual ActionLock getActionLock(StorageActionBlockType /* action_type */) |
372 | { |
373 | return {}; |
374 | } |
375 | |
376 | std::atomic<bool> is_dropped{false}; |
377 | |
378 | /// Does table support index for IN sections |
379 | virtual bool supportsIndexForIn() const { return false; } |
380 | |
381 | /// Provides a hint that the storage engine may evaluate the IN-condition by using an index. |
382 | virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */) const { return false; } |
383 | |
384 | /// Checks validity of the data |
385 | virtual CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) { throw Exception("Check query is not supported for " + getName() + " storage" , ErrorCodes::NOT_IMPLEMENTED); } |
386 | |
387 | /// Checks that table could be dropped right now |
388 | /// Otherwise - throws an exception with detailed information. |
389 | /// We do not use mutex because it is not very important that the size could change during the operation. |
390 | virtual void checkTableCanBeDropped() const {} |
391 | |
392 | /// Checks that Partition could be dropped right now |
393 | /// Otherwise - throws an exception with detailed information. |
394 | /// We do not use mutex because it is not very important that the size could change during the operation. |
395 | virtual void checkPartitionCanBeDropped(const ASTPtr & /*partition*/) {} |
396 | |
397 | /** Notify engine about updated dependencies for this storage. */ |
398 | virtual void updateDependencies() {} |
399 | |
400 | /// Returns data paths if storage supports it, empty vector otherwise. |
401 | virtual Strings getDataPaths() const { return {}; } |
402 | |
403 | /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. |
404 | virtual ASTPtr getPartitionKeyAST() const { return nullptr; } |
405 | |
406 | /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. |
407 | virtual ASTPtr getSortingKeyAST() const { return nullptr; } |
408 | |
409 | /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. |
410 | virtual ASTPtr getPrimaryKeyAST() const { return nullptr; } |
411 | |
412 | /// Returns sampling expression AST for storage or nullptr if there is none. |
413 | virtual ASTPtr getSamplingKeyAST() const { return nullptr; } |
414 | |
415 | /// Returns additional columns that need to be read to calculate partition key. |
416 | virtual Names getColumnsRequiredForPartitionKey() const { return {}; } |
417 | |
418 | /// Returns additional columns that need to be read to calculate sorting key. |
419 | virtual Names getColumnsRequiredForSortingKey() const { return {}; } |
420 | |
421 | /// Returns additional columns that need to be read to calculate primary key. |
422 | virtual Names getColumnsRequiredForPrimaryKey() const { return {}; } |
423 | |
424 | /// Returns additional columns that need to be read to calculate sampling key. |
425 | virtual Names getColumnsRequiredForSampling() const { return {}; } |
426 | |
427 | /// Returns additional columns that need to be read for FINAL to work. |
428 | virtual Names getColumnsRequiredForFinal() const { return {}; } |
429 | |
430 | /// Returns names of primary key + secondary sorting columns |
431 | virtual Names getSortingKeyColumns() const { return {}; } |
432 | |
433 | /// Returns storage policy if storage supports it |
434 | virtual StoragePolicyPtr getStoragePolicy() const { return {}; } |
435 | |
436 | /** If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. |
437 | */ |
438 | virtual std::optional<UInt64> totalRows() const |
439 | { |
440 | return {}; |
441 | } |
442 | |
443 | static DB::CompressionMethod chooseCompressionMethod(const String & uri, const String & compression_method); |
444 | |
445 | private: |
446 | /// You always need to take the next three locks in this order. |
447 | |
448 | /// If you hold this lock exclusively, you can be sure that no other structure modifying queries |
449 | /// (e.g. ALTER, DROP) are concurrently executing. But queries that only read table structure |
450 | /// (e.g. SELECT, INSERT) can continue to execute. |
451 | mutable RWLock alter_intention_lock = RWLockImpl::create(); |
452 | |
453 | /// It is taken for share for the entire INSERT query and the entire merge of the parts (for MergeTree). |
454 | /// ALTER COLUMN queries acquire an exclusive lock to ensure that no new parts with the old structure |
455 | /// are added to the table and thus the set of parts to modify doesn't change. |
456 | mutable RWLock new_data_structure_lock = RWLockImpl::create(); |
457 | |
458 | /// Lock for the table column structure (names, types, etc.) and data path. |
459 | /// It is taken in exclusive mode by queries that modify them (e.g. RENAME, ALTER and DROP) |
460 | /// and in share mode by other queries. |
461 | mutable RWLock structure_lock = RWLockImpl::create(); |
462 | }; |
463 | |
464 | } |
465 | |