1#pragma once
2
3#include <Core/Names.h>
4#include <Core/QueryProcessingStage.h>
5#include <DataStreams/IBlockStream_fwd.h>
6#include <Databases/IDatabase.h>
7#include <Interpreters/CancellationCode.h>
8#include <IO/CompressionMethod.h>
9#include <Storages/IStorage_fwd.h>
10#include <Storages/SelectQueryInfo.h>
11#include <Storages/TableStructureLockHolder.h>
12#include <Storages/CheckResults.h>
13#include <Storages/ColumnsDescription.h>
14#include <Storages/IndicesDescription.h>
15#include <Storages/ConstraintsDescription.h>
16#include <Storages/StorageInMemoryMetadata.h>
17#include <Common/ActionLock.h>
18#include <Common/Exception.h>
19#include <Common/RWLock.h>
20#include <Common/TypePromotion.h>
21
22#include <optional>
23#include <shared_mutex>
24
25
26namespace DB
27{
28
namespace ErrorCodes
{
    /// Error code used in this header by the default implementations of IStorage methods
    /// that a storage does not support. Only declared here (extern); the definition is elsewhere.
    extern const int NOT_IMPLEMENTED;
}
33
/// Forward declarations and aliases for types that appear in the declarations below.

class Context;

/// Type of the action identifier accepted by IStorage::getActionLock.
using StorageActionBlockType = size_t;

class ASTCreateQuery;

struct Settings;
struct SettingChange;
/// A list of individual setting changes.
using SettingsChanges = std::vector<SettingChange>;

class AlterCommands;
class MutationCommands;
class PartitionCommands;

class IProcessor;
/// Shared-ownership handle to a processor, and a list of such handles.
using ProcessorPtr = std::shared_ptr<IProcessor>;
using Processors = std::vector<ProcessorPtr>;

class Pipe;
/// A list of pipes; this is the return type of IStorage::readWithProcessors.
using Pipes = std::vector<Pipe>;
54
/// Size counters of a single column. All counters start at zero.
/// NOTE(review): the units are not stated in this header (presumably bytes for the data fields) - confirm.
struct ColumnSize
{
    size_t marks = 0;
    size_t data_compressed = 0;
    size_t data_uncompressed = 0;

    /// Accumulates the counters of `other` into this object, field by field.
    void add(const ColumnSize & other)
    {
        data_uncompressed = data_uncompressed + other.data_uncompressed;
        data_compressed = data_compressed + other.data_compressed;
        marks = marks + other.marks;
    }
};
68
/** Storage. Describes the table. Responsible for
  * - storage of the table data;
  * - the definition in which files (or not in files) the data is stored;
  * - data lookups and appends;
  * - data storage structure (compression, etc.)
  * - concurrent access to data (locks, etc.)
  *
  * Many of the virtual methods below have a default implementation that either returns
  * a "not supported" / empty value or throws an Exception with ErrorCodes::NOT_IMPLEMENTED,
  * so a concrete storage overrides only what it actually supports.
  * Objects of this class are not copyable.
  */
class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromotion<IStorage>
{
public:
    IStorage() = default;

    /// Takes a description of virtual columns.
    /// NOTE(review): defined out of line; presumably it initializes `virtuals` - confirm in the implementation.
    explicit IStorage(ColumnsDescription virtuals_);

    virtual ~IStorage() = default;

    /// Copying a storage is forbidden.
    IStorage(const IStorage &) = delete;
    IStorage & operator=(const IStorage &) = delete;

    /// The main name of the table type (for example, StorageMergeTree).
    virtual std::string getName() const = 0;

    /// The name of the table.
    virtual std::string getTableName() const = 0;

    /// The name of the database. The default implementation returns an empty string.
    virtual std::string getDatabaseName() const { return {}; }

    /// Returns true if the storage receives data from a remote server or servers.
    virtual bool isRemote() const { return false; }

    /// Returns true if the storage supports queries with the SAMPLE section.
    virtual bool supportsSampling() const { return false; }

    /// Returns true if the storage supports queries with the FINAL section.
    virtual bool supportsFinal() const { return false; }

    /// Returns true if the storage supports queries with the PREWHERE section.
    virtual bool supportsPrewhere() const { return false; }

    /// Returns true if the storage replicates SELECT, INSERT and ALTER commands among replicas.
    virtual bool supportsReplication() const { return false; }

    /// Returns true if the storage supports deduplication of inserted data blocks.
    virtual bool supportsDeduplication() const { return false; }

    /// Returns true if the storage supports settings.
    virtual bool supportsSettings() const { return false; }

    /// Returns true if the blocks shouldn't be pushed to associated views on insert.
    virtual bool noPushingToViews() const { return false; }

    /// Optional size information of each physical column.
    /// Currently it's only used by the MergeTree family for query optimizations.
    /// The default implementation returns an empty map.
    using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
    virtual ColumnSizeByName getColumnSizes() const { return {}; }

public: /// thread-unsafe part. lockStructure must be acquired
        /// NOTE(review): no method named exactly `lockStructure` is declared in this class;
        /// presumably this refers to the lockStructure* methods declared below - confirm.
    virtual const ColumnsDescription & getColumns() const; /// returns combined set of columns
    virtual void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones.
    const ColumnsDescription & getVirtuals() const;
    const IndicesDescription & getIndices() const;

    const ConstraintsDescription & getConstraints() const;
    void setConstraints(ConstraintsDescription constraints_);

    /// Returns a copy of the storage metadata. Modifying the returned
    /// structure directly doesn't affect the storage.
    virtual StorageInMemoryMetadata getInMemoryMetadata() const;

    /// NOTE: these methods should include virtual columns,
    /// but should NOT include ALIAS columns (they are treated separately).
    virtual NameAndTypePair getColumn(const String & column_name) const;
    virtual bool hasColumn(const String & column_name) const;

    Block getSampleBlock() const; /// ordinary + materialized.
    Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals.
    Block getSampleBlockNonMaterialized() const; /// ordinary.
    Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals.

    /// Verify that all the requested names are in the table and are set correctly:
    /// list of names is not empty and the names do not repeat.
    void check(const Names & column_names, bool include_virtuals = false) const;

    /// Check that all the requested names are in the table and have the correct types.
    void check(const NamesAndTypesList & columns) const;

    /// Check that all names from the intersection of `column_names` and `columns` are in the table and have the same types.
    void check(const NamesAndTypesList & columns, const Names & column_names) const;

    /// Check that the data block contains all the columns of the table with the correct types,
    /// contains only the columns of the table, and all the columns are different.
    /// If |need_all| is set, then checks that all the columns of the table are in the block.
    void check(const Block & block, bool need_all = false) const;

protected: /// still thread-unsafe part.
    void setIndices(IndicesDescription indices_);

    /// Returns whether the column is virtual - by default all columns are real.
    /// An initially reserved virtual column name may be shadowed by a real column.
    virtual bool isVirtualColumn(const String & column_name) const;


private:
    /// Table metadata kept in memory; exposed through the getters and setters above.
    ColumnsDescription columns; /// combined real and virtual columns
    const ColumnsDescription virtuals = {}; /// const: cannot be reassigned after construction
    IndicesDescription indices;
    ConstraintsDescription constraints;

public:
    /// Acquire this lock if you need the table structure to remain constant during the execution of
    /// the query. If will_add_new_data is true, this means that the query will add new data to the table
    /// (INSERT or a parts merge).
    TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id);

    /// Acquire this lock at the start of ALTER to lock out other ALTERs and make sure that only you
    /// can modify the table structure. It can later be upgraded to the exclusive lock.
    TableStructureWriteLockHolder lockAlterIntention(const String & query_id);

    /// Upgrade alter intention lock and make sure that no new data is inserted into the table.
    /// This is used by the ALTER MODIFY of the MergeTree storage to consistently determine
    /// the set of parts that needs to be altered.
    void lockNewDataStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id);

    /// Upgrade alter intention lock to the full exclusive structure lock. This is done by ALTER queries
    /// to ensure that no other query uses the table structure and it can be safely changed.
    void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id);

    /// Acquire the full exclusive lock immediately. No other queries can run concurrently.
    TableStructureWriteLockHolder lockExclusively(const String & query_id);

    /** Returns stage to which query is going to be processed in read() function.
      * (Normally, the function only reads the columns from the list, but in other cases,
      *  for example, the request can be partially processed on a remote server.)
      * The default implementation returns QueryProcessingStage::FetchColumns.
      */
    virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &) const { return QueryProcessingStage::FetchColumns; }

    /** Watch live changes to the table.
      * Accepts a list of columns to read, as well as a description of the query,
      *  from which information can be extracted about how to retrieve data
      *  (indexes, locks, etc.)
      * Returns a stream with which you can read data sequentially
      *  or multiple streams for parallel data reading.
      * The stage to which the request was processed is written to `processed_stage`
      *  (it is passed by non-const reference).
      * (Normally, the function only reads the columns from the list, but in other cases,
      *  for example, the request can be partially processed on a remote server.)
      *
      * context contains settings for one query.
      * Usually Storage does not care about these settings, since they are used in the interpreter.
      * But, for example, for distributed query processing, the settings are passed to the remote server.
      *
      * num_streams - a recommendation, how many streams to return,
      *  if the storage can return a different number of streams.
      *
      * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
      *
      * The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
      */
    virtual BlockInputStreams watch(
        const Names & /*column_names*/,
        const SelectQueryInfo & /*query_info*/,
        const Context & /*context*/,
        QueryProcessingStage::Enum & /*processed_stage*/,
        size_t /*max_block_size*/,
        unsigned /*num_streams*/)
    {
        throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /** Read a set of columns from the table.
      * Accepts a list of columns to read, as well as a description of the query,
      *  from which information can be extracted about how to retrieve data
      *  (indexes, locks, etc.)
      * Returns a stream with which you can read data sequentially
      *  or multiple streams for parallel data reading.
      * The `processed_stage` must be the result of getQueryProcessingStage() function.
      *
      * context contains settings for one query.
      * Usually Storage does not care about these settings, since they are used in the interpreter.
      * But, for example, for distributed query processing, the settings are passed to the remote server.
      *
      * num_streams - a recommendation, how many streams to return,
      *  if the storage can return a different number of streams.
      *
      * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
      *
      * Default implementation calls `readWithProcessors` and wraps into TreeExecutor.
      */
    virtual BlockInputStreams read(
        const Names & /*column_names*/,
        const SelectQueryInfo & /*query_info*/,
        const Context & /*context*/,
        QueryProcessingStage::Enum /*processed_stage*/,
        size_t /*max_block_size*/,
        unsigned /*num_streams*/);

    /** The same as read, but returns processors (as a list of pipes).
      * The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED;
      *  note that its message names the method "read".
      */
    virtual Pipes readWithProcessors(
        const Names & /*column_names*/,
        const SelectQueryInfo & /*query_info*/,
        const Context & /*context*/,
        QueryProcessingStage::Enum /*processed_stage*/,
        size_t /*max_block_size*/,
        unsigned /*num_streams*/)
    {
        throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /// Returns false by default.
    /// NOTE(review): presumably tells whether the storage implements readWithProcessors - confirm against callers.
    virtual bool supportProcessorsPipeline() const { return false; }

    /** Writes the data to a table.
      * Receives a description of the query, which can contain information about the data write method.
      * Returns an object by which you can write data sequentially.
      *
      * It is guaranteed that the table structure will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
      *
      * The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
      */
    virtual BlockOutputStreamPtr write(
        const ASTPtr & /*query*/,
        const Context & /*context*/)
    {
        throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /** Delete the table data. Called before deleting the directory with the data.
      * The method can be called only after detaching table from Context (when no queries are performed with table).
      * The table is not usable during and after call to this method.
      * If you do not need any action other than deleting the directory with data, you can leave this method blank.
      * The default implementation does nothing.
      */
    virtual void drop(TableStructureWriteLockHolder &) {}

    /** Clear the table data and leave it empty.
      * Must be called under lockForAlter.
      * NOTE(review): no `lockForAlter` is declared in this class; the method receives a
      *  TableStructureWriteLockHolder, so presumably a structure write lock is meant - confirm.
      * The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
      */
    virtual void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &)
    {
        throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /** Rename the table.
      * Renaming a name in a file with metadata, the name in the list of tables in the RAM, is done separately.
      * In this function, you need to rename the directory with the data, if any.
      * Called when the table structure is locked for write.
      * The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
      */
    virtual void rename(const String & /*new_path_to_table_data*/, const String & /*new_database_name*/, const String & /*new_table_name*/,
                        TableStructureWriteLockHolder &)
    {
        throw Exception("Method rename is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /** ALTER tables in the form of column changes that do not affect the change to Storage or its parameters.
      * This method must fully execute the ALTER query, taking care of the locks itself.
      * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata.
      */
    virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder);

    /** Checks that alter commands can be applied to storage. For example, columns can be modified,
      * or primary key can be changed, etc.
      */
    virtual void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings);

    /** ALTER tables with regard to its partitions.
      * Should handle locks for each command on its own.
      * The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
      */
    virtual void alterPartition(const ASTPtr & /* query */, const PartitionCommands & /* commands */, const Context & /* context */)
    {
        throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /** Perform any background work. For example, combining parts in a MergeTree type table.
      * Returns whether any work has been done.
      * The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
      */
    virtual bool optimize(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*final*/, bool /*deduplicate*/, const Context & /*context*/)
    {
        throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /// Mutate the table contents.
    /// The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
    virtual void mutate(const MutationCommands &, const Context &)
    {
        throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /// Cancel a mutation.
    /// The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
    virtual CancellationCode killMutation(const String & /*mutation_id*/)
    {
        throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

    /** If the table has to do some complicated work on startup,
      *  that must be postponed after creation of table object
      *  (like launching some background threads),
      *  do it in this method.
      * You should call this method after creation of object.
      * By default, does nothing.
      * Cannot be called simultaneously by multiple threads.
      */
    virtual void startup() {}

    /** If the table has to do some complicated work when destroying an object - do it in advance.
      * For example, if the table contains any threads for background work - ask them to complete and wait for completion.
      * By default, does nothing.
      * Can be called simultaneously from different threads, even after a call to drop().
      */
    virtual void shutdown() {}

    /// Asks the table to stop executing some action identified by action_type.
    /// If the table does not support such a type of lock, an empty lock is returned
    /// (this is what the default implementation does).
    virtual ActionLock getActionLock(StorageActionBlockType /* action_type */)
    {
        return {};
    }

    /// Public atomic flag, initially false.
    /// NOTE(review): presumably set once the table has been dropped; the code that writes it is not in this header - confirm.
    std::atomic<bool> is_dropped{false};

    /// Does table support index for IN sections
    virtual bool supportsIndexForIn() const { return false; }

    /// Provides a hint that the storage engine may evaluate the IN-condition by using an index.
    virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */) const { return false; }

    /// Checks validity of the data.
    /// The default implementation throws an Exception with ErrorCodes::NOT_IMPLEMENTED.
    virtual CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); }

    /// Checks that table could be dropped right now
    /// Otherwise - throws an exception with detailed information.
    /// We do not use mutex because it is not very important that the size could change during the operation.
    /// The default implementation performs no check.
    virtual void checkTableCanBeDropped() const {}

    /// Checks that Partition could be dropped right now
    /// Otherwise - throws an exception with detailed information.
    /// We do not use mutex because it is not very important that the size could change during the operation.
    /// The default implementation performs no check.
    virtual void checkPartitionCanBeDropped(const ASTPtr & /*partition*/) {}

    /** Notify engine about updated dependencies for this storage. By default, does nothing. */
    virtual void updateDependencies() {}

    /// Returns data paths if storage supports it, empty vector otherwise.
    virtual Strings getDataPaths() const { return {}; }

    /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none.
    virtual ASTPtr getPartitionKeyAST() const { return nullptr; }

    /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none.
    virtual ASTPtr getSortingKeyAST() const { return nullptr; }

    /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none.
    virtual ASTPtr getPrimaryKeyAST() const { return nullptr; }

    /// Returns sampling expression AST for storage or nullptr if there is none.
    virtual ASTPtr getSamplingKeyAST() const { return nullptr; }

    /// Returns additional columns that need to be read to calculate partition key.
    virtual Names getColumnsRequiredForPartitionKey() const { return {}; }

    /// Returns additional columns that need to be read to calculate sorting key.
    virtual Names getColumnsRequiredForSortingKey() const { return {}; }

    /// Returns additional columns that need to be read to calculate primary key.
    virtual Names getColumnsRequiredForPrimaryKey() const { return {}; }

    /// Returns additional columns that need to be read to calculate sampling key.
    virtual Names getColumnsRequiredForSampling() const { return {}; }

    /// Returns additional columns that need to be read for FINAL to work.
    virtual Names getColumnsRequiredForFinal() const { return {}; }

    /// Returns names of primary key + secondary sorting columns
    virtual Names getSortingKeyColumns() const { return {}; }

    /// Returns storage policy if storage supports it; the default implementation returns an empty pointer.
    virtual StoragePolicyPtr getStoragePolicy() const { return {}; }

    /** If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it.
      * The default implementation returns an empty optional.
      */
    virtual std::optional<UInt64> totalRows() const
    {
        return {};
    }

    /// Returns a compression method for the given `uri` and `compression_method` name.
    /// NOTE(review): defined out of line; how the two arguments are combined is not visible here - see the implementation.
    static DB::CompressionMethod chooseCompressionMethod(const String & uri, const String & compression_method);

private:
    /// You always need to take the next three locks in this order.

    /// If you hold this lock exclusively, you can be sure that no other structure modifying queries
    /// (e.g. ALTER, DROP) are concurrently executing. But queries that only read table structure
    /// (e.g. SELECT, INSERT) can continue to execute.
    mutable RWLock alter_intention_lock = RWLockImpl::create();

    /// It is taken for share for the entire INSERT query and the entire merge of the parts (for MergeTree).
    /// ALTER COLUMN queries acquire an exclusive lock to ensure that no new parts with the old structure
    /// are added to the table and thus the set of parts to modify doesn't change.
    mutable RWLock new_data_structure_lock = RWLockImpl::create();

    /// Lock for the table column structure (names, types, etc.) and data path.
    /// It is taken in exclusive mode by queries that modify them (e.g. RENAME, ALTER and DROP)
    /// and in share mode by other queries.
    mutable RWLock structure_lock = RWLockImpl::create();
};
463
464}
465