| 1 | #pragma once |
| 2 | |
| 3 | #include <map> |
| 4 | #include <shared_mutex> |
| 5 | #include <ext/shared_ptr_helper.h> |
| 6 | |
| 7 | #include <Poco/File.h> |
| 8 | |
| 9 | #include <Storages/IStorage.h> |
| 10 | #include <Common/FileChecker.h> |
| 11 | #include <Common/escapeForFileName.h> |
| 12 | |
| 13 | |
| 14 | namespace DB |
| 15 | { |
| 16 | |
| 17 | /** Implements simple table engine without support of indices. |
| 18 | * The data is stored in a compressed form. Data files and their marks are tracked in `files`. |
| 19 | */ |
| 20 | class StorageLog : public ext::shared_ptr_helper<StorageLog>, public IStorage |
| 21 | { |
| 22 | friend class LogBlockInputStream; |
| 23 | friend class LogBlockOutputStream; |
| 24 | friend struct ext::shared_ptr_helper<StorageLog>; |
| 25 | |
| 26 | public: |
| 27 | std::string getName() const override { return "Log" ; } |
| 28 | std::string getTableName() const override { return table_name; } |
| 29 | std::string getDatabaseName() const override { return database_name; } |
| 30 | |
| 31 | BlockInputStreams read( |
| 32 | const Names & column_names, |
| 33 | const SelectQueryInfo & query_info, |
| 34 | const Context & context, |
| 35 | QueryProcessingStage::Enum processed_stage, |
| 36 | size_t max_block_size, |
| 37 | unsigned num_streams) override; |
| 38 | |
| 39 | BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; |
| 40 | |
| 41 | void rename(const String & new_path_to_table_data, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override; |
| 42 | |
| 43 | CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; |
| 44 | |
| 45 | void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; |
| 46 | |
| 47 | std::string fullPath() const { return path; } |
| 48 | |
| 49 | Strings getDataPaths() const override { return {fullPath()}; } |
| 50 | |
| 51 | protected: |
| 52 | /** Attach the table with the given name at the given path (with / at the end); |
| 53 | * the correctness of names and paths is not verified. |
| 54 | * The table consists of the specified columns. Files are created if they do not exist. |
| 55 | */ |
| 56 | StorageLog( |
| 57 | const std::string & relative_path_, |
| 58 | const std::string & database_name_, |
| 59 | const std::string & table_name_, |
| 60 | const ColumnsDescription & columns_, |
| 61 | const ConstraintsDescription & constraints_, |
| 62 | size_t max_compress_block_size_, |
| 63 | const Context & context_); |
| 64 | |
| 65 | private: |
| 66 | String base_path; |
| 67 | String path; |
| 68 | String table_name; |
| 69 | String database_name; |
| 70 | |
| 71 | mutable std::shared_mutex rwlock; |
| 72 | |
| 73 | /** An offset into a column's data file that corresponds to some row number. |
| 74 | * Marks are needed so that the data can be read in several threads. |
| 75 | */ |
| 76 | struct Mark |
| 77 | { |
| 78 | size_t rows; /// Number of rows up to this offset, including the block at this offset. |
| 79 | size_t offset; /// The offset in the compressed file. |
| 80 | }; |
| 81 | |
| 82 | using Marks = std::vector<Mark>; |
| 83 | |
| 84 | /// Per-file state: its position in the marks file, the data file itself and the marks for it. |
| 85 | struct ColumnData |
| 86 | { |
| 87 | /// Specifies the column number in the marks file. |
| 88 | /// It does not necessarily match the column number among the columns of the table: columns with lengths of arrays are also numbered here. |
| 89 | size_t column_index; |
| 90 | |
| 91 | Poco::File data_file; |
| 92 | Marks marks; |
| 93 | }; |
| 94 | using Files_t = std::map<String, ColumnData>; |
| 95 | |
| 96 | Files_t files; /// name -> data |
| 97 | |
| 98 | Names column_names_by_idx; /// column_index -> name |
| 99 | |
| 100 | Poco::File marks_file; |
| 101 | |
| 102 | /// The order of adding files must not change: it corresponds to the order of the columns in the marks file. |
| 103 | void addFiles(const String & column_name, const IDataType & type); |
| 104 | |
| 105 | bool loaded_marks = false; /** Presumably set once loadMarks() has read the marks - confirm in the implementation. */ |
| 106 | |
| 107 | size_t max_compress_block_size; |
| 108 | size_t file_count = 0; |
| 109 | |
| 110 | FileChecker file_checker; |
| 111 | |
| 112 | /// Read the marks files if they have not been read yet. |
| 113 | /// This is done lazily, so that the server starts quickly even with a large number of tables. |
| 114 | /// Must not be called while `rwlock` is write-locked. |
| 115 | void loadMarks(); |
| 116 | |
| 117 | /// Same ordering requirement as for `addFiles`: the order of adding files corresponds to the order of the columns in the marks file. NOTE(review): confirm this declaration is still defined and needed alongside `addFiles`. |
| 118 | void addFile(const String & column_name, const IDataType & type, size_t level = 0); |
| 119 | |
| 120 | /** For normal columns, the number of rows in the block is specified in the marks. |
| 121 | * For array columns and nested structures, there is more than one group of marks, each corresponding to a different file: |
| 122 | * - for elements (file name.bin) - the total number of array elements in the block is specified, |
| 123 | * - for array sizes (file name.size0.bin) - the number of rows (the whole arrays themselves) in the block is specified. |
| 124 | * |
| 125 | * Returns the first group of marks that contains the number of rows, but not the internals of the arrays. |
| 126 | */ |
| 127 | const Marks & getMarksWithRealRowCount() const; |
| 128 | |
| 129 | std::string getFullPath() const { return path; } /** NOTE(review): returns the same `path` as the public fullPath(); confirm both accessors are needed. */ |
| 130 | }; |
| 131 | |
| 132 | } |
| 133 | |