1 | #pragma once |
2 | |
3 | #include <map> |
4 | #include <shared_mutex> |
5 | #include <ext/shared_ptr_helper.h> |
6 | |
7 | #include <Poco/File.h> |
8 | |
9 | #include <Storages/IStorage.h> |
10 | #include <Common/FileChecker.h> |
11 | #include <Common/escapeForFileName.h> |
12 | |
13 | |
14 | namespace DB |
15 | { |
16 | |
/** Implements a simple table engine without support for indices.
  * The data is stored in compressed form.
  */
20 | class StorageLog : public ext::shared_ptr_helper<StorageLog>, public IStorage |
21 | { |
22 | friend class LogBlockInputStream; |
23 | friend class LogBlockOutputStream; |
24 | friend struct ext::shared_ptr_helper<StorageLog>; |
25 | |
26 | public: |
27 | std::string getName() const override { return "Log" ; } |
28 | std::string getTableName() const override { return table_name; } |
29 | std::string getDatabaseName() const override { return database_name; } |
30 | |
31 | BlockInputStreams read( |
32 | const Names & column_names, |
33 | const SelectQueryInfo & query_info, |
34 | const Context & context, |
35 | QueryProcessingStage::Enum processed_stage, |
36 | size_t max_block_size, |
37 | unsigned num_streams) override; |
38 | |
39 | BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; |
40 | |
41 | void rename(const String & new_path_to_table_data, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override; |
42 | |
43 | CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; |
44 | |
45 | void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; |
46 | |
47 | std::string fullPath() const { return path; } |
48 | |
49 | Strings getDataPaths() const override { return {fullPath()}; } |
50 | |
51 | protected: |
52 | /** Attach the table with the appropriate name, along the appropriate path (with / at the end), |
53 | * (the correctness of names and paths is not verified) |
54 | * consisting of the specified columns; Create files if they do not exist. |
55 | */ |
56 | StorageLog( |
57 | const std::string & relative_path_, |
58 | const std::string & database_name_, |
59 | const std::string & table_name_, |
60 | const ColumnsDescription & columns_, |
61 | const ConstraintsDescription & constraints_, |
62 | size_t max_compress_block_size_, |
63 | const Context & context_); |
64 | |
65 | private: |
66 | String base_path; |
67 | String path; |
68 | String table_name; |
69 | String database_name; |
70 | |
71 | mutable std::shared_mutex rwlock; |
72 | |
73 | /** Offsets to some row number in a file for column in table. |
74 | * They are needed so that you can read the data in several threads. |
75 | */ |
76 | struct Mark |
77 | { |
78 | size_t rows; /// How many rows are before this offset including the block at this offset. |
79 | size_t offset; /// The offset in compressed file. |
80 | }; |
81 | |
82 | using Marks = std::vector<Mark>; |
83 | |
84 | /// Column data |
85 | struct ColumnData |
86 | { |
87 | /// Specifies the column number in the marks file. |
88 | /// Does not necessarily match the column number among the columns of the table: columns with lengths of arrays are also numbered here. |
89 | size_t column_index; |
90 | |
91 | Poco::File data_file; |
92 | Marks marks; |
93 | }; |
94 | using Files_t = std::map<String, ColumnData>; |
95 | |
96 | Files_t files; /// name -> data |
97 | |
98 | Names column_names_by_idx; /// column_index -> name |
99 | |
100 | Poco::File marks_file; |
101 | |
102 | /// The order of adding files should not change: it corresponds to the order of the columns in the marks file. |
103 | void addFiles(const String & column_name, const IDataType & type); |
104 | |
105 | bool loaded_marks = false; |
106 | |
107 | size_t max_compress_block_size; |
108 | size_t file_count = 0; |
109 | |
110 | FileChecker file_checker; |
111 | |
112 | /// Read marks files if they are not already read. |
113 | /// It is done lazily, so that with a large number of tables, the server starts quickly. |
114 | /// You can not call with a write locked `rwlock`. |
115 | void loadMarks(); |
116 | |
117 | /// The order of adding files should not change: it corresponds to the order of the columns in the marks file. |
118 | void addFile(const String & column_name, const IDataType & type, size_t level = 0); |
119 | |
120 | /** For normal columns, the number of rows in the block is specified in the marks. |
121 | * For array columns and nested structures, there are more than one group of marks that correspond to different files |
122 | * - for elements (file name.bin) - the total number of array elements in the block is specified, |
123 | * - for array sizes (file name.size0.bin) - the number of rows (the whole arrays themselves) in the block is specified. |
124 | * |
125 | * Return the first group of marks that contain the number of rows, but not the internals of the arrays. |
126 | */ |
127 | const Marks & getMarksWithRealRowCount() const; |
128 | |
129 | std::string getFullPath() const { return path; } |
130 | }; |
131 | |
132 | } |
133 | |