1#pragma once
2
3#include <map>
4#include <shared_mutex>
5#include <ext/shared_ptr_helper.h>
6
7#include <Poco/File.h>
8
9#include <Storages/IStorage.h>
10#include <Common/FileChecker.h>
11#include <Common/escapeForFileName.h>
12
13
14namespace DB
15{
16
17/** Implements simple table engine without support of indices.
18 * The data is stored in a compressed form.
19 */
20class StorageLog : public ext::shared_ptr_helper<StorageLog>, public IStorage
21{
22friend class LogBlockInputStream;
23friend class LogBlockOutputStream;
24friend struct ext::shared_ptr_helper<StorageLog>;
25
26public:
27 std::string getName() const override { return "Log"; }
28 std::string getTableName() const override { return table_name; }
29 std::string getDatabaseName() const override { return database_name; }
30
31 BlockInputStreams read(
32 const Names & column_names,
33 const SelectQueryInfo & query_info,
34 const Context & context,
35 QueryProcessingStage::Enum processed_stage,
36 size_t max_block_size,
37 unsigned num_streams) override;
38
39 BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override;
40
41 void rename(const String & new_path_to_table_data, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override;
42
43 CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override;
44
45 void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override;
46
47 std::string fullPath() const { return path; }
48
49 Strings getDataPaths() const override { return {fullPath()}; }
50
51protected:
52 /** Attach the table with the appropriate name, along the appropriate path (with / at the end),
53 * (the correctness of names and paths is not verified)
54 * consisting of the specified columns; Create files if they do not exist.
55 */
56 StorageLog(
57 const std::string & relative_path_,
58 const std::string & database_name_,
59 const std::string & table_name_,
60 const ColumnsDescription & columns_,
61 const ConstraintsDescription & constraints_,
62 size_t max_compress_block_size_,
63 const Context & context_);
64
65private:
66 String base_path;
67 String path;
68 String table_name;
69 String database_name;
70
71 mutable std::shared_mutex rwlock;
72
73 /** Offsets to some row number in a file for column in table.
74 * They are needed so that you can read the data in several threads.
75 */
76 struct Mark
77 {
78 size_t rows; /// How many rows are before this offset including the block at this offset.
79 size_t offset; /// The offset in compressed file.
80 };
81
82 using Marks = std::vector<Mark>;
83
84 /// Column data
85 struct ColumnData
86 {
87 /// Specifies the column number in the marks file.
88 /// Does not necessarily match the column number among the columns of the table: columns with lengths of arrays are also numbered here.
89 size_t column_index;
90
91 Poco::File data_file;
92 Marks marks;
93 };
94 using Files_t = std::map<String, ColumnData>;
95
96 Files_t files; /// name -> data
97
98 Names column_names_by_idx; /// column_index -> name
99
100 Poco::File marks_file;
101
102 /// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
103 void addFiles(const String & column_name, const IDataType & type);
104
105 bool loaded_marks = false;
106
107 size_t max_compress_block_size;
108 size_t file_count = 0;
109
110 FileChecker file_checker;
111
112 /// Read marks files if they are not already read.
113 /// It is done lazily, so that with a large number of tables, the server starts quickly.
114 /// You can not call with a write locked `rwlock`.
115 void loadMarks();
116
117 /// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
118 void addFile(const String & column_name, const IDataType & type, size_t level = 0);
119
120 /** For normal columns, the number of rows in the block is specified in the marks.
121 * For array columns and nested structures, there are more than one group of marks that correspond to different files
122 * - for elements (file name.bin) - the total number of array elements in the block is specified,
123 * - for array sizes (file name.size0.bin) - the number of rows (the whole arrays themselves) in the block is specified.
124 *
125 * Return the first group of marks that contain the number of rows, but not the internals of the arrays.
126 */
127 const Marks & getMarksWithRealRowCount() const;
128
129 std::string getFullPath() const { return path; }
130};
131
132}
133