1#pragma once
2
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <Core/Block.h>
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/MarkRange.h>
#include <Interpreters/ExpressionActions.h>
#include <Parsers/ASTIndexDeclaration.h>
#include <DataTypes/DataTypeLowCardinality.h>
14
/// Prefix put in front of an index name to form the index file name (without extension).
constexpr const char * INDEX_FILE_PREFIX = "skp_idx_";
16
17namespace DB
18{
19
/// Forward declarations: only the names are needed at this point.
class IMergeTreeIndex;
class MergeTreeData;

/// Shared handles to an index: a mutable one and a read-only (pointer-to-const) one.
using MutableMergeTreeIndexPtr = std::shared_ptr<IMergeTreeIndex>;
using MergeTreeIndexPtr = std::shared_ptr<const IMergeTreeIndex>;
25
26
27/// Stores some info about a single block of data.
28struct IMergeTreeIndexGranule
29{
30 virtual ~IMergeTreeIndexGranule() = default;
31
32 virtual void serializeBinary(WriteBuffer & ostr) const = 0;
33 virtual void deserializeBinary(ReadBuffer & istr) = 0;
34
35 virtual bool empty() const = 0;
36};
37
38using MergeTreeIndexGranulePtr = std::shared_ptr<IMergeTreeIndexGranule>;
39using MergeTreeIndexGranules = std::vector<MergeTreeIndexGranulePtr>;
40
41
42/// Aggregates info about a single block of data.
43struct IMergeTreeIndexAggregator
44{
45 virtual ~IMergeTreeIndexAggregator() = default;
46
47 virtual bool empty() const = 0;
48 virtual MergeTreeIndexGranulePtr getGranuleAndReset() = 0;
49
50 /// Updates the stored info using rows of the specified block.
51 /// Reads no more than `limit` rows.
52 /// After finishing updating `pos` will store the position of the first row which was not read.
53 virtual void update(const Block & block, size_t * pos, size_t limit) = 0;
54};
55
56using MergeTreeIndexAggregatorPtr = std::shared_ptr<IMergeTreeIndexAggregator>;
57using MergeTreeIndexAggregators = std::vector<MergeTreeIndexAggregatorPtr>;
58
59
60/// Condition on the index.
61class IMergeTreeIndexCondition
62{
63public:
64 virtual ~IMergeTreeIndexCondition() = default;
65 /// Checks if this index is useful for query.
66 virtual bool alwaysUnknownOrTrue() const = 0;
67
68 virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0;
69};
70
71using MergeTreeIndexConditionPtr = std::shared_ptr<IMergeTreeIndexCondition>;
72
73
74/// Structure for storing basic index info like columns, expression, arguments, ...
75class IMergeTreeIndex
76{
77public:
78 IMergeTreeIndex(
79 String name_,
80 ExpressionActionsPtr expr_,
81 const Names & columns_,
82 const DataTypes & data_types_,
83 const Block & header_,
84 size_t granularity_)
85 : name(name_)
86 , expr(expr_)
87 , columns(columns_)
88 , data_types(data_types_)
89 , header(header_)
90 , granularity(granularity_) {}
91
92 virtual ~IMergeTreeIndex() = default;
93
94 /// gets filename without extension
95 String getFileName() const { return INDEX_FILE_PREFIX + name; }
96
97 /// Checks whether the column is in data skipping index.
98 virtual bool mayBenefitFromIndexForIn(const ASTPtr & node) const = 0;
99
100 virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0;
101 virtual MergeTreeIndexAggregatorPtr createIndexAggregator() const = 0;
102
103 virtual MergeTreeIndexConditionPtr createIndexCondition(
104 const SelectQueryInfo & query_info, const Context & context) const = 0;
105
106 Names getColumnsRequiredForIndexCalc() const { return expr->getRequiredColumns(); }
107
108 /// Index name
109 String name;
110
111 /// Index expression (x * y)
112 /// with columns arguments
113 ExpressionActionsPtr expr;
114
115 /// Names of columns for index
116 Names columns;
117
118 /// Data types of columns
119 DataTypes data_types;
120
121 /// Block with columns and data_types
122 Block header;
123
124 /// Skip index granularity
125 size_t granularity;
126};
127
128using MergeTreeIndices = std::vector<MutableMergeTreeIndexPtr>;
129
130
131class MergeTreeIndexFactory : private boost::noncopyable
132{
133public:
134 static MergeTreeIndexFactory & instance();
135
136 using Creator = std::function<
137 std::unique_ptr<IMergeTreeIndex>(
138 const NamesAndTypesList & columns,
139 std::shared_ptr<ASTIndexDeclaration> node,
140 const Context & context)>;
141
142 std::unique_ptr<IMergeTreeIndex> get(
143 const NamesAndTypesList & columns,
144 std::shared_ptr<ASTIndexDeclaration> node,
145 const Context & context) const;
146
147 void registerIndex(const std::string & name, Creator creator);
148
149 const auto & getAllIndexes() const { return indexes; }
150
151protected:
152 MergeTreeIndexFactory();
153
154private:
155 using Indexes = std::unordered_map<std::string, Creator>;
156 Indexes indexes;
157};
158
159std::unique_ptr<IMergeTreeIndex> minmaxIndexCreator(
160 const NamesAndTypesList & columns,
161 std::shared_ptr<ASTIndexDeclaration> node,
162 const Context & context);
163
164std::unique_ptr<IMergeTreeIndex> setIndexCreator(
165 const NamesAndTypesList & columns,
166 std::shared_ptr<ASTIndexDeclaration> node,
167 const Context & context);
168
169std::unique_ptr<IMergeTreeIndex> bloomFilterIndexCreator(
170 const NamesAndTypesList & columns,
171 std::shared_ptr<ASTIndexDeclaration> node,
172 const Context & context);
173
174std::unique_ptr<IMergeTreeIndex> bloomFilterIndexCreatorNew(
175 const NamesAndTypesList & columns,
176 std::shared_ptr<ASTIndexDeclaration> node,
177 const Context & context);
178
179}
180