1#include <Storages/MergeTree/MergeTreeIndexMinMax.h>
2
3#include <Interpreters/ExpressionActions.h>
4#include <Interpreters/ExpressionAnalyzer.h>
5#include <Interpreters/SyntaxAnalyzer.h>
6
7#include <Poco/Logger.h>
8
9namespace DB
10{
11
12namespace ErrorCodes
13{
14 extern const int LOGICAL_ERROR;
15 extern const int INCORRECT_QUERY;
16}
17
18
19MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index_)
20 : IMergeTreeIndexGranule(), index(index_), parallelogram() {}
21
22MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(
23 const MergeTreeIndexMinMax & index_, std::vector<Range> && parallelogram_)
24 : IMergeTreeIndexGranule(), index(index_), parallelogram(std::move(parallelogram_)) {}
25
26void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const
27{
28 if (empty())
29 throw Exception(
30 "Attempt to write empty minmax index " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR);
31
32 for (size_t i = 0; i < index.columns.size(); ++i)
33 {
34 const DataTypePtr & type = index.data_types[i];
35 if (!type->isNullable())
36 {
37 type->serializeBinary(parallelogram[i].left, ostr);
38 type->serializeBinary(parallelogram[i].right, ostr);
39 }
40 else
41 {
42 bool is_null = parallelogram[i].left.isNull() || parallelogram[i].right.isNull(); // one is enough
43 writeBinary(is_null, ostr);
44 if (!is_null)
45 {
46 type->serializeBinary(parallelogram[i].left, ostr);
47 type->serializeBinary(parallelogram[i].right, ostr);
48 }
49 }
50 }
51}
52
53void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr)
54{
55 parallelogram.clear();
56 Field min_val;
57 Field max_val;
58 for (size_t i = 0; i < index.columns.size(); ++i)
59 {
60 const DataTypePtr & type = index.data_types[i];
61 if (!type->isNullable())
62 {
63 type->deserializeBinary(min_val, istr);
64 type->deserializeBinary(max_val, istr);
65 }
66 else
67 {
68 bool is_null;
69 readBinary(is_null, istr);
70 if (!is_null)
71 {
72 type->deserializeBinary(min_val, istr);
73 type->deserializeBinary(max_val, istr);
74 }
75 else
76 {
77 min_val = Null();
78 max_val = Null();
79 }
80 }
81 parallelogram.emplace_back(min_val, true, max_val, true);
82 }
83}
84
85
86MergeTreeIndexAggregatorMinMax::MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index_)
87 : index(index_) {}
88
89MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset()
90{
91 return std::make_shared<MergeTreeIndexGranuleMinMax>(index, std::move(parallelogram));
92}
93
94void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit)
95{
96 if (*pos >= block.rows())
97 throw Exception(
98 "The provided position is not less than the number of block rows. Position: "
99 + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR);
100
101 size_t rows_read = std::min(limit, block.rows() - *pos);
102
103 Field field_min;
104 Field field_max;
105 for (size_t i = 0; i < index.columns.size(); ++i)
106 {
107 const auto & column = block.getByName(index.columns[i]).column;
108 column->cut(*pos, rows_read)->getExtremes(field_min, field_max);
109
110 if (parallelogram.size() <= i)
111 {
112 parallelogram.emplace_back(field_min, true, field_max, true);
113 }
114 else
115 {
116 parallelogram[i].left = std::min(parallelogram[i].left, field_min);
117 parallelogram[i].right = std::max(parallelogram[i].right, field_max);
118 }
119 }
120
121 *pos += rows_read;
122}
123
124
125MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax(
126 const SelectQueryInfo &query,
127 const Context &context,
128 const MergeTreeIndexMinMax &index_)
129 : IMergeTreeIndexCondition(), index(index_), condition(query, context, index.columns, index.expr) {}
130
131bool MergeTreeIndexConditionMinMax::alwaysUnknownOrTrue() const
132{
133 return condition.alwaysUnknownOrTrue();
134}
135
136bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
137{
138 std::shared_ptr<MergeTreeIndexGranuleMinMax> granule
139 = std::dynamic_pointer_cast<MergeTreeIndexGranuleMinMax>(idx_granule);
140 if (!granule)
141 throw Exception(
142 "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
143 for (const auto & range : granule->parallelogram)
144 if (range.left.isNull() || range.right.isNull())
145 return true;
146 return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types);
147}
148
149
150MergeTreeIndexGranulePtr MergeTreeIndexMinMax::createIndexGranule() const
151{
152 return std::make_shared<MergeTreeIndexGranuleMinMax>(*this);
153}
154
155
156MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator() const
157{
158 return std::make_shared<MergeTreeIndexAggregatorMinMax>(*this);
159}
160
161
162MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition(
163 const SelectQueryInfo & query, const Context & context) const
164{
165 return std::make_shared<MergeTreeIndexConditionMinMax>(query, context, *this);
166};
167
168bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const
169{
170 const String column_name = node->getColumnName();
171
172 for (const auto & cname : columns)
173 if (column_name == cname)
174 return true;
175
176 if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
177 if (func->arguments->children.size() == 1)
178 return mayBenefitFromIndexForIn(func->arguments->children.front());
179
180 return false;
181}
182
183std::unique_ptr<IMergeTreeIndex> minmaxIndexCreator(
184 const NamesAndTypesList & new_columns,
185 std::shared_ptr<ASTIndexDeclaration> node,
186 const Context & context)
187{
188 if (node->name.empty())
189 throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY);
190
191 if (node->type->arguments)
192 throw Exception("Minmax index have not any arguments", ErrorCodes::INCORRECT_QUERY);
193
194 ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone());
195 auto syntax = SyntaxAnalyzer(context, {}).analyze(
196 expr_list, new_columns);
197 auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
198
199 auto sample = ExpressionAnalyzer(expr_list, syntax, context)
200 .getActions(true)->getSampleBlock();
201
202 Names columns;
203 DataTypes data_types;
204
205 for (size_t i = 0; i < expr_list->children.size(); ++i)
206 {
207 const auto & column = sample.getByPosition(i);
208
209 columns.emplace_back(column.name);
210 data_types.emplace_back(column.type);
211 }
212
213 return std::make_unique<MergeTreeIndexMinMax>(
214 node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity);
215}
216
217}
218