1#pragma once
2
3#include <sstream>
4#include <optional>
5
6#include <Interpreters/Context.h>
7#include <Interpreters/ExpressionActions.h>
8#include <Interpreters/Set.h>
9#include <Core/SortDescription.h>
10#include <Parsers/ASTExpressionList.h>
11#include <Parsers/ASTSelectQuery.h>
12#include <Parsers/ASTFunction.h>
13#include <Storages/SelectQueryInfo.h>
14
15
16namespace DB
17{
18
19class IFunction;
20using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
21
22/** Range with open or closed ends; possibly unbounded.
23 */
24struct Range
25{
26private:
27 static bool equals(const Field & lhs, const Field & rhs);
28 static bool less(const Field & lhs, const Field & rhs);
29
30public:
31 Field left; /// the left border, if any
32 Field right; /// the right border, if any
33 bool left_bounded = false; /// bounded at the left
34 bool right_bounded = false; /// bounded at the right
35 bool left_included = false; /// includes the left border, if any
36 bool right_included = false; /// includes the right border, if any
37
38 /// The whole unversum.
39 Range() {}
40
41 /// One point.
42 Range(const Field & point)
43 : left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {}
44
45 /// A bounded two-sided range.
46 Range(const Field & left_, bool left_included_, const Field & right_, bool right_included_)
47 : left(left_), right(right_),
48 left_bounded(true), right_bounded(true),
49 left_included(left_included_), right_included(right_included_)
50 {
51 shrinkToIncludedIfPossible();
52 }
53
54 static Range createRightBounded(const Field & right_point, bool right_included)
55 {
56 Range r;
57 r.right = right_point;
58 r.right_bounded = true;
59 r.right_included = right_included;
60 r.shrinkToIncludedIfPossible();
61 return r;
62 }
63
64 static Range createLeftBounded(const Field & left_point, bool left_included)
65 {
66 Range r;
67 r.left = left_point;
68 r.left_bounded = true;
69 r.left_included = left_included;
70 r.shrinkToIncludedIfPossible();
71 return r;
72 }
73
74 /** Optimize the range. If it has an open boundary and the Field type is "loose"
75 * - then convert it to closed, narrowing by one.
76 * That is, for example, turn (0,2) into [1].
77 */
78 void shrinkToIncludedIfPossible()
79 {
80 if (left_bounded && !left_included)
81 {
82 if (left.getType() == Field::Types::UInt64 && left.get<UInt64>() != std::numeric_limits<UInt64>::max())
83 {
84 ++left.get<UInt64 &>();
85 left_included = true;
86 }
87 if (left.getType() == Field::Types::Int64 && left.get<Int64>() != std::numeric_limits<Int64>::max())
88 {
89 ++left.get<Int64 &>();
90 left_included = true;
91 }
92 }
93 if (right_bounded && !right_included)
94 {
95 if (right.getType() == Field::Types::UInt64 && right.get<UInt64>() != std::numeric_limits<UInt64>::min())
96 {
97 --right.get<UInt64 &>();
98 right_included = true;
99 }
100 if (right.getType() == Field::Types::Int64 && right.get<Int64>() != std::numeric_limits<Int64>::min())
101 {
102 --right.get<Int64 &>();
103 right_included = true;
104 }
105 }
106 }
107
108 bool empty() const
109 {
110 return left_bounded && right_bounded
111 && (less(right, left)
112 || ((!left_included || !right_included) && !less(left, right)));
113 }
114
115 /// x contained in the range
116 bool contains(const Field & x) const
117 {
118 return !leftThan(x) && !rightThan(x);
119 }
120
121 /// x is to the left
122 bool rightThan(const Field & x) const
123 {
124 return (left_bounded
125 ? !(less(left, x) || (left_included && equals(x, left)))
126 : false);
127 }
128
129 /// x is to the right
130 bool leftThan(const Field & x) const
131 {
132 return (right_bounded
133 ? !(less(x, right) || (right_included && equals(x, right)))
134 : false);
135 }
136
137 bool intersectsRange(const Range & r) const
138 {
139 /// r to the left of me.
140 if (r.right_bounded
141 && left_bounded
142 && (less(r.right, left)
143 || ((!left_included || !r.right_included)
144 && equals(r.right, left))))
145 return false;
146
147 /// r to the right of me.
148 if (r.left_bounded
149 && right_bounded
150 && (less(right, r.left) /// ...} {...
151 || ((!right_included || !r.left_included) /// ...) [... or ...] (...
152 && equals(r.left, right))))
153 return false;
154
155 return true;
156 }
157
158 bool containsRange(const Range & r) const
159 {
160 /// r starts to the left of me.
161 if (left_bounded
162 && (!r.left_bounded
163 || less(r.left, left)
164 || (r.left_included
165 && !left_included
166 && equals(r.left, left))))
167 return false;
168
169 /// r ends right of me.
170 if (right_bounded
171 && (!r.right_bounded
172 || less(right, r.right)
173 || (r.right_included
174 && !right_included
175 && equals(r.right, right))))
176 return false;
177
178 return true;
179 }
180
181 void swapLeftAndRight()
182 {
183 std::swap(left, right);
184 std::swap(left_bounded, right_bounded);
185 std::swap(left_included, right_included);
186 }
187
188 String toString() const;
189};
190
191
192/// Class that extends arbitrary objects with infinities, like +-inf for floats
193class FieldWithInfinity
194{
195public:
196 enum Type
197 {
198 MINUS_INFINITY = -1,
199 NORMAL = 0,
200 PLUS_INFINITY = 1
201 };
202
203 explicit FieldWithInfinity(const Field & field_);
204 FieldWithInfinity(Field && field_);
205
206 static FieldWithInfinity getMinusInfinity();
207 static FieldWithInfinity getPlusinfinity();
208
209 bool operator<(const FieldWithInfinity & other) const;
210 bool operator==(const FieldWithInfinity & other) const;
211
212private:
213 Field field;
214 Type type;
215
216 FieldWithInfinity(const Type type_);
217};
218
219
220/** Condition on the index.
221 *
222 * Consists of the conditions for the key belonging to all possible ranges or sets,
223 * as well as logical operators AND/OR/NOT above these conditions.
224 *
225 * Constructs a reverse polish notation from these conditions
226 * and can calculate (interpret) its satisfiability over key ranges.
227 */
228class KeyCondition
229{
230public:
231 /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table.
232 KeyCondition(
233 const SelectQueryInfo & query_info,
234 const Context & context,
235 const Names & key_column_names,
236 const ExpressionActionsPtr & key_expr);
237
238 /// Whether the condition is feasible in the key range.
239 /// left_key and right_key must contain all fields in the sort_descr in the appropriate order.
240 /// data_types - the types of the key columns.
241 bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const;
242
243 /// Whether the condition is feasible in the direct product of single column ranges specified by `parallelogram`.
244 bool mayBeTrueInParallelogram(const std::vector<Range> & parallelogram, const DataTypes & data_types) const;
245
246 /// Is the condition valid in a semi-infinite (not limited to the right) key range.
247 /// left_key must contain all the fields in the sort_descr in the appropriate order.
248 bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const;
249
250 /// Checks that the index can not be used.
251 bool alwaysUnknownOrTrue() const;
252
253 /// Get the maximum number of the key element used in the condition.
254 size_t getMaxKeyColumn() const;
255
256 /// Impose an additional condition: the value in the column `column` must be in the range `range`.
257 /// Returns whether there is such a column in the key.
258 bool addCondition(const String & column, const Range & range);
259
260 String toString() const;
261
262
263 /** A chain of possibly monotone functions.
264 * If the key column is wrapped in functions that can be monotonous in some value ranges
265 * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate.
266 */
267 using MonotonicFunctionsChain = std::vector<FunctionBasePtr>;
268
269 /** Computes value of constant expression and its data type.
270 * Returns false, if expression isn't constant.
271 */
272 static bool getConstant(
273 const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type);
274
275 static Block getBlockWithConstants(
276 const ASTPtr & query, const SyntaxAnalyzerResultPtr & syntax_analyzer_result, const Context & context);
277
278 static std::optional<Range> applyMonotonicFunctionsChainToRange(
279 Range key_range,
280 MonotonicFunctionsChain & functions,
281 DataTypePtr current_type);
282
283private:
284 /// The expression is stored as Reverse Polish Notation.
285 struct RPNElement
286 {
287 enum Function
288 {
289 /// Atoms of a Boolean expression.
290 FUNCTION_IN_RANGE,
291 FUNCTION_NOT_IN_RANGE,
292 FUNCTION_IN_SET,
293 FUNCTION_NOT_IN_SET,
294 FUNCTION_UNKNOWN, /// Can take any value.
295 /// Operators of the logical expression.
296 FUNCTION_NOT,
297 FUNCTION_AND,
298 FUNCTION_OR,
299 /// Constants
300 ALWAYS_FALSE,
301 ALWAYS_TRUE,
302 };
303
304 RPNElement() {}
305 RPNElement(Function function_) : function(function_) {}
306 RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {}
307 RPNElement(Function function_, size_t key_column_, const Range & range_)
308 : function(function_), range(range_), key_column(key_column_) {}
309
310 String toString() const;
311
312 Function function = FUNCTION_UNKNOWN;
313
314 /// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE.
315 Range range;
316 size_t key_column = 0;
317 /// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET
318 using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>;
319 MergeTreeSetIndexPtr set_index;
320
321 mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy.
322 };
323
324 using RPN = std::vector<RPNElement>;
325 using ColumnIndices = std::map<String, size_t>;
326
327 using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>;
328
329public:
330 static const AtomMap atom_map;
331
332private:
333 bool mayBeTrueInRange(
334 size_t used_key_size,
335 const Field * left_key,
336 const Field * right_key,
337 const DataTypes & data_types,
338 bool right_bounded) const;
339
340 void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
341 bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
342 bool operatorFromAST(const ASTFunction * func, RPNElement & out);
343
344 /** Is node the key column
345 * or expression in which column of key is wrapped by chain of functions,
346 * that can be monotonic on certain ranges?
347 * If these conditions are true, then returns number of column in key, type of resulting expression
348 * and fills chain of possibly-monotonic functions.
349 */
350 bool isKeyPossiblyWrappedByMonotonicFunctions(
351 const ASTPtr & node,
352 const Context & context,
353 size_t & out_key_column_num,
354 DataTypePtr & out_key_res_column_type,
355 MonotonicFunctionsChain & out_functions_chain);
356
357 bool isKeyPossiblyWrappedByMonotonicFunctionsImpl(
358 const ASTPtr & node,
359 size_t & out_key_column_num,
360 DataTypePtr & out_key_column_type,
361 std::vector<const ASTFunction *> & out_functions_chain);
362
363 bool canConstantBeWrappedByMonotonicFunctions(
364 const ASTPtr & node,
365 size_t & out_key_column_num,
366 DataTypePtr & out_key_column_type,
367 Field & out_value,
368 DataTypePtr & out_type);
369
370 /// If it's possible to make an RPNElement
371 /// that will filter values (possibly tuples) by the content of 'prepared_set',
372 /// do it and return true.
373 bool tryPrepareSetIndex(
374 const ASTs & args,
375 const Context & context,
376 RPNElement & out,
377 size_t & out_key_column_num);
378
379 RPN rpn;
380
381 ColumnIndices key_columns;
382 ExpressionActionsPtr key_expr;
383 PreparedSets prepared_sets;
384};
385
386}
387