1 | #pragma once |
2 | |
3 | #include <sstream> |
4 | #include <optional> |
5 | |
6 | #include <Interpreters/Context.h> |
7 | #include <Interpreters/ExpressionActions.h> |
8 | #include <Interpreters/Set.h> |
9 | #include <Core/SortDescription.h> |
10 | #include <Parsers/ASTExpressionList.h> |
11 | #include <Parsers/ASTSelectQuery.h> |
12 | #include <Parsers/ASTFunction.h> |
13 | #include <Storages/SelectQueryInfo.h> |
14 | |
15 | |
16 | namespace DB |
17 | { |
18 | |
19 | class IFunction; |
20 | using FunctionBasePtr = std::shared_ptr<IFunctionBase>; |
21 | |
22 | /** Range with open or closed ends; possibly unbounded. |
23 | */ |
24 | struct Range |
25 | { |
26 | private: |
27 | static bool equals(const Field & lhs, const Field & rhs); |
28 | static bool less(const Field & lhs, const Field & rhs); |
29 | |
30 | public: |
31 | Field left; /// the left border, if any |
32 | Field right; /// the right border, if any |
33 | bool left_bounded = false; /// bounded at the left |
34 | bool right_bounded = false; /// bounded at the right |
35 | bool left_included = false; /// includes the left border, if any |
36 | bool right_included = false; /// includes the right border, if any |
37 | |
38 | /// The whole unversum. |
39 | Range() {} |
40 | |
41 | /// One point. |
42 | Range(const Field & point) |
43 | : left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {} |
44 | |
45 | /// A bounded two-sided range. |
46 | Range(const Field & left_, bool left_included_, const Field & right_, bool right_included_) |
47 | : left(left_), right(right_), |
48 | left_bounded(true), right_bounded(true), |
49 | left_included(left_included_), right_included(right_included_) |
50 | { |
51 | shrinkToIncludedIfPossible(); |
52 | } |
53 | |
54 | static Range createRightBounded(const Field & right_point, bool right_included) |
55 | { |
56 | Range r; |
57 | r.right = right_point; |
58 | r.right_bounded = true; |
59 | r.right_included = right_included; |
60 | r.shrinkToIncludedIfPossible(); |
61 | return r; |
62 | } |
63 | |
64 | static Range createLeftBounded(const Field & left_point, bool left_included) |
65 | { |
66 | Range r; |
67 | r.left = left_point; |
68 | r.left_bounded = true; |
69 | r.left_included = left_included; |
70 | r.shrinkToIncludedIfPossible(); |
71 | return r; |
72 | } |
73 | |
74 | /** Optimize the range. If it has an open boundary and the Field type is "loose" |
75 | * - then convert it to closed, narrowing by one. |
76 | * That is, for example, turn (0,2) into [1]. |
77 | */ |
78 | void shrinkToIncludedIfPossible() |
79 | { |
80 | if (left_bounded && !left_included) |
81 | { |
82 | if (left.getType() == Field::Types::UInt64 && left.get<UInt64>() != std::numeric_limits<UInt64>::max()) |
83 | { |
84 | ++left.get<UInt64 &>(); |
85 | left_included = true; |
86 | } |
87 | if (left.getType() == Field::Types::Int64 && left.get<Int64>() != std::numeric_limits<Int64>::max()) |
88 | { |
89 | ++left.get<Int64 &>(); |
90 | left_included = true; |
91 | } |
92 | } |
93 | if (right_bounded && !right_included) |
94 | { |
95 | if (right.getType() == Field::Types::UInt64 && right.get<UInt64>() != std::numeric_limits<UInt64>::min()) |
96 | { |
97 | --right.get<UInt64 &>(); |
98 | right_included = true; |
99 | } |
100 | if (right.getType() == Field::Types::Int64 && right.get<Int64>() != std::numeric_limits<Int64>::min()) |
101 | { |
102 | --right.get<Int64 &>(); |
103 | right_included = true; |
104 | } |
105 | } |
106 | } |
107 | |
108 | bool empty() const |
109 | { |
110 | return left_bounded && right_bounded |
111 | && (less(right, left) |
112 | || ((!left_included || !right_included) && !less(left, right))); |
113 | } |
114 | |
115 | /// x contained in the range |
116 | bool contains(const Field & x) const |
117 | { |
118 | return !leftThan(x) && !rightThan(x); |
119 | } |
120 | |
121 | /// x is to the left |
122 | bool rightThan(const Field & x) const |
123 | { |
124 | return (left_bounded |
125 | ? !(less(left, x) || (left_included && equals(x, left))) |
126 | : false); |
127 | } |
128 | |
129 | /// x is to the right |
130 | bool leftThan(const Field & x) const |
131 | { |
132 | return (right_bounded |
133 | ? !(less(x, right) || (right_included && equals(x, right))) |
134 | : false); |
135 | } |
136 | |
137 | bool intersectsRange(const Range & r) const |
138 | { |
139 | /// r to the left of me. |
140 | if (r.right_bounded |
141 | && left_bounded |
142 | && (less(r.right, left) |
143 | || ((!left_included || !r.right_included) |
144 | && equals(r.right, left)))) |
145 | return false; |
146 | |
147 | /// r to the right of me. |
148 | if (r.left_bounded |
149 | && right_bounded |
150 | && (less(right, r.left) /// ...} {... |
151 | || ((!right_included || !r.left_included) /// ...) [... or ...] (... |
152 | && equals(r.left, right)))) |
153 | return false; |
154 | |
155 | return true; |
156 | } |
157 | |
158 | bool containsRange(const Range & r) const |
159 | { |
160 | /// r starts to the left of me. |
161 | if (left_bounded |
162 | && (!r.left_bounded |
163 | || less(r.left, left) |
164 | || (r.left_included |
165 | && !left_included |
166 | && equals(r.left, left)))) |
167 | return false; |
168 | |
169 | /// r ends right of me. |
170 | if (right_bounded |
171 | && (!r.right_bounded |
172 | || less(right, r.right) |
173 | || (r.right_included |
174 | && !right_included |
175 | && equals(r.right, right)))) |
176 | return false; |
177 | |
178 | return true; |
179 | } |
180 | |
181 | void swapLeftAndRight() |
182 | { |
183 | std::swap(left, right); |
184 | std::swap(left_bounded, right_bounded); |
185 | std::swap(left_included, right_included); |
186 | } |
187 | |
188 | String toString() const; |
189 | }; |
190 | |
191 | |
192 | /// Class that extends arbitrary objects with infinities, like +-inf for floats |
193 | class FieldWithInfinity |
194 | { |
195 | public: |
196 | enum Type |
197 | { |
198 | MINUS_INFINITY = -1, |
199 | NORMAL = 0, |
200 | PLUS_INFINITY = 1 |
201 | }; |
202 | |
203 | explicit FieldWithInfinity(const Field & field_); |
204 | FieldWithInfinity(Field && field_); |
205 | |
206 | static FieldWithInfinity getMinusInfinity(); |
207 | static FieldWithInfinity getPlusinfinity(); |
208 | |
209 | bool operator<(const FieldWithInfinity & other) const; |
210 | bool operator==(const FieldWithInfinity & other) const; |
211 | |
212 | private: |
213 | Field field; |
214 | Type type; |
215 | |
216 | FieldWithInfinity(const Type type_); |
217 | }; |
218 | |
219 | |
220 | /** Condition on the index. |
221 | * |
222 | * Consists of the conditions for the key belonging to all possible ranges or sets, |
223 | * as well as logical operators AND/OR/NOT above these conditions. |
224 | * |
225 | * Constructs a reverse polish notation from these conditions |
226 | * and can calculate (interpret) its satisfiability over key ranges. |
227 | */ |
228 | class KeyCondition |
229 | { |
230 | public: |
231 | /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table. |
232 | KeyCondition( |
233 | const SelectQueryInfo & query_info, |
234 | const Context & context, |
235 | const Names & key_column_names, |
236 | const ExpressionActionsPtr & key_expr); |
237 | |
238 | /// Whether the condition is feasible in the key range. |
239 | /// left_key and right_key must contain all fields in the sort_descr in the appropriate order. |
240 | /// data_types - the types of the key columns. |
241 | bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const; |
242 | |
243 | /// Whether the condition is feasible in the direct product of single column ranges specified by `parallelogram`. |
244 | bool mayBeTrueInParallelogram(const std::vector<Range> & parallelogram, const DataTypes & data_types) const; |
245 | |
246 | /// Is the condition valid in a semi-infinite (not limited to the right) key range. |
247 | /// left_key must contain all the fields in the sort_descr in the appropriate order. |
248 | bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const; |
249 | |
250 | /// Checks that the index can not be used. |
251 | bool alwaysUnknownOrTrue() const; |
252 | |
253 | /// Get the maximum number of the key element used in the condition. |
254 | size_t getMaxKeyColumn() const; |
255 | |
256 | /// Impose an additional condition: the value in the column `column` must be in the range `range`. |
257 | /// Returns whether there is such a column in the key. |
258 | bool addCondition(const String & column, const Range & range); |
259 | |
260 | String toString() const; |
261 | |
262 | |
263 | /** A chain of possibly monotone functions. |
264 | * If the key column is wrapped in functions that can be monotonous in some value ranges |
265 | * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate. |
266 | */ |
267 | using MonotonicFunctionsChain = std::vector<FunctionBasePtr>; |
268 | |
269 | /** Computes value of constant expression and its data type. |
270 | * Returns false, if expression isn't constant. |
271 | */ |
272 | static bool getConstant( |
273 | const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type); |
274 | |
275 | static Block getBlockWithConstants( |
276 | const ASTPtr & query, const SyntaxAnalyzerResultPtr & syntax_analyzer_result, const Context & context); |
277 | |
278 | static std::optional<Range> applyMonotonicFunctionsChainToRange( |
279 | Range key_range, |
280 | MonotonicFunctionsChain & functions, |
281 | DataTypePtr current_type); |
282 | |
283 | private: |
284 | /// The expression is stored as Reverse Polish Notation. |
285 | struct RPNElement |
286 | { |
287 | enum Function |
288 | { |
289 | /// Atoms of a Boolean expression. |
290 | FUNCTION_IN_RANGE, |
291 | FUNCTION_NOT_IN_RANGE, |
292 | FUNCTION_IN_SET, |
293 | FUNCTION_NOT_IN_SET, |
294 | FUNCTION_UNKNOWN, /// Can take any value. |
295 | /// Operators of the logical expression. |
296 | FUNCTION_NOT, |
297 | FUNCTION_AND, |
298 | FUNCTION_OR, |
299 | /// Constants |
300 | ALWAYS_FALSE, |
301 | ALWAYS_TRUE, |
302 | }; |
303 | |
304 | RPNElement() {} |
305 | RPNElement(Function function_) : function(function_) {} |
306 | RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {} |
307 | RPNElement(Function function_, size_t key_column_, const Range & range_) |
308 | : function(function_), range(range_), key_column(key_column_) {} |
309 | |
310 | String toString() const; |
311 | |
312 | Function function = FUNCTION_UNKNOWN; |
313 | |
314 | /// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE. |
315 | Range range; |
316 | size_t key_column = 0; |
317 | /// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET |
318 | using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>; |
319 | MergeTreeSetIndexPtr set_index; |
320 | |
321 | mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy. |
322 | }; |
323 | |
324 | using RPN = std::vector<RPNElement>; |
325 | using ColumnIndices = std::map<String, size_t>; |
326 | |
327 | using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>; |
328 | |
329 | public: |
330 | static const AtomMap atom_map; |
331 | |
332 | private: |
333 | bool mayBeTrueInRange( |
334 | size_t used_key_size, |
335 | const Field * left_key, |
336 | const Field * right_key, |
337 | const DataTypes & data_types, |
338 | bool right_bounded) const; |
339 | |
340 | void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants); |
341 | bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out); |
342 | bool operatorFromAST(const ASTFunction * func, RPNElement & out); |
343 | |
344 | /** Is node the key column |
345 | * or expression in which column of key is wrapped by chain of functions, |
346 | * that can be monotonic on certain ranges? |
347 | * If these conditions are true, then returns number of column in key, type of resulting expression |
348 | * and fills chain of possibly-monotonic functions. |
349 | */ |
350 | bool isKeyPossiblyWrappedByMonotonicFunctions( |
351 | const ASTPtr & node, |
352 | const Context & context, |
353 | size_t & out_key_column_num, |
354 | DataTypePtr & out_key_res_column_type, |
355 | MonotonicFunctionsChain & out_functions_chain); |
356 | |
357 | bool isKeyPossiblyWrappedByMonotonicFunctionsImpl( |
358 | const ASTPtr & node, |
359 | size_t & out_key_column_num, |
360 | DataTypePtr & out_key_column_type, |
361 | std::vector<const ASTFunction *> & out_functions_chain); |
362 | |
363 | bool canConstantBeWrappedByMonotonicFunctions( |
364 | const ASTPtr & node, |
365 | size_t & out_key_column_num, |
366 | DataTypePtr & out_key_column_type, |
367 | Field & out_value, |
368 | DataTypePtr & out_type); |
369 | |
370 | /// If it's possible to make an RPNElement |
371 | /// that will filter values (possibly tuples) by the content of 'prepared_set', |
372 | /// do it and return true. |
373 | bool tryPrepareSetIndex( |
374 | const ASTs & args, |
375 | const Context & context, |
376 | RPNElement & out, |
377 | size_t & out_key_column_num); |
378 | |
379 | RPN rpn; |
380 | |
381 | ColumnIndices key_columns; |
382 | ExpressionActionsPtr key_expr; |
383 | PreparedSets prepared_sets; |
384 | }; |
385 | |
386 | } |
387 | |