1#include <Storages/MergeTree/KeyCondition.h>
2#include <Storages/MergeTree/BoolMask.h>
3#include <DataTypes/DataTypesNumber.h>
4#include <Interpreters/SyntaxAnalyzer.h>
5#include <Interpreters/ExpressionAnalyzer.h>
6#include <Interpreters/ExpressionActions.h>
7#include <Interpreters/misc.h>
8#include <Functions/FunctionFactory.h>
9#include <Functions/IFunction.h>
10#include <Common/FieldVisitors.h>
11#include <Common/typeid_cast.h>
12#include <Interpreters/convertFieldToType.h>
13#include <Interpreters/Set.h>
14#include <Parsers/queryToString.h>
15#include <Parsers/ASTLiteral.h>
16#include <Parsers/ASTSubquery.h>
17#include <Parsers/ASTIdentifier.h>
18
19#include <cassert>
20
21
22namespace DB
23{
24
25namespace ErrorCodes
26{
27 extern const int LOGICAL_ERROR;
28 extern const int BAD_TYPE_OF_FIELD;
29 extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
30}
31
32
33String Range::toString() const
34{
35 std::stringstream str;
36
37 if (!left_bounded)
38 str << "(-inf, ";
39 else
40 str << (left_included ? '[' : '(') << applyVisitor(FieldVisitorToString(), left) << ", ";
41
42 if (!right_bounded)
43 str << "+inf)";
44 else
45 str << applyVisitor(FieldVisitorToString(), right) << (right_included ? ']' : ')');
46
47 return str.str();
48}
49
50
51/// Example: for `Hello\_World% ...` string it returns `Hello_World`, and for `%test%` returns an empty string.
52static String extractFixedPrefixFromLikePattern(const String & like_pattern)
53{
54 String fixed_prefix;
55
56 const char * pos = like_pattern.data();
57 const char * end = pos + like_pattern.size();
58 while (pos < end)
59 {
60 switch (*pos)
61 {
62 case '%':
63 [[fallthrough]];
64 case '_':
65 return fixed_prefix;
66
67 case '\\':
68 ++pos;
69 if (pos == end)
70 break;
71 [[fallthrough]];
72 default:
73 fixed_prefix += *pos;
74 break;
75 }
76
77 ++pos;
78 }
79
80 return fixed_prefix;
81}
82
83
84/** For a given string, get a minimum string that is strictly greater than all strings with this prefix,
85 * or return an empty string if there are no such strings.
86 */
87static String firstStringThatIsGreaterThanAllStringsWithPrefix(const String & prefix)
88{
89 /** Increment the last byte of the prefix by one. But if it is 255, then remove it and increase the previous one.
90 * Example (for convenience, suppose that the maximum value of byte is `z`)
91 * abcx -> abcy
92 * abcz -> abd
93 * zzz -> empty string
94 * z -> empty string
95 */
96
97 String res = prefix;
98
99 while (!res.empty() && static_cast<UInt8>(res.back()) == 255)
100 res.pop_back();
101
102 if (res.empty())
103 return res;
104
105 res.back() = static_cast<char>(1 + static_cast<UInt8>(res.back()));
106 return res;
107}
108
109
110/// A dictionary containing actions to the corresponding functions to turn them into `RPNElement`
111const KeyCondition::AtomMap KeyCondition::atom_map
112{
113 {
114 "notEquals",
115 [] (RPNElement & out, const Field & value)
116 {
117 out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
118 out.range = Range(value);
119 return true;
120 }
121 },
122 {
123 "equals",
124 [] (RPNElement & out, const Field & value)
125 {
126 out.function = RPNElement::FUNCTION_IN_RANGE;
127 out.range = Range(value);
128 return true;
129 }
130 },
131 {
132 "less",
133 [] (RPNElement & out, const Field & value)
134 {
135 out.function = RPNElement::FUNCTION_IN_RANGE;
136 out.range = Range::createRightBounded(value, false);
137 return true;
138 }
139 },
140 {
141 "greater",
142 [] (RPNElement & out, const Field & value)
143 {
144 out.function = RPNElement::FUNCTION_IN_RANGE;
145 out.range = Range::createLeftBounded(value, false);
146 return true;
147 }
148 },
149 {
150 "lessOrEquals",
151 [] (RPNElement & out, const Field & value)
152 {
153 out.function = RPNElement::FUNCTION_IN_RANGE;
154 out.range = Range::createRightBounded(value, true);
155 return true;
156 }
157 },
158 {
159 "greaterOrEquals",
160 [] (RPNElement & out, const Field & value)
161 {
162 out.function = RPNElement::FUNCTION_IN_RANGE;
163 out.range = Range::createLeftBounded(value, true);
164 return true;
165 }
166 },
167 {
168 "in",
169 [] (RPNElement & out, const Field &)
170 {
171 out.function = RPNElement::FUNCTION_IN_SET;
172 return true;
173 }
174 },
175 {
176 "notIn",
177 [] (RPNElement & out, const Field &)
178 {
179 out.function = RPNElement::FUNCTION_NOT_IN_SET;
180 return true;
181 }
182 },
183 {
184 "empty",
185 [] (RPNElement & out, const Field &)
186 {
187 out.function = RPNElement::FUNCTION_IN_RANGE;
188 out.range = Range("");
189 return true;
190 }
191 },
192 {
193 "notEmpty",
194 [] (RPNElement & out, const Field &)
195 {
196 out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
197 out.range = Range("");
198 return true;
199 }
200 },
201 {
202 "like",
203 [] (RPNElement & out, const Field & value)
204 {
205 if (value.getType() != Field::Types::String)
206 return false;
207
208 String prefix = extractFixedPrefixFromLikePattern(value.get<const String &>());
209 if (prefix.empty())
210 return false;
211
212 String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
213
214 out.function = RPNElement::FUNCTION_IN_RANGE;
215 out.range = !right_bound.empty()
216 ? Range(prefix, true, right_bound, false)
217 : Range::createLeftBounded(prefix, true);
218
219 return true;
220 }
221 },
222 {
223 "notLike",
224 [] (RPNElement & out, const Field & value)
225 {
226 if (value.getType() != Field::Types::String)
227 return false;
228
229 String prefix = extractFixedPrefixFromLikePattern(value.get<const String &>());
230 if (prefix.empty())
231 return false;
232
233 String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
234
235 out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
236 out.range = !right_bound.empty()
237 ? Range(prefix, true, right_bound, false)
238 : Range::createLeftBounded(prefix, true);
239
240 return true;
241 }
242 },
243 {
244 "startsWith",
245 [] (RPNElement & out, const Field & value)
246 {
247 if (value.getType() != Field::Types::String)
248 return false;
249
250 String prefix = value.get<const String &>();
251 if (prefix.empty())
252 return false;
253
254 String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
255
256 out.function = RPNElement::FUNCTION_IN_RANGE;
257 out.range = !right_bound.empty()
258 ? Range(prefix, true, right_bound, false)
259 : Range::createLeftBounded(prefix, true);
260
261 return true;
262 }
263 }
264};
265
266
267inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); }
268inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); }
269
270
271FieldWithInfinity::FieldWithInfinity(const Field & field_)
272 : field(field_),
273 type(Type::NORMAL)
274{
275}
276
277FieldWithInfinity::FieldWithInfinity(Field && field_)
278 : field(std::move(field_)),
279 type(Type::NORMAL)
280{
281}
282
283FieldWithInfinity::FieldWithInfinity(const Type type_)
284 : field(),
285 type(type_)
286{
287}
288
289FieldWithInfinity FieldWithInfinity::getMinusInfinity()
290{
291 return FieldWithInfinity(Type::MINUS_INFINITY);
292}
293
294FieldWithInfinity FieldWithInfinity::getPlusinfinity()
295{
296 return FieldWithInfinity(Type::PLUS_INFINITY);
297}
298
299bool FieldWithInfinity::operator<(const FieldWithInfinity & other) const
300{
301 return type < other.type || (type == other.type && type == Type::NORMAL && field < other.field);
302}
303
304bool FieldWithInfinity::operator==(const FieldWithInfinity & other) const
305{
306 return type == other.type && (type != Type::NORMAL || field == other.field);
307}
308
309
310/** Calculate expressions, that depend only on constants.
311 * For index to work when something like "WHERE Date = toDate(now())" is written.
312 */
313Block KeyCondition::getBlockWithConstants(
314 const ASTPtr & query, const SyntaxAnalyzerResultPtr & syntax_analyzer_result, const Context & context)
315{
316 Block result
317 {
318 { DataTypeUInt8().createColumnConstWithDefaultValue(1), std::make_shared<DataTypeUInt8>(), "_dummy" }
319 };
320
321 const auto expr_for_constant_folding = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActions();
322
323 expr_for_constant_folding->execute(result);
324
325 return result;
326}
327
328
329KeyCondition::KeyCondition(
330 const SelectQueryInfo & query_info,
331 const Context & context,
332 const Names & key_column_names,
333 const ExpressionActionsPtr & key_expr_)
334 : key_expr(key_expr_), prepared_sets(query_info.sets)
335{
336 for (size_t i = 0, size = key_column_names.size(); i < size; ++i)
337 {
338 std::string name = key_column_names[i];
339 if (!key_columns.count(name))
340 key_columns[name] = i;
341 }
342
343 /** Evaluation of expressions that depend only on constants.
344 * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`.
345 */
346 Block block_with_constants = getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);
347
348 /// Trasform WHERE section to Reverse Polish notation
349 const auto & select = query_info.query->as<ASTSelectQuery &>();
350 if (select.where())
351 {
352 traverseAST(select.where(), context, block_with_constants);
353
354 if (select.prewhere())
355 {
356 traverseAST(select.prewhere(), context, block_with_constants);
357 rpn.emplace_back(RPNElement::FUNCTION_AND);
358 }
359 }
360 else if (select.prewhere())
361 {
362 traverseAST(select.prewhere(), context, block_with_constants);
363 }
364 else
365 {
366 rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
367 }
368}
369
370bool KeyCondition::addCondition(const String & column, const Range & range)
371{
372 if (!key_columns.count(column))
373 return false;
374 rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, key_columns[column], range);
375 rpn.emplace_back(RPNElement::FUNCTION_AND);
376 return true;
377}
378
379/** Computes value of constant expression and its data type.
380 * Returns false, if expression isn't constant.
381 */
382bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type)
383{
384 String column_name = expr->getColumnName();
385
386 if (const auto * lit = expr->as<ASTLiteral>())
387 {
388 /// By default block_with_constants has only one column named "_dummy".
389 /// If block contains only constants it's may not be preprocessed by
390 // ExpressionAnalyzer, so try to look up in the default column.
391 if (!block_with_constants.has(column_name))
392 column_name = "_dummy";
393
394 /// Simple literal
395 out_value = lit->value;
396 out_type = block_with_constants.getByName(column_name).type;
397 return true;
398 }
399 else if (block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column))
400 {
401 /// An expression which is dependent on constants only
402 const auto & expr_info = block_with_constants.getByName(column_name);
403 out_value = (*expr_info.column)[0];
404 out_type = expr_info.type;
405 return true;
406 }
407 else
408 return false;
409}
410
411
412static void applyFunction(
413 const FunctionBasePtr & func,
414 const DataTypePtr & arg_type, const Field & arg_value,
415 DataTypePtr & res_type, Field & res_value)
416{
417 res_type = func->getReturnType();
418
419 Block block
420 {
421 { arg_type->createColumnConst(1, arg_value), arg_type, "x" },
422 { nullptr, res_type, "y" }
423 };
424
425 func->execute(block, {0}, 1, 1);
426
427 block.safeGetByPosition(1).column->get(0, res_value);
428}
429
430
431void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants)
432{
433 RPNElement element;
434
435 if (auto * func = node->as<ASTFunction>())
436 {
437 if (operatorFromAST(func, element))
438 {
439 auto & args = func->arguments->children;
440 for (size_t i = 0, size = args.size(); i < size; ++i)
441 {
442 traverseAST(args[i], context, block_with_constants);
443
444 /** The first part of the condition is for the correct support of `and` and `or` functions of arbitrary arity
445 * - in this case `n - 1` elements are added (where `n` is the number of arguments).
446 */
447 if (i != 0 || element.function == RPNElement::FUNCTION_NOT)
448 rpn.emplace_back(std::move(element));
449 }
450
451 return;
452 }
453 }
454
455 if (!atomFromAST(node, context, block_with_constants, element))
456 {
457 element.function = RPNElement::FUNCTION_UNKNOWN;
458 }
459
460 rpn.emplace_back(std::move(element));
461}
462
463
464bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
465 const ASTPtr & node,
466 size_t & out_key_column_num,
467 DataTypePtr & out_key_column_type,
468 Field & out_value,
469 DataTypePtr & out_type)
470{
471 String expr_name = node->getColumnName();
472 const auto & sample_block = key_expr->getSampleBlock();
473 if (!sample_block.has(expr_name))
474 return false;
475
476 bool found_transformation = false;
477 for (const ExpressionAction & a : key_expr->getActions())
478 {
479 /** The key functional expression constraint may be inferred from a plain column in the expression.
480 * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
481 * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
482 * condition also holds, so the index may be used to select only parts satisfying this condition.
483 *
484 * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the
485 * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`).
486 * Instead, we can qualify only functions that do not transform the range (for example rounding),
487 * which while not strictly monotonic, are monotonic everywhere on the input range.
488 */
489 const auto & action = a.argument_names;
490 if (a.type == ExpressionAction::Type::APPLY_FUNCTION && action.size() == 1 && a.argument_names[0] == expr_name)
491 {
492 if (!a.function_base->hasInformationAboutMonotonicity())
493 return false;
494
495 // Range is irrelevant in this case
496 IFunction::Monotonicity monotonicity = a.function_base->getMonotonicityForRange(*out_type, Field(), Field());
497 if (!monotonicity.is_always_monotonic)
498 return false;
499
500 // Apply the next transformation step
501 DataTypePtr new_type;
502 applyFunction(a.function_base, out_type, out_value, new_type, out_value);
503 if (!new_type)
504 return false;
505
506 out_type.swap(new_type);
507 expr_name = a.result_name;
508
509 // Transformation results in a key expression, accept
510 auto it = key_columns.find(expr_name);
511 if (key_columns.end() != it)
512 {
513 out_key_column_num = it->second;
514 out_key_column_type = sample_block.getByName(it->first).type;
515 found_transformation = true;
516 break;
517 }
518 }
519 }
520
521 return found_transformation;
522}
523
524bool KeyCondition::tryPrepareSetIndex(
525 const ASTs & args,
526 const Context & context,
527 RPNElement & out,
528 size_t & out_key_column_num)
529{
530 const ASTPtr & left_arg = args[0];
531
532 out_key_column_num = 0;
533 std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
534 DataTypes data_types;
535
536 auto get_key_tuple_position_mapping = [&](const ASTPtr & node, size_t tuple_index)
537 {
538 MergeTreeSetIndex::KeyTuplePositionMapping index_mapping;
539 index_mapping.tuple_index = tuple_index;
540 DataTypePtr data_type;
541 if (isKeyPossiblyWrappedByMonotonicFunctions(
542 node, context, index_mapping.key_index, data_type, index_mapping.functions))
543 {
544 indexes_mapping.push_back(index_mapping);
545 data_types.push_back(data_type);
546 if (out_key_column_num < index_mapping.key_index)
547 out_key_column_num = index_mapping.key_index;
548 }
549 };
550
551 size_t left_args_count = 1;
552 const auto * left_arg_tuple = left_arg->as<ASTFunction>();
553 if (left_arg_tuple && left_arg_tuple->name == "tuple")
554 {
555 const auto & tuple_elements = left_arg_tuple->arguments->children;
556 left_args_count = tuple_elements.size();
557 for (size_t i = 0; i < left_args_count; ++i)
558 get_key_tuple_position_mapping(tuple_elements[i], i);
559 }
560 else
561 get_key_tuple_position_mapping(left_arg, 0);
562
563 if (indexes_mapping.empty())
564 return false;
565
566 const ASTPtr & right_arg = args[1];
567
568 PreparedSetKey set_key;
569 if (right_arg->as<ASTSubquery>() || right_arg->as<ASTIdentifier>())
570 set_key = PreparedSetKey::forSubquery(*right_arg);
571 else
572 set_key = PreparedSetKey::forLiteral(*right_arg, data_types);
573
574 auto set_it = prepared_sets.find(set_key);
575 if (set_it == prepared_sets.end())
576 return false;
577
578 const SetPtr & prepared_set = set_it->second;
579
580 /// The index can be prepared if the elements of the set were saved in advance.
581 if (!prepared_set->hasExplicitSetElements())
582 return false;
583
584 prepared_set->checkColumnsNumber(left_args_count);
585 for (size_t i = 0; i < indexes_mapping.size(); ++i)
586 prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, removeLowCardinality(data_types[i]));
587
588 out.set_index = std::make_shared<MergeTreeSetIndex>(prepared_set->getSetElements(), std::move(indexes_mapping));
589
590 return true;
591}
592
593
594bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
595 const ASTPtr & node,
596 const Context & context,
597 size_t & out_key_column_num,
598 DataTypePtr & out_key_res_column_type,
599 MonotonicFunctionsChain & out_functions_chain)
600{
601 std::vector<const ASTFunction *> chain_not_tested_for_monotonicity;
602 DataTypePtr key_column_type;
603
604 if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity))
605 return false;
606
607 for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it)
608 {
609 auto func_builder = FunctionFactory::instance().tryGet((*it)->name, context);
610 ColumnsWithTypeAndName arguments{{ nullptr, key_column_type, "" }};
611 auto func = func_builder->build(arguments);
612
613 if (!func || !func->hasInformationAboutMonotonicity())
614 return false;
615
616 key_column_type = func->getReturnType();
617 out_functions_chain.push_back(func);
618 }
619
620 out_key_res_column_type = key_column_type;
621
622 return true;
623}
624
625bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
626 const ASTPtr & node,
627 size_t & out_key_column_num,
628 DataTypePtr & out_key_column_type,
629 std::vector<const ASTFunction *> & out_functions_chain)
630{
631 /** By itself, the key column can be a functional expression. for example, `intHash32(UserID)`.
632 * Therefore, use the full name of the expression for search.
633 */
634 const auto & sample_block = key_expr->getSampleBlock();
635 String name = node->getColumnName();
636
637 auto it = key_columns.find(name);
638 if (key_columns.end() != it)
639 {
640 out_key_column_num = it->second;
641 out_key_column_type = sample_block.getByName(it->first).type;
642 return true;
643 }
644
645 if (const auto * func = node->as<ASTFunction>())
646 {
647 const auto & args = func->arguments->children;
648 if (args.size() != 1)
649 return false;
650
651 out_functions_chain.push_back(func);
652
653 if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_key_column_num, out_key_column_type, out_functions_chain))
654 return false;
655
656 return true;
657 }
658
659 return false;
660}
661
662
663static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const ASTPtr & node)
664{
665 if (desired_type->equals(*src_type))
666 return;
667
668 try
669 {
670 /// NOTE: We don't need accurate info about src_type at this moment
671 src_value = convertFieldToType(src_value, *desired_type);
672 }
673 catch (...)
674 {
675 throw Exception("Key expression contains comparison between inconvertible types: " +
676 desired_type->getName() + " and " + src_type->getName() +
677 " inside " + queryToString(node),
678 ErrorCodes::BAD_TYPE_OF_FIELD);
679 }
680}
681
682
683bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
684{
685 /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
686 * or itself, wrapped in a chain of possibly-monotonic functions,
687 * or constant expression - number.
688 */
689 Field const_value;
690 DataTypePtr const_type;
691 if (const auto * func = node->as<ASTFunction>())
692 {
693 const ASTs & args = func->arguments->children;
694
695 DataTypePtr key_expr_type; /// Type of expression containing key column
696 size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
697 MonotonicFunctionsChain chain;
698 std::string func_name = func->name;
699
700 if (atom_map.find(func_name) == std::end(atom_map))
701 return false;
702
703 if (args.size() == 1)
704 {
705 if (!(isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)))
706 return false;
707
708 if (key_column_num == static_cast<size_t>(-1))
709 throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
710 }
711 else if (args.size() == 2)
712 {
713 size_t key_arg_pos; /// Position of argument with key column (non-const argument)
714 bool is_set_const = false;
715 bool is_constant_transformed = false;
716
717 if (functionIsInOrGlobalInOperator(func_name)
718 && tryPrepareSetIndex(args, context, out, key_column_num))
719 {
720 key_arg_pos = 0;
721 is_set_const = true;
722 }
723 else if (getConstant(args[1], block_with_constants, const_value, const_type)
724 && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
725 {
726 key_arg_pos = 0;
727 }
728 else if (getConstant(args[1], block_with_constants, const_value, const_type)
729 && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
730 {
731 key_arg_pos = 0;
732 is_constant_transformed = true;
733 }
734 else if (getConstant(args[0], block_with_constants, const_value, const_type)
735 && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
736 {
737 key_arg_pos = 1;
738 }
739 else if (getConstant(args[0], block_with_constants, const_value, const_type)
740 && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
741 {
742 key_arg_pos = 1;
743 is_constant_transformed = true;
744 }
745 else
746 return false;
747
748 if (key_column_num == static_cast<size_t>(-1))
749 throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
750
751 /// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
752 if (is_constant_transformed)
753 {
754 if (func_name == "less")
755 func_name = "lessOrEquals";
756 else if (func_name == "greater")
757 func_name = "greaterOrEquals";
758 }
759
760 /// Replace <const> <sign> <data> on to <data> <-sign> <const>
761 if (key_arg_pos == 1)
762 {
763 if (func_name == "less")
764 func_name = "greater";
765 else if (func_name == "greater")
766 func_name = "less";
767 else if (func_name == "greaterOrEquals")
768 func_name = "lessOrEquals";
769 else if (func_name == "lessOrEquals")
770 func_name = "greaterOrEquals";
771 else if (func_name == "in" || func_name == "notIn" || func_name == "like")
772 {
773 /// "const IN data_column" doesn't make sense (unlike "data_column IN const")
774 return false;
775 }
776 }
777
778 bool cast_not_needed =
779 is_set_const /// Set args are already casted inside Set::createFromAST
780 || (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
781
782 if (!cast_not_needed)
783 castValueToType(key_expr_type, const_value, const_type, node);
784 }
785 else
786 return false;
787
788 const auto atom_it = atom_map.find(func_name);
789
790 out.key_column = key_column_num;
791 out.monotonic_functions_chain = std::move(chain);
792
793 return atom_it->second(out, const_value);
794 }
795 else if (getConstant(node, block_with_constants, const_value, const_type)) /// For cases where it says, for example, `WHERE 0 AND something`
796 {
797 if (const_value.getType() == Field::Types::UInt64
798 || const_value.getType() == Field::Types::Int64
799 || const_value.getType() == Field::Types::Float64)
800 {
801 /// Zero in all types is represented in memory the same way as in UInt64.
802 out.function = const_value.get<UInt64>()
803 ? RPNElement::ALWAYS_TRUE
804 : RPNElement::ALWAYS_FALSE;
805
806 return true;
807 }
808 }
809 return false;
810}
811
812bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out)
813{
814 /// Functions AND, OR, NOT.
815 /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
816 * (or, the same thing - calling the function `and` from one argument).
817 */
818 const ASTs & args = func->arguments->children;
819
820 if (func->name == "not")
821 {
822 if (args.size() != 1)
823 return false;
824
825 out.function = RPNElement::FUNCTION_NOT;
826 }
827 else
828 {
829 if (func->name == "and" || func->name == "indexHint")
830 out.function = RPNElement::FUNCTION_AND;
831 else if (func->name == "or")
832 out.function = RPNElement::FUNCTION_OR;
833 else
834 return false;
835 }
836
837 return true;
838}
839
840String KeyCondition::toString() const
841{
842 String res;
843 for (size_t i = 0; i < rpn.size(); ++i)
844 {
845 if (i)
846 res += ", ";
847 res += rpn[i].toString();
848 }
849 return res;
850}
851
852
853/** Index is the value of key every `index_granularity` rows.
854 * This value is called a "mark". That is, the index consists of marks.
855 *
856 * The key is the tuple.
857 * The data is sorted by key in the sense of lexicographic order over tuples.
858 *
859 * A pair of marks specifies a segment with respect to the order over the tuples.
860 * Denote it like this: [ x1 y1 z1 .. x2 y2 z2 ],
861 * where x1 y1 z1 - tuple - value of key in left border of segment;
862 * x2 y2 z2 - tuple - value of key in right boundary of segment.
863 * In this section there are data between these marks.
864 *
865 * Or, the last mark specifies the range open on the right: [ a b c .. + inf )
866 *
867 * The set of all possible tuples can be considered as an n-dimensional space, where n is the size of the tuple.
868 * A range of tuples specifies some subset of this space.
869 *
870 * Parallelograms (you can also find the term "rail")
871 * will be the subrange of an n-dimensional space that is a direct product of one-dimensional ranges.
872 * In this case, the one-dimensional range can be: a period, a segment, an interval, a half-interval, unlimited on the left, unlimited on the right ...
873 *
874 * The range of tuples can always be represented as a combination of parallelograms.
875 * For example, the range [ x1 y1 .. x2 y2 ] given x1 != x2 is equal to the union of the following three parallelograms:
876 * [x1] x [y1 .. +inf)
877 * (x1 .. x2) x (-inf .. +inf)
878 * [x2] x (-inf .. y2]
879 *
880 * Or, for example, the range [ x1 y1 .. +inf ] is equal to the union of the following two parallelograms:
881 * [x1] x [y1 .. +inf)
882 * (x1 .. +inf) x (-inf .. +inf)
883 * It's easy to see that this is a special case of the variant above.
884 *
885 * This is important because it is easy for us to check the feasibility of the condition over the parallelogram,
886 * and therefore, feasibility of condition on the range of tuples will be checked by feasibility of condition
887 * over at least one parallelogram from which this range consists.
888 */
889
890template <typename F>
891static bool forAnyParallelogram(
892 size_t key_size,
893 const Field * key_left,
894 const Field * key_right,
895 bool left_bounded,
896 bool right_bounded,
897 std::vector<Range> & parallelogram,
898 size_t prefix_size,
899 F && callback)
900{
901 if (!left_bounded && !right_bounded)
902 return callback(parallelogram);
903
904 if (left_bounded && right_bounded)
905 {
906 /// Let's go through the matching elements of the key.
907 while (prefix_size < key_size)
908 {
909 if (key_left[prefix_size] == key_right[prefix_size])
910 {
911 /// Point ranges.
912 parallelogram[prefix_size] = Range(key_left[prefix_size]);
913 ++prefix_size;
914 }
915 else
916 break;
917 }
918 }
919
920 if (prefix_size == key_size)
921 return callback(parallelogram);
922
923 if (prefix_size + 1 == key_size)
924 {
925 if (left_bounded && right_bounded)
926 parallelogram[prefix_size] = Range(key_left[prefix_size], true, key_right[prefix_size], true);
927 else if (left_bounded)
928 parallelogram[prefix_size] = Range::createLeftBounded(key_left[prefix_size], true);
929 else if (right_bounded)
930 parallelogram[prefix_size] = Range::createRightBounded(key_right[prefix_size], true);
931
932 return callback(parallelogram);
933 }
934
935 /// (x1 .. x2) x (-inf .. +inf)
936
937 if (left_bounded && right_bounded)
938 parallelogram[prefix_size] = Range(key_left[prefix_size], false, key_right[prefix_size], false);
939 else if (left_bounded)
940 parallelogram[prefix_size] = Range::createLeftBounded(key_left[prefix_size], false);
941 else if (right_bounded)
942 parallelogram[prefix_size] = Range::createRightBounded(key_right[prefix_size], false);
943
944 for (size_t i = prefix_size + 1; i < key_size; ++i)
945 parallelogram[i] = Range();
946
947 if (callback(parallelogram))
948 return true;
949
950 /// [x1] x [y1 .. +inf)
951
952 if (left_bounded)
953 {
954 parallelogram[prefix_size] = Range(key_left[prefix_size]);
955 if (forAnyParallelogram(key_size, key_left, key_right, true, false, parallelogram, prefix_size + 1, callback))
956 return true;
957 }
958
959 /// [x2] x (-inf .. y2]
960
961 if (right_bounded)
962 {
963 parallelogram[prefix_size] = Range(key_right[prefix_size]);
964 if (forAnyParallelogram(key_size, key_left, key_right, false, true, parallelogram, prefix_size + 1, callback))
965 return true;
966 }
967
968 return false;
969}
970
971
972bool KeyCondition::mayBeTrueInRange(
973 size_t used_key_size,
974 const Field * left_key,
975 const Field * right_key,
976 const DataTypes & data_types,
977 bool right_bounded) const
978{
979 std::vector<Range> key_ranges(used_key_size, Range());
980
981/* std::cerr << "Checking for: [";
982 for (size_t i = 0; i != used_key_size; ++i)
983 std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_key[i]);
984 std::cerr << " ... ";
985
986 if (right_bounded)
987 {
988 for (size_t i = 0; i != used_key_size; ++i)
989 std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_key[i]);
990 std::cerr << "]\n";
991 }
992 else
993 std::cerr << "+inf)\n";*/
994
995 return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0,
996 [&] (const std::vector<Range> & key_ranges_parallelogram)
997 {
998 auto res = mayBeTrueInParallelogram(key_ranges_parallelogram, data_types);
999
1000/* std::cerr << "Parallelogram: ";
1001 for (size_t i = 0, size = key_ranges.size(); i != size; ++i)
1002 std::cerr << (i != 0 ? " x " : "") << key_ranges[i].toString();
1003 std::cerr << ": " << res << "\n";*/
1004
1005 return res;
1006 });
1007}
1008
1009std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
1010 Range key_range,
1011 MonotonicFunctionsChain & functions,
1012 DataTypePtr current_type
1013)
1014{
1015 for (auto & func : functions)
1016 {
1017 /// We check the monotonicity of each function on a specific range.
1018 IFunction::Monotonicity monotonicity = func->getMonotonicityForRange(
1019 *current_type.get(), key_range.left, key_range.right);
1020
1021 if (!monotonicity.is_monotonic)
1022 {
1023 return {};
1024 }
1025
1026 /// Apply the function.
1027 DataTypePtr new_type;
1028 if (!key_range.left.isNull())
1029 applyFunction(func, current_type, key_range.left, new_type, key_range.left);
1030 if (!key_range.right.isNull())
1031 applyFunction(func, current_type, key_range.right, new_type, key_range.right);
1032
1033 if (!new_type)
1034 {
1035 return {};
1036 }
1037
1038 current_type.swap(new_type);
1039
1040 if (!monotonicity.is_positive)
1041 key_range.swapLeftAndRight();
1042 }
1043 return key_range;
1044}
1045
1046bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelogram, const DataTypes & data_types) const
1047{
1048 std::vector<BoolMask> rpn_stack;
1049 for (size_t i = 0; i < rpn.size(); ++i)
1050 {
1051 const auto & element = rpn[i];
1052 if (element.function == RPNElement::FUNCTION_UNKNOWN)
1053 {
1054 rpn_stack.emplace_back(true, true);
1055 }
1056 else if (element.function == RPNElement::FUNCTION_IN_RANGE
1057 || element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
1058 {
1059 const Range * key_range = &parallelogram[element.key_column];
1060
1061 /// The case when the column is wrapped in a chain of possibly monotonic functions.
1062 Range transformed_range;
1063 if (!element.monotonic_functions_chain.empty())
1064 {
1065 std::optional<Range> new_range = applyMonotonicFunctionsChainToRange(
1066 *key_range,
1067 element.monotonic_functions_chain,
1068 data_types[element.key_column]
1069 );
1070
1071 if (!new_range)
1072 {
1073 rpn_stack.emplace_back(true, true);
1074 continue;
1075 }
1076 transformed_range = *new_range;
1077 key_range = &transformed_range;
1078 }
1079
1080 bool intersects = element.range.intersectsRange(*key_range);
1081 bool contains = element.range.containsRange(*key_range);
1082
1083 rpn_stack.emplace_back(intersects, !contains);
1084 if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
1085 rpn_stack.back() = !rpn_stack.back();
1086 }
1087 else if (
1088 element.function == RPNElement::FUNCTION_IN_SET
1089 || element.function == RPNElement::FUNCTION_NOT_IN_SET)
1090 {
1091 if (!element.set_index)
1092 throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR);
1093
1094 rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(parallelogram, data_types));
1095 if (element.function == RPNElement::FUNCTION_NOT_IN_SET)
1096 rpn_stack.back() = !rpn_stack.back();
1097 }
1098 else if (element.function == RPNElement::FUNCTION_NOT)
1099 {
1100 assert(!rpn_stack.empty());
1101
1102 rpn_stack.back() = !rpn_stack.back();
1103 }
1104 else if (element.function == RPNElement::FUNCTION_AND)
1105 {
1106 assert(!rpn_stack.empty());
1107
1108 auto arg1 = rpn_stack.back();
1109 rpn_stack.pop_back();
1110 auto arg2 = rpn_stack.back();
1111 rpn_stack.back() = arg1 & arg2;
1112 }
1113 else if (element.function == RPNElement::FUNCTION_OR)
1114 {
1115 assert(!rpn_stack.empty());
1116
1117 auto arg1 = rpn_stack.back();
1118 rpn_stack.pop_back();
1119 auto arg2 = rpn_stack.back();
1120 rpn_stack.back() = arg1 | arg2;
1121 }
1122 else if (element.function == RPNElement::ALWAYS_FALSE)
1123 {
1124 rpn_stack.emplace_back(false, true);
1125 }
1126 else if (element.function == RPNElement::ALWAYS_TRUE)
1127 {
1128 rpn_stack.emplace_back(true, false);
1129 }
1130 else
1131 throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
1132 }
1133
1134 if (rpn_stack.size() != 1)
1135 throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInParallelogram", ErrorCodes::LOGICAL_ERROR);
1136
1137 return rpn_stack[0].can_be_true;
1138}
1139
1140
1141bool KeyCondition::mayBeTrueInRange(
1142 size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const
1143{
1144 return mayBeTrueInRange(used_key_size, left_key, right_key, data_types, true);
1145}
1146
1147bool KeyCondition::mayBeTrueAfter(
1148 size_t used_key_size, const Field * left_key, const DataTypes & data_types) const
1149{
1150 return mayBeTrueInRange(used_key_size, left_key, nullptr, data_types, false);
1151}
1152
1153
1154String KeyCondition::RPNElement::toString() const
1155{
1156 auto print_wrapped_column = [this](std::ostringstream & ss)
1157 {
1158 for (auto it = monotonic_functions_chain.rbegin(); it != monotonic_functions_chain.rend(); ++it)
1159 ss << (*it)->getName() << "(";
1160
1161 ss << "column " << key_column;
1162
1163 for (auto it = monotonic_functions_chain.rbegin(); it != monotonic_functions_chain.rend(); ++it)
1164 ss << ")";
1165 };
1166
1167 std::ostringstream ss;
1168 switch (function)
1169 {
1170 case FUNCTION_AND:
1171 return "and";
1172 case FUNCTION_OR:
1173 return "or";
1174 case FUNCTION_NOT:
1175 return "not";
1176 case FUNCTION_UNKNOWN:
1177 return "unknown";
1178 case FUNCTION_NOT_IN_SET:
1179 case FUNCTION_IN_SET:
1180 {
1181 ss << "(";
1182 print_wrapped_column(ss);
1183 ss << (function == FUNCTION_IN_SET ? " in " : " notIn ");
1184 if (!set_index)
1185 ss << "unknown size set";
1186 else
1187 ss << set_index->size() << "-element set";
1188 ss << ")";
1189 return ss.str();
1190 }
1191 case FUNCTION_IN_RANGE:
1192 case FUNCTION_NOT_IN_RANGE:
1193 {
1194 ss << "(";
1195 print_wrapped_column(ss);
1196 ss << (function == FUNCTION_NOT_IN_RANGE ? " not" : "") << " in " << range.toString();
1197 ss << ")";
1198 return ss.str();
1199 }
1200 case ALWAYS_FALSE:
1201 return "false";
1202 case ALWAYS_TRUE:
1203 return "true";
1204 }
1205
1206 __builtin_unreachable();
1207}
1208
1209
1210bool KeyCondition::alwaysUnknownOrTrue() const
1211{
1212 std::vector<UInt8> rpn_stack;
1213
1214 for (const auto & element : rpn)
1215 {
1216 if (element.function == RPNElement::FUNCTION_UNKNOWN
1217 || element.function == RPNElement::ALWAYS_TRUE)
1218 {
1219 rpn_stack.push_back(true);
1220 }
1221 else if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE
1222 || element.function == RPNElement::FUNCTION_IN_RANGE
1223 || element.function == RPNElement::FUNCTION_IN_SET
1224 || element.function == RPNElement::FUNCTION_NOT_IN_SET
1225 || element.function == RPNElement::ALWAYS_FALSE)
1226 {
1227 rpn_stack.push_back(false);
1228 }
1229 else if (element.function == RPNElement::FUNCTION_NOT)
1230 {
1231 }
1232 else if (element.function == RPNElement::FUNCTION_AND)
1233 {
1234 assert(!rpn_stack.empty());
1235
1236 auto arg1 = rpn_stack.back();
1237 rpn_stack.pop_back();
1238 auto arg2 = rpn_stack.back();
1239 rpn_stack.back() = arg1 & arg2;
1240 }
1241 else if (element.function == RPNElement::FUNCTION_OR)
1242 {
1243 assert(!rpn_stack.empty());
1244
1245 auto arg1 = rpn_stack.back();
1246 rpn_stack.pop_back();
1247 auto arg2 = rpn_stack.back();
1248 rpn_stack.back() = arg1 | arg2;
1249 }
1250 else
1251 throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
1252 }
1253
1254 if (rpn_stack.size() != 1)
1255 throw Exception("Unexpected stack size in KeyCondition::alwaysUnknownOrTrue", ErrorCodes::LOGICAL_ERROR);
1256
1257 return rpn_stack[0];
1258}
1259
1260
1261size_t KeyCondition::getMaxKeyColumn() const
1262{
1263 size_t res = 0;
1264 for (const auto & element : rpn)
1265 {
1266 if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE
1267 || element.function == RPNElement::FUNCTION_IN_RANGE
1268 || element.function == RPNElement::FUNCTION_IN_SET
1269 || element.function == RPNElement::FUNCTION_NOT_IN_SET)
1270 {
1271 if (element.key_column > res)
1272 res = element.key_column;
1273 }
1274 }
1275 return res;
1276}
1277
1278}
1279