| 1 | #pragma once |
| 2 | |
| 3 | #include <sstream> |
| 4 | #include <optional> |
| 5 | |
| 6 | #include <Interpreters/Context.h> |
| 7 | #include <Interpreters/ExpressionActions.h> |
| 8 | #include <Interpreters/Set.h> |
| 9 | #include <Core/SortDescription.h> |
| 10 | #include <Parsers/ASTExpressionList.h> |
| 11 | #include <Parsers/ASTSelectQuery.h> |
| 12 | #include <Parsers/ASTFunction.h> |
| 13 | #include <Storages/SelectQueryInfo.h> |
| 14 | |
| 15 | |
| 16 | namespace DB |
| 17 | { |
| 18 | |
| 19 | class IFunction; |
| 20 | using FunctionBasePtr = std::shared_ptr<IFunctionBase>; |
| 21 | |
| 22 | /** Range with open or closed ends; possibly unbounded. |
| 23 | */ |
| 24 | struct Range |
| 25 | { |
| 26 | private: |
| 27 | static bool equals(const Field & lhs, const Field & rhs); |
| 28 | static bool less(const Field & lhs, const Field & rhs); |
| 29 | |
| 30 | public: |
| 31 | Field left; /// the left border, if any |
| 32 | Field right; /// the right border, if any |
| 33 | bool left_bounded = false; /// bounded at the left |
| 34 | bool right_bounded = false; /// bounded at the right |
| 35 | bool left_included = false; /// includes the left border, if any |
| 36 | bool right_included = false; /// includes the right border, if any |
| 37 | |
| 38 | /// The whole unversum. |
| 39 | Range() {} |
| 40 | |
| 41 | /// One point. |
| 42 | Range(const Field & point) |
| 43 | : left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {} |
| 44 | |
| 45 | /// A bounded two-sided range. |
| 46 | Range(const Field & left_, bool left_included_, const Field & right_, bool right_included_) |
| 47 | : left(left_), right(right_), |
| 48 | left_bounded(true), right_bounded(true), |
| 49 | left_included(left_included_), right_included(right_included_) |
| 50 | { |
| 51 | shrinkToIncludedIfPossible(); |
| 52 | } |
| 53 | |
| 54 | static Range createRightBounded(const Field & right_point, bool right_included) |
| 55 | { |
| 56 | Range r; |
| 57 | r.right = right_point; |
| 58 | r.right_bounded = true; |
| 59 | r.right_included = right_included; |
| 60 | r.shrinkToIncludedIfPossible(); |
| 61 | return r; |
| 62 | } |
| 63 | |
| 64 | static Range createLeftBounded(const Field & left_point, bool left_included) |
| 65 | { |
| 66 | Range r; |
| 67 | r.left = left_point; |
| 68 | r.left_bounded = true; |
| 69 | r.left_included = left_included; |
| 70 | r.shrinkToIncludedIfPossible(); |
| 71 | return r; |
| 72 | } |
| 73 | |
| 74 | /** Optimize the range. If it has an open boundary and the Field type is "loose" |
| 75 | * - then convert it to closed, narrowing by one. |
| 76 | * That is, for example, turn (0,2) into [1]. |
| 77 | */ |
| 78 | void shrinkToIncludedIfPossible() |
| 79 | { |
| 80 | if (left_bounded && !left_included) |
| 81 | { |
| 82 | if (left.getType() == Field::Types::UInt64 && left.get<UInt64>() != std::numeric_limits<UInt64>::max()) |
| 83 | { |
| 84 | ++left.get<UInt64 &>(); |
| 85 | left_included = true; |
| 86 | } |
| 87 | if (left.getType() == Field::Types::Int64 && left.get<Int64>() != std::numeric_limits<Int64>::max()) |
| 88 | { |
| 89 | ++left.get<Int64 &>(); |
| 90 | left_included = true; |
| 91 | } |
| 92 | } |
| 93 | if (right_bounded && !right_included) |
| 94 | { |
| 95 | if (right.getType() == Field::Types::UInt64 && right.get<UInt64>() != std::numeric_limits<UInt64>::min()) |
| 96 | { |
| 97 | --right.get<UInt64 &>(); |
| 98 | right_included = true; |
| 99 | } |
| 100 | if (right.getType() == Field::Types::Int64 && right.get<Int64>() != std::numeric_limits<Int64>::min()) |
| 101 | { |
| 102 | --right.get<Int64 &>(); |
| 103 | right_included = true; |
| 104 | } |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | bool empty() const |
| 109 | { |
| 110 | return left_bounded && right_bounded |
| 111 | && (less(right, left) |
| 112 | || ((!left_included || !right_included) && !less(left, right))); |
| 113 | } |
| 114 | |
| 115 | /// x contained in the range |
| 116 | bool contains(const Field & x) const |
| 117 | { |
| 118 | return !leftThan(x) && !rightThan(x); |
| 119 | } |
| 120 | |
| 121 | /// x is to the left |
| 122 | bool rightThan(const Field & x) const |
| 123 | { |
| 124 | return (left_bounded |
| 125 | ? !(less(left, x) || (left_included && equals(x, left))) |
| 126 | : false); |
| 127 | } |
| 128 | |
| 129 | /// x is to the right |
| 130 | bool leftThan(const Field & x) const |
| 131 | { |
| 132 | return (right_bounded |
| 133 | ? !(less(x, right) || (right_included && equals(x, right))) |
| 134 | : false); |
| 135 | } |
| 136 | |
| 137 | bool intersectsRange(const Range & r) const |
| 138 | { |
| 139 | /// r to the left of me. |
| 140 | if (r.right_bounded |
| 141 | && left_bounded |
| 142 | && (less(r.right, left) |
| 143 | || ((!left_included || !r.right_included) |
| 144 | && equals(r.right, left)))) |
| 145 | return false; |
| 146 | |
| 147 | /// r to the right of me. |
| 148 | if (r.left_bounded |
| 149 | && right_bounded |
| 150 | && (less(right, r.left) /// ...} {... |
| 151 | || ((!right_included || !r.left_included) /// ...) [... or ...] (... |
| 152 | && equals(r.left, right)))) |
| 153 | return false; |
| 154 | |
| 155 | return true; |
| 156 | } |
| 157 | |
| 158 | bool containsRange(const Range & r) const |
| 159 | { |
| 160 | /// r starts to the left of me. |
| 161 | if (left_bounded |
| 162 | && (!r.left_bounded |
| 163 | || less(r.left, left) |
| 164 | || (r.left_included |
| 165 | && !left_included |
| 166 | && equals(r.left, left)))) |
| 167 | return false; |
| 168 | |
| 169 | /// r ends right of me. |
| 170 | if (right_bounded |
| 171 | && (!r.right_bounded |
| 172 | || less(right, r.right) |
| 173 | || (r.right_included |
| 174 | && !right_included |
| 175 | && equals(r.right, right)))) |
| 176 | return false; |
| 177 | |
| 178 | return true; |
| 179 | } |
| 180 | |
| 181 | void swapLeftAndRight() |
| 182 | { |
| 183 | std::swap(left, right); |
| 184 | std::swap(left_bounded, right_bounded); |
| 185 | std::swap(left_included, right_included); |
| 186 | } |
| 187 | |
| 188 | String toString() const; |
| 189 | }; |
| 190 | |
| 191 | |
| 192 | /// Class that extends arbitrary objects with infinities, like +-inf for floats |
| 193 | class FieldWithInfinity |
| 194 | { |
| 195 | public: |
| 196 | enum Type |
| 197 | { |
| 198 | MINUS_INFINITY = -1, |
| 199 | NORMAL = 0, |
| 200 | PLUS_INFINITY = 1 |
| 201 | }; |
| 202 | |
| 203 | explicit FieldWithInfinity(const Field & field_); |
| 204 | FieldWithInfinity(Field && field_); |
| 205 | |
| 206 | static FieldWithInfinity getMinusInfinity(); |
| 207 | static FieldWithInfinity getPlusinfinity(); |
| 208 | |
| 209 | bool operator<(const FieldWithInfinity & other) const; |
| 210 | bool operator==(const FieldWithInfinity & other) const; |
| 211 | |
| 212 | private: |
| 213 | Field field; |
| 214 | Type type; |
| 215 | |
| 216 | FieldWithInfinity(const Type type_); |
| 217 | }; |
| 218 | |
| 219 | |
| 220 | /** Condition on the index. |
| 221 | * |
| 222 | * Consists of the conditions for the key belonging to all possible ranges or sets, |
| 223 | * as well as logical operators AND/OR/NOT above these conditions. |
| 224 | * |
| 225 | * Constructs a reverse polish notation from these conditions |
| 226 | * and can calculate (interpret) its satisfiability over key ranges. |
| 227 | */ |
| 228 | class KeyCondition |
| 229 | { |
| 230 | public: |
| 231 | /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table. |
| 232 | KeyCondition( |
| 233 | const SelectQueryInfo & query_info, |
| 234 | const Context & context, |
| 235 | const Names & key_column_names, |
| 236 | const ExpressionActionsPtr & key_expr); |
| 237 | |
| 238 | /// Whether the condition is feasible in the key range. |
| 239 | /// left_key and right_key must contain all fields in the sort_descr in the appropriate order. |
| 240 | /// data_types - the types of the key columns. |
| 241 | bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const; |
| 242 | |
| 243 | /// Whether the condition is feasible in the direct product of single column ranges specified by `parallelogram`. |
| 244 | bool mayBeTrueInParallelogram(const std::vector<Range> & parallelogram, const DataTypes & data_types) const; |
| 245 | |
| 246 | /// Is the condition valid in a semi-infinite (not limited to the right) key range. |
| 247 | /// left_key must contain all the fields in the sort_descr in the appropriate order. |
| 248 | bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const; |
| 249 | |
| 250 | /// Checks that the index can not be used. |
| 251 | bool alwaysUnknownOrTrue() const; |
| 252 | |
| 253 | /// Get the maximum number of the key element used in the condition. |
| 254 | size_t getMaxKeyColumn() const; |
| 255 | |
| 256 | /// Impose an additional condition: the value in the column `column` must be in the range `range`. |
| 257 | /// Returns whether there is such a column in the key. |
| 258 | bool addCondition(const String & column, const Range & range); |
| 259 | |
| 260 | String toString() const; |
| 261 | |
| 262 | |
| 263 | /** A chain of possibly monotone functions. |
| 264 | * If the key column is wrapped in functions that can be monotonous in some value ranges |
| 265 | * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate. |
| 266 | */ |
| 267 | using MonotonicFunctionsChain = std::vector<FunctionBasePtr>; |
| 268 | |
| 269 | /** Computes value of constant expression and its data type. |
| 270 | * Returns false, if expression isn't constant. |
| 271 | */ |
| 272 | static bool getConstant( |
| 273 | const ASTPtr & expr, Block & block_with_constants, Field & out_value, DataTypePtr & out_type); |
| 274 | |
| 275 | static Block getBlockWithConstants( |
| 276 | const ASTPtr & query, const SyntaxAnalyzerResultPtr & syntax_analyzer_result, const Context & context); |
| 277 | |
| 278 | static std::optional<Range> applyMonotonicFunctionsChainToRange( |
| 279 | Range key_range, |
| 280 | MonotonicFunctionsChain & functions, |
| 281 | DataTypePtr current_type); |
| 282 | |
| 283 | private: |
| 284 | /// The expression is stored as Reverse Polish Notation. |
| 285 | struct RPNElement |
| 286 | { |
| 287 | enum Function |
| 288 | { |
| 289 | /// Atoms of a Boolean expression. |
| 290 | FUNCTION_IN_RANGE, |
| 291 | FUNCTION_NOT_IN_RANGE, |
| 292 | FUNCTION_IN_SET, |
| 293 | FUNCTION_NOT_IN_SET, |
| 294 | FUNCTION_UNKNOWN, /// Can take any value. |
| 295 | /// Operators of the logical expression. |
| 296 | FUNCTION_NOT, |
| 297 | FUNCTION_AND, |
| 298 | FUNCTION_OR, |
| 299 | /// Constants |
| 300 | ALWAYS_FALSE, |
| 301 | ALWAYS_TRUE, |
| 302 | }; |
| 303 | |
| 304 | RPNElement() {} |
| 305 | RPNElement(Function function_) : function(function_) {} |
| 306 | RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {} |
| 307 | RPNElement(Function function_, size_t key_column_, const Range & range_) |
| 308 | : function(function_), range(range_), key_column(key_column_) {} |
| 309 | |
| 310 | String toString() const; |
| 311 | |
| 312 | Function function = FUNCTION_UNKNOWN; |
| 313 | |
| 314 | /// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE. |
| 315 | Range range; |
| 316 | size_t key_column = 0; |
| 317 | /// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET |
| 318 | using MergeTreeSetIndexPtr = std::shared_ptr<MergeTreeSetIndex>; |
| 319 | MergeTreeSetIndexPtr set_index; |
| 320 | |
| 321 | mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy. |
| 322 | }; |
| 323 | |
| 324 | using RPN = std::vector<RPNElement>; |
| 325 | using ColumnIndices = std::map<String, size_t>; |
| 326 | |
| 327 | using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value)>; |
| 328 | |
| 329 | public: |
| 330 | static const AtomMap atom_map; |
| 331 | |
| 332 | private: |
| 333 | bool mayBeTrueInRange( |
| 334 | size_t used_key_size, |
| 335 | const Field * left_key, |
| 336 | const Field * right_key, |
| 337 | const DataTypes & data_types, |
| 338 | bool right_bounded) const; |
| 339 | |
| 340 | void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants); |
| 341 | bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out); |
| 342 | bool operatorFromAST(const ASTFunction * func, RPNElement & out); |
| 343 | |
| 344 | /** Is node the key column |
| 345 | * or expression in which column of key is wrapped by chain of functions, |
| 346 | * that can be monotonic on certain ranges? |
| 347 | * If these conditions are true, then returns number of column in key, type of resulting expression |
| 348 | * and fills chain of possibly-monotonic functions. |
| 349 | */ |
| 350 | bool isKeyPossiblyWrappedByMonotonicFunctions( |
| 351 | const ASTPtr & node, |
| 352 | const Context & context, |
| 353 | size_t & out_key_column_num, |
| 354 | DataTypePtr & out_key_res_column_type, |
| 355 | MonotonicFunctionsChain & out_functions_chain); |
| 356 | |
| 357 | bool isKeyPossiblyWrappedByMonotonicFunctionsImpl( |
| 358 | const ASTPtr & node, |
| 359 | size_t & out_key_column_num, |
| 360 | DataTypePtr & out_key_column_type, |
| 361 | std::vector<const ASTFunction *> & out_functions_chain); |
| 362 | |
| 363 | bool canConstantBeWrappedByMonotonicFunctions( |
| 364 | const ASTPtr & node, |
| 365 | size_t & out_key_column_num, |
| 366 | DataTypePtr & out_key_column_type, |
| 367 | Field & out_value, |
| 368 | DataTypePtr & out_type); |
| 369 | |
| 370 | /// If it's possible to make an RPNElement |
| 371 | /// that will filter values (possibly tuples) by the content of 'prepared_set', |
| 372 | /// do it and return true. |
| 373 | bool tryPrepareSetIndex( |
| 374 | const ASTs & args, |
| 375 | const Context & context, |
| 376 | RPNElement & out, |
| 377 | size_t & out_key_column_num); |
| 378 | |
| 379 | RPN rpn; |
| 380 | |
| 381 | ColumnIndices key_columns; |
| 382 | ExpressionActionsPtr key_expr; |
| 383 | PreparedSets prepared_sets; |
| 384 | }; |
| 385 | |
| 386 | } |
| 387 | |