| 1 | #pragma once |
| 2 | |
| 3 | #include <Parsers/IAST.h> |
| 4 | |
| 5 | #include <string> |
| 6 | #include <vector> |
| 7 | #include <map> |
| 8 | #include <unordered_map> |
| 9 | #include <unordered_set> |
| 10 | |
| 11 | |
| 12 | namespace DB |
| 13 | { |
| 14 | |
| 15 | struct Settings; |
| 16 | class ASTFunction; |
| 17 | class ASTSelectQuery; |
| 18 | |
| 19 | |
| 20 | /** This class provides functions for optimizing boolean expressions within queries. |
| 21 | * |
| 22 | * For simplicity, we call a homogeneous OR-chain any expression having the following structure: |
| 23 | * expr = x1 OR ... OR expr = xN |
| 24 | * where `expr` is an arbitrary expression and x1, ..., xN are literals of the same type |
| 25 | */ |
| 26 | class LogicalExpressionsOptimizer final |
| 27 | { |
| 28 | struct |
| 29 | { |
| 30 | const UInt64 ; |
| 31 | |
| 32 | (UInt64 optimize_min_equality_disjunction_chain_length_) |
| 33 | : optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_) |
| 34 | {} |
| 35 | }; |
| 36 | |
| 37 | public: |
| 38 | /// Constructor. Accepts the root of the query DAG. |
| 39 | LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length); |
| 40 | |
| 41 | /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN |
| 42 | * on the expressions `expr` IN (x1, ..., xN). |
| 43 | */ |
| 44 | void perform(); |
| 45 | |
| 46 | LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete; |
| 47 | LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete; |
| 48 | |
| 49 | private: |
| 50 | /** The OR function with the expression. |
| 51 | */ |
| 52 | struct OrWithExpression |
| 53 | { |
| 54 | OrWithExpression(const ASTFunction * or_function_, const IAST::Hash & expression_, const std::string & alias_); |
| 55 | bool operator<(const OrWithExpression & rhs) const; |
| 56 | |
| 57 | const ASTFunction * or_function; |
| 58 | const IAST::Hash expression; |
| 59 | const std::string alias; |
| 60 | }; |
| 61 | |
| 62 | struct Equalities |
| 63 | { |
| 64 | std::vector<ASTFunction *> functions; |
| 65 | bool is_processed = false; |
| 66 | }; |
| 67 | |
| 68 | using DisjunctiveEqualityChainsMap = std::map<OrWithExpression, Equalities>; |
| 69 | using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type; |
| 70 | |
| 71 | private: |
| 72 | /** Collect information about all the equations in the OR chains (not necessarily homogeneous). |
| 73 | * This information is grouped by the expression that is on the left side of the equation. |
| 74 | */ |
| 75 | void collectDisjunctiveEqualityChains(); |
| 76 | |
| 77 | /** Check that the set of equalities expr = x1, ..., expr = xN fulfills the following two requirements: |
| 78 | * 1. It's not too small |
| 79 | * 2. x1, ... xN have the same type |
| 80 | */ |
| 81 | bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const; |
| 82 | |
| 83 | /// Insert the IN expression into the OR chain. |
| 84 | void addInExpression(const DisjunctiveEqualityChain & chain); |
| 85 | |
| 86 | /// Delete the equalities that were replaced by the IN expressions. |
| 87 | void cleanupOrExpressions(); |
| 88 | |
| 89 | /// Delete OR expressions that have only one operand. |
| 90 | void fixBrokenOrExpressions(); |
| 91 | |
| 92 | /// Restore the original column order after optimization. |
| 93 | void reorderColumns(); |
| 94 | |
| 95 | private: |
| 96 | using ParentNodes = std::vector<IAST *>; |
| 97 | using FunctionParentMap = std::unordered_map<const IAST *, ParentNodes>; |
| 98 | using ColumnToPosition = std::unordered_map<const IAST *, size_t>; |
| 99 | |
| 100 | private: |
| 101 | ASTSelectQuery * select_query; |
| 102 | const ExtractedSettings settings; |
| 103 | /// Information about the OR-chains inside the query. |
| 104 | DisjunctiveEqualityChainsMap disjunctive_equality_chains_map; |
| 105 | /// Number of processed OR-chains. |
| 106 | size_t processed_count = 0; |
| 107 | /// Parents of OR functions. |
| 108 | FunctionParentMap or_parent_map; |
| 109 | /// The position of each column. |
| 110 | ColumnToPosition column_to_position; |
| 111 | /// Set of nodes, that was visited. |
| 112 | std::unordered_set<void *> visited_nodes; |
| 113 | }; |
| 114 | |
| 115 | } |
| 116 | |