1 | #pragma once |
2 | |
3 | #include <Parsers/IAST.h> |
4 | |
5 | #include <string> |
6 | #include <vector> |
7 | #include <map> |
8 | #include <unordered_map> |
9 | #include <unordered_set> |
10 | |
11 | |
12 | namespace DB |
13 | { |
14 | |
15 | struct Settings; |
16 | class ASTFunction; |
17 | class ASTSelectQuery; |
18 | |
19 | |
20 | /** This class provides functions for optimizing boolean expressions within queries. |
21 | * |
22 | * For simplicity, we call a homogeneous OR-chain any expression having the following structure: |
23 | * expr = x1 OR ... OR expr = xN |
24 | * where `expr` is an arbitrary expression and x1, ..., xN are literals of the same type |
25 | */ |
26 | class LogicalExpressionsOptimizer final |
27 | { |
28 | struct |
29 | { |
30 | const UInt64 ; |
31 | |
32 | (UInt64 optimize_min_equality_disjunction_chain_length_) |
33 | : optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_) |
34 | {} |
35 | }; |
36 | |
37 | public: |
38 | /// Constructor. Accepts the root of the query DAG. |
39 | LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length); |
40 | |
41 | /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN |
42 | * on the expressions `expr` IN (x1, ..., xN). |
43 | */ |
44 | void perform(); |
45 | |
46 | LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete; |
47 | LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete; |
48 | |
49 | private: |
50 | /** The OR function with the expression. |
51 | */ |
52 | struct OrWithExpression |
53 | { |
54 | OrWithExpression(const ASTFunction * or_function_, const IAST::Hash & expression_, const std::string & alias_); |
55 | bool operator<(const OrWithExpression & rhs) const; |
56 | |
57 | const ASTFunction * or_function; |
58 | const IAST::Hash expression; |
59 | const std::string alias; |
60 | }; |
61 | |
62 | struct Equalities |
63 | { |
64 | std::vector<ASTFunction *> functions; |
65 | bool is_processed = false; |
66 | }; |
67 | |
68 | using DisjunctiveEqualityChainsMap = std::map<OrWithExpression, Equalities>; |
69 | using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type; |
70 | |
71 | private: |
72 | /** Collect information about all the equations in the OR chains (not necessarily homogeneous). |
73 | * This information is grouped by the expression that is on the left side of the equation. |
74 | */ |
75 | void collectDisjunctiveEqualityChains(); |
76 | |
77 | /** Check that the set of equalities expr = x1, ..., expr = xN fulfills the following two requirements: |
78 | * 1. It's not too small |
79 | * 2. x1, ... xN have the same type |
80 | */ |
81 | bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const; |
82 | |
83 | /// Insert the IN expression into the OR chain. |
84 | void addInExpression(const DisjunctiveEqualityChain & chain); |
85 | |
86 | /// Delete the equalities that were replaced by the IN expressions. |
87 | void cleanupOrExpressions(); |
88 | |
89 | /// Delete OR expressions that have only one operand. |
90 | void fixBrokenOrExpressions(); |
91 | |
92 | /// Restore the original column order after optimization. |
93 | void reorderColumns(); |
94 | |
95 | private: |
96 | using ParentNodes = std::vector<IAST *>; |
97 | using FunctionParentMap = std::unordered_map<const IAST *, ParentNodes>; |
98 | using ColumnToPosition = std::unordered_map<const IAST *, size_t>; |
99 | |
100 | private: |
101 | ASTSelectQuery * select_query; |
102 | const ExtractedSettings settings; |
103 | /// Information about the OR-chains inside the query. |
104 | DisjunctiveEqualityChainsMap disjunctive_equality_chains_map; |
105 | /// Number of processed OR-chains. |
106 | size_t processed_count = 0; |
107 | /// Parents of OR functions. |
108 | FunctionParentMap or_parent_map; |
109 | /// The position of each column. |
110 | ColumnToPosition column_to_position; |
111 | /// Set of nodes, that was visited. |
112 | std::unordered_set<void *> visited_nodes; |
113 | }; |
114 | |
115 | } |
116 | |