1#pragma once
2
3#include <Parsers/IAST.h>
4
5#include <string>
6#include <vector>
7#include <map>
8#include <unordered_map>
9#include <unordered_set>
10
11
12namespace DB
13{
14
15struct Settings;
16class ASTFunction;
17class ASTSelectQuery;
18
19
20/** This class provides functions for optimizing boolean expressions within queries.
21 *
22 * For simplicity, we call a homogeneous OR-chain any expression having the following structure:
23 * expr = x1 OR ... OR expr = xN
24 * where `expr` is an arbitrary expression and x1, ..., xN are literals of the same type
25 */
26class LogicalExpressionsOptimizer final
27{
28 struct ExtractedSettings
29 {
30 const UInt64 optimize_min_equality_disjunction_chain_length;
31
32 ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_)
33 : optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_)
34 {}
35 };
36
37public:
38 /// Constructor. Accepts the root of the query DAG.
39 LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length);
40
41 /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN
42 * on the expressions `expr` IN (x1, ..., xN).
43 */
44 void perform();
45
46 LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete;
47 LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete;
48
49private:
50 /** The OR function with the expression.
51 */
52 struct OrWithExpression
53 {
54 OrWithExpression(const ASTFunction * or_function_, const IAST::Hash & expression_, const std::string & alias_);
55 bool operator<(const OrWithExpression & rhs) const;
56
57 const ASTFunction * or_function;
58 const IAST::Hash expression;
59 const std::string alias;
60 };
61
62 struct Equalities
63 {
64 std::vector<ASTFunction *> functions;
65 bool is_processed = false;
66 };
67
68 using DisjunctiveEqualityChainsMap = std::map<OrWithExpression, Equalities>;
69 using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type;
70
71private:
72 /** Collect information about all the equations in the OR chains (not necessarily homogeneous).
73 * This information is grouped by the expression that is on the left side of the equation.
74 */
75 void collectDisjunctiveEqualityChains();
76
77 /** Check that the set of equalities expr = x1, ..., expr = xN fulfills the following two requirements:
78 * 1. It's not too small
79 * 2. x1, ... xN have the same type
80 */
81 bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const;
82
83 /// Insert the IN expression into the OR chain.
84 void addInExpression(const DisjunctiveEqualityChain & chain);
85
86 /// Delete the equalities that were replaced by the IN expressions.
87 void cleanupOrExpressions();
88
89 /// Delete OR expressions that have only one operand.
90 void fixBrokenOrExpressions();
91
92 /// Restore the original column order after optimization.
93 void reorderColumns();
94
95private:
96 using ParentNodes = std::vector<IAST *>;
97 using FunctionParentMap = std::unordered_map<const IAST *, ParentNodes>;
98 using ColumnToPosition = std::unordered_map<const IAST *, size_t>;
99
100private:
101 ASTSelectQuery * select_query;
102 const ExtractedSettings settings;
103 /// Information about the OR-chains inside the query.
104 DisjunctiveEqualityChainsMap disjunctive_equality_chains_map;
105 /// Number of processed OR-chains.
106 size_t processed_count = 0;
107 /// Parents of OR functions.
108 FunctionParentMap or_parent_map;
109 /// The position of each column.
110 ColumnToPosition column_to_position;
111 /// Set of nodes, that was visited.
112 std::unordered_set<void *> visited_nodes;
113};
114
115}
116