1#pragma once
2
#include <list>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <boost/noncopyable.hpp>
#include <Core/Block.h>
#include <Storages/SelectQueryInfo.h>
9
10
11namespace Poco { class Logger; }
12
13namespace DB
14{
15
16class ASTSelectQuery;
17class ASTFunction;
18class MergeTreeData;
19
20/** Identifies WHERE expressions that can be placed in PREWHERE by calculating respective
21 * sizes of columns used in particular expression and identifying "good" conditions of
22 * form "column_name = constant", where "constant" is outside some `threshold` specified in advance.
23 *
24 * If there are "good" conditions present in WHERE, the one with minimal summary column size is transferred to PREWHERE.
25 * Otherwise any condition with minimal summary column size can be transferred to PREWHERE.
26 */
27class MergeTreeWhereOptimizer : private boost::noncopyable
28{
29public:
30 MergeTreeWhereOptimizer(
31 SelectQueryInfo & query_info,
32 const Context & context,
33 const MergeTreeData & data,
34 const Names & queried_column_names_,
35 Poco::Logger * log_);
36
37private:
38 void optimize(ASTSelectQuery & select) const;
39
40 struct Condition
41 {
42 ASTPtr node;
43 UInt64 columns_size = 0;
44 NameSet identifiers;
45 bool viable = false;
46 bool good = false;
47
48 auto tuple() const
49 {
50 return std::make_tuple(!viable, !good, columns_size);
51 }
52
53 /// Is condition a better candidate for moving to PREWHERE?
54 bool operator< (const Condition & rhs) const
55 {
56 return tuple() < rhs.tuple();
57 }
58 };
59
60 using Conditions = std::list<Condition>;
61
62 void analyzeImpl(Conditions & res, const ASTPtr & node) const;
63
64 /// Transform conjunctions chain in WHERE expression to Conditions list.
65 Conditions analyze(const ASTPtr & expression) const;
66
67 /// Transform Conditions list to WHERE or PREWHERE expression.
68 ASTPtr reconstruct(const Conditions & conditions) const;
69
70 void calculateColumnSizes(const MergeTreeData & data, const Names & column_names);
71
72 void optimizeConjunction(ASTSelectQuery & select, ASTFunction * const fun) const;
73
74 void optimizeArbitrary(ASTSelectQuery & select) const;
75
76 UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const;
77
78 bool isConditionGood(const ASTPtr & condition) const;
79
80 bool hasPrimaryKeyAtoms(const ASTPtr & ast) const;
81
82 bool isPrimaryKeyAtom(const ASTPtr & ast) const;
83
84 bool isConstant(const ASTPtr & expr) const;
85
86 bool isSubsetOfTableColumns(const NameSet & identifiers) const;
87
88 /** ARRAY JOIN'ed columns as well as arrayJoin() result cannot be used in PREWHERE, therefore expressions
89 * containing said columns should not be moved to PREWHERE at all.
90 * We assume all AS aliases have been expanded prior to using this class
91 *
92 * Also, disallow moving expressions with GLOBAL [NOT] IN.
93 */
94 bool cannotBeMoved(const ASTPtr & ptr) const;
95
96 void determineArrayJoinedNames(ASTSelectQuery & select);
97
98 using StringSet = std::unordered_set<std::string>;
99
100 String first_primary_key_column;
101 const StringSet table_columns;
102 const Names queried_columns;
103 const Block block_with_constants;
104 Poco::Logger * log;
105 std::unordered_map<std::string, UInt64> column_sizes;
106 UInt64 total_size_of_queried_columns = 0;
107 NameSet array_joined_names;
108};
109
110
111}
112