1 | #pragma once |
2 | |
#include <list>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <boost/noncopyable.hpp>
#include <Core/Block.h>
#include <Storages/SelectQueryInfo.h>
9 | |
10 | |
/// Forward declaration only: the logger is referred to by pointer in this header,
/// so the full Poco logging header is not required here.
namespace Poco
{
class Logger;
}
12 | |
13 | namespace DB |
14 | { |
15 | |
/// Forward declarations: these types are used only by reference or by pointer in this header,
/// so their full definitions are not needed here.
class ASTSelectQuery;
class ASTFunction;
class Context;
class MergeTreeData;
19 | |
20 | /** Identifies WHERE expressions that can be placed in PREWHERE by calculating respective |
21 | * sizes of columns used in particular expression and identifying "good" conditions of |
22 | * form "column_name = constant", where "constant" is outside some `threshold` specified in advance. |
23 | * |
24 | * If there are "good" conditions present in WHERE, the one with minimal summary column size is transferred to PREWHERE. |
25 | * Otherwise any condition with minimal summary column size can be transferred to PREWHERE. |
26 | */ |
27 | class MergeTreeWhereOptimizer : private boost::noncopyable |
28 | { |
29 | public: |
30 | MergeTreeWhereOptimizer( |
31 | SelectQueryInfo & query_info, |
32 | const Context & context, |
33 | const MergeTreeData & data, |
34 | const Names & queried_column_names_, |
35 | Poco::Logger * log_); |
36 | |
37 | private: |
38 | void optimize(ASTSelectQuery & select) const; |
39 | |
40 | struct Condition |
41 | { |
42 | ASTPtr node; |
43 | UInt64 columns_size = 0; |
44 | NameSet identifiers; |
45 | bool viable = false; |
46 | bool good = false; |
47 | |
48 | auto tuple() const |
49 | { |
50 | return std::make_tuple(!viable, !good, columns_size); |
51 | } |
52 | |
53 | /// Is condition a better candidate for moving to PREWHERE? |
54 | bool operator< (const Condition & rhs) const |
55 | { |
56 | return tuple() < rhs.tuple(); |
57 | } |
58 | }; |
59 | |
60 | using Conditions = std::list<Condition>; |
61 | |
62 | void analyzeImpl(Conditions & res, const ASTPtr & node) const; |
63 | |
64 | /// Transform conjunctions chain in WHERE expression to Conditions list. |
65 | Conditions analyze(const ASTPtr & expression) const; |
66 | |
67 | /// Transform Conditions list to WHERE or PREWHERE expression. |
68 | ASTPtr reconstruct(const Conditions & conditions) const; |
69 | |
70 | void calculateColumnSizes(const MergeTreeData & data, const Names & column_names); |
71 | |
72 | void optimizeConjunction(ASTSelectQuery & select, ASTFunction * const fun) const; |
73 | |
74 | void optimizeArbitrary(ASTSelectQuery & select) const; |
75 | |
76 | UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const; |
77 | |
78 | bool isConditionGood(const ASTPtr & condition) const; |
79 | |
80 | bool hasPrimaryKeyAtoms(const ASTPtr & ast) const; |
81 | |
82 | bool isPrimaryKeyAtom(const ASTPtr & ast) const; |
83 | |
84 | bool isConstant(const ASTPtr & expr) const; |
85 | |
86 | bool isSubsetOfTableColumns(const NameSet & identifiers) const; |
87 | |
88 | /** ARRAY JOIN'ed columns as well as arrayJoin() result cannot be used in PREWHERE, therefore expressions |
89 | * containing said columns should not be moved to PREWHERE at all. |
90 | * We assume all AS aliases have been expanded prior to using this class |
91 | * |
92 | * Also, disallow moving expressions with GLOBAL [NOT] IN. |
93 | */ |
94 | bool cannotBeMoved(const ASTPtr & ptr) const; |
95 | |
96 | void determineArrayJoinedNames(ASTSelectQuery & select); |
97 | |
98 | using StringSet = std::unordered_set<std::string>; |
99 | |
100 | String first_primary_key_column; |
101 | const StringSet table_columns; |
102 | const Names queried_columns; |
103 | const Block block_with_constants; |
104 | Poco::Logger * log; |
105 | std::unordered_map<std::string, UInt64> column_sizes; |
106 | UInt64 total_size_of_queried_columns = 0; |
107 | NameSet array_joined_names; |
108 | }; |
109 | |
110 | |
111 | } |
112 | |