1 | #pragma once |
2 | |
3 | #include <Core/Settings.h> |
4 | #include <DataStreams/IBlockStream_fwd.h> |
5 | #include <Interpreters/AggregateDescription.h> |
6 | #include <Interpreters/SyntaxAnalyzer.h> |
7 | #include <Interpreters/SubqueryForSet.h> |
8 | #include <Parsers/IAST_fwd.h> |
9 | #include <Storages/IStorage_fwd.h> |
10 | |
11 | |
12 | namespace DB |
13 | { |
14 | |
15 | class Block; |
16 | class Context; |
17 | |
18 | struct ExpressionActionsChain; |
19 | class ExpressionActions; |
20 | using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; |
21 | using ManyExpressionActions = std::vector<ExpressionActionsPtr>; |
22 | |
23 | struct ASTTableJoin; |
24 | class IJoin; |
25 | using JoinPtr = std::shared_ptr<IJoin>; |
26 | |
27 | class ASTFunction; |
28 | class ASTExpressionList; |
29 | class ASTSelectQuery; |
30 | struct ASTTablesInSelectQueryElement; |
31 | |
32 | /// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately. |
33 | struct ExpressionAnalyzerData |
34 | { |
35 | SubqueriesForSets subqueries_for_sets; |
36 | PreparedSets prepared_sets; |
37 | |
38 | /// Columns after ARRAY JOIN, JOIN, and/or aggregation. |
39 | NamesAndTypesList aggregated_columns; |
40 | NamesAndTypesList array_join_columns; |
41 | |
42 | bool has_aggregation = false; |
43 | NamesAndTypesList aggregation_keys; |
44 | AggregateDescriptions aggregate_descriptions; |
45 | |
46 | bool has_global_subqueries = false; |
47 | |
48 | /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. |
49 | Tables external_tables; |
50 | |
51 | /// Actions by every element of ORDER BY |
52 | ManyExpressionActions order_by_elements_actions; |
53 | }; |
54 | |
55 | |
56 | /** Transforms an expression from a syntax tree into a sequence of actions to execute it. |
57 | * |
58 | * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. |
59 | */ |
60 | class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable |
61 | { |
62 | private: |
63 | /// Extracts settings to enlight which are used (and avoid copy of others). |
64 | struct |
65 | { |
66 | const bool ; |
67 | const SizeLimits ; |
68 | |
69 | (const Settings & settings_) |
70 | : use_index_for_in_with_subqueries(settings_.use_index_for_in_with_subqueries), |
71 | size_limits_for_set(settings_.max_rows_in_set, settings_.max_bytes_in_set, settings_.set_overflow_mode) |
72 | {} |
73 | }; |
74 | |
75 | public: |
76 | /// Ctor for non-select queries. Generally its usage is: |
77 | /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions(); |
78 | ExpressionAnalyzer( |
79 | const ASTPtr & query_, |
80 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
81 | const Context & context_) |
82 | : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false) |
83 | {} |
84 | |
85 | void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types); |
86 | |
87 | /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression. |
88 | /// If add_aliases, only the calculated values in the desired order and add aliases. |
89 | /// If also project_result, than only aliases remain in the output block. |
90 | /// Otherwise, only temporary columns will be deleted from the block. |
91 | ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true); |
92 | |
93 | /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants. |
94 | /// Does not execute subqueries. |
95 | ExpressionActionsPtr getConstActions(); |
96 | |
97 | /** Sets that require a subquery to be create. |
98 | * Only the sets needed to perform actions returned from already executed `append*` or `getActions`. |
99 | * That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions` |
100 | * and create all the returned sets before performing the actions. |
101 | */ |
102 | const SubqueriesForSets & getSubqueriesForSets() const { return subqueries_for_sets; } |
103 | |
104 | /// Get intermediates for tests |
105 | const ExpressionAnalyzerData & getAnalyzedData() const { return *this; } |
106 | |
107 | protected: |
108 | ExpressionAnalyzer( |
109 | const ASTPtr & query_, |
110 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
111 | const Context & context_, |
112 | size_t subquery_depth_, |
113 | bool do_global_); |
114 | |
115 | ASTPtr query; |
116 | const Context & context; |
117 | const ExtractedSettings settings; |
118 | size_t subquery_depth; |
119 | |
120 | SyntaxAnalyzerResultPtr syntax; |
121 | |
122 | const StoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists. |
123 | const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; } |
124 | const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; } |
125 | const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; } |
126 | NamesAndTypesList sourceWithJoinedColumns() const; |
127 | |
128 | /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. |
129 | void initGlobalSubqueriesAndExternalTables(bool do_global); |
130 | |
131 | void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const; |
132 | |
133 | void addJoinAction(ExpressionActionsPtr & actions, JoinPtr = {}) const; |
134 | |
135 | void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false); |
136 | |
137 | /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, |
138 | * Create a set of columns aggregated_columns resulting after the aggregation, if any, |
139 | * or after all the actions that are normally performed before aggregation. |
140 | * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. |
141 | */ |
142 | void analyzeAggregation(); |
143 | bool makeAggregateDescriptions(ExpressionActionsPtr & actions); |
144 | |
145 | /// columns - the columns that are present before the transformations begin. |
146 | void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const; |
147 | |
148 | const ASTSelectQuery * getSelectQuery() const; |
149 | |
150 | bool isRemoteStorage() const; |
151 | }; |
152 | |
153 | /// SelectQuery specific ExpressionAnalyzer part. |
154 | class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer |
155 | { |
156 | public: |
157 | SelectQueryExpressionAnalyzer( |
158 | const ASTPtr & query_, |
159 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
160 | const Context & context_, |
161 | const NameSet & required_result_columns_ = {}, |
162 | size_t subquery_depth_ = 0, |
163 | bool do_global_ = false) |
164 | : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, subquery_depth_, do_global_) |
165 | , required_result_columns(required_result_columns_) |
166 | {} |
167 | |
168 | /// Does the expression have aggregate functions or a GROUP BY or HAVING section. |
169 | bool hasAggregation() const { return has_aggregation; } |
170 | bool hasGlobalSubqueries() { return has_global_subqueries; } |
171 | |
172 | /// Get a list of aggregation keys and descriptions of aggregate functions if the query contains GROUP BY. |
173 | void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const; |
174 | |
175 | const PreparedSets & getPreparedSets() const { return prepared_sets; } |
176 | |
177 | const ManyExpressionActions & getOrderByActions() const { return order_by_elements_actions; } |
178 | |
179 | /// Tables that will need to be sent to remote servers for distributed query processing. |
180 | const Tables & getExternalTables() const { return external_tables; } |
181 | |
182 | /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query. |
183 | * |
184 | * Example usage: |
185 | * ExpressionActionsChain chain; |
186 | * analyzer.appendWhere(chain); |
187 | * chain.addStep(); |
188 | * analyzer.appendSelect(chain); |
189 | * analyzer.appendOrderBy(chain); |
190 | * chain.finalize(); |
191 | * |
192 | * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way |
193 | * shouldn't be executed, they are only needed to get a list of columns with their types. |
194 | */ |
195 | |
196 | /// Before aggregation: |
197 | bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types); |
198 | bool appendJoin(ExpressionActionsChain & chain, bool only_types); |
199 | /// Add preliminary rows filtration. Actions are created in other expression analyzer to prevent any possible alias injection. |
200 | void appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name); |
201 | /// remove_filter is set in ExpressionActionsChain::finalize(); |
202 | /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). |
203 | bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); |
204 | bool appendWhere(ExpressionActionsChain & chain, bool only_types); |
205 | bool appendGroupBy(ExpressionActionsChain & chain, bool only_types); |
206 | void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); |
207 | |
208 | /// After aggregation: |
209 | bool appendHaving(ExpressionActionsChain & chain, bool only_types); |
210 | void appendSelect(ExpressionActionsChain & chain, bool only_types); |
211 | bool appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order); |
212 | bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); |
213 | /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. |
214 | void appendProjectResult(ExpressionActionsChain & chain) const; |
215 | |
216 | /// Create Set-s that we can from IN section to use the index on them. |
217 | void makeSetsForIndex(const ASTPtr & node); |
218 | |
219 | private: |
220 | /// If non-empty, ignore all expressions not from this list. |
221 | NameSet required_result_columns; |
222 | |
223 | /** |
224 | * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. |
225 | * The set will not be created if its size hits the limit. |
226 | */ |
227 | void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); |
228 | |
229 | /** |
230 | * Checks if subquery is not a plain StorageSet. |
231 | * Because while making set we will read data from StorageSet which is not allowed. |
232 | * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. |
233 | */ |
234 | SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_of_table_name); |
235 | |
236 | JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element); |
237 | void makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, NamesWithAliases && required_columns_with_aliases, |
238 | SubqueryForSet & subquery_for_set) const; |
239 | |
240 | const ASTSelectQuery * getAggregatingQuery() const; |
241 | }; |
242 | |
243 | } |
244 | |