1#pragma once
2
3#include <Core/Settings.h>
4#include <DataStreams/IBlockStream_fwd.h>
5#include <Interpreters/AggregateDescription.h>
6#include <Interpreters/SyntaxAnalyzer.h>
7#include <Interpreters/SubqueryForSet.h>
8#include <Parsers/IAST_fwd.h>
9#include <Storages/IStorage_fwd.h>
10
11
12namespace DB
13{
14
/// Forward declarations: this header only uses these types by reference, pointer or smart pointer,
/// so the full definitions are not needed here.

/// Types declared with the `class` key.
class Block;
class Context;
class ExpressionActions;
class IJoin;
class ASTFunction;
class ASTExpressionList;
class ASTSelectQuery;

/// Types declared with the `struct` key.
struct ExpressionActionsChain;
struct ASTTableJoin;
struct ASTTablesInSelectQueryElement;

/// Shared-ownership handles used throughout the analyzer interface.
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
using JoinPtr = std::shared_ptr<IJoin>;
31
/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately.
/// ExpressionAnalyzer derives from this struct and hands it out read-only via getAnalyzedData().
struct ExpressionAnalyzerData
{
    /// Returned to callers by ExpressionAnalyzer::getSubqueriesForSets().
    SubqueriesForSets subqueries_for_sets;
    /// Returned to callers by SelectQueryExpressionAnalyzer::getPreparedSets().
    PreparedSets prepared_sets;

    /// Columns after ARRAY JOIN, JOIN, and/or aggregation.
    NamesAndTypesList aggregated_columns;
    /// NOTE(review): presumably the columns introduced by ARRAY JOIN - confirm against the implementation.
    NamesAndTypesList array_join_columns;

    /// Aggregation state. According to the comment on ExpressionAnalyzer::analyzeAggregation(),
    /// has_aggregation is set to true if there is GROUP BY or at least one aggregate function,
    /// and the keys / descriptions below are filled in by the same step.
    bool has_aggregation = false;
    NamesAndTypesList aggregation_keys;
    AggregateDescriptions aggregate_descriptions;

    /// Exposed through SelectQueryExpressionAnalyzer::hasGlobalSubqueries().
    bool has_global_subqueries = false;

    /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
    Tables external_tables;

    /// Actions by every element of ORDER BY
    ManyExpressionActions order_by_elements_actions;
};
54
55
56/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
57 *
58 * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
59 */
60class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable
61{
62private:
63 /// Extracts settings to enlight which are used (and avoid copy of others).
64 struct ExtractedSettings
65 {
66 const bool use_index_for_in_with_subqueries;
67 const SizeLimits size_limits_for_set;
68
69 ExtractedSettings(const Settings & settings_)
70 : use_index_for_in_with_subqueries(settings_.use_index_for_in_with_subqueries),
71 size_limits_for_set(settings_.max_rows_in_set, settings_.max_bytes_in_set, settings_.set_overflow_mode)
72 {}
73 };
74
75public:
76 /// Ctor for non-select queries. Generally its usage is:
77 /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions();
78 ExpressionAnalyzer(
79 const ASTPtr & query_,
80 const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
81 const Context & context_)
82 : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false)
83 {}
84
85 void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);
86
87 /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression.
88 /// If add_aliases, only the calculated values in the desired order and add aliases.
89 /// If also project_result, than only aliases remain in the output block.
90 /// Otherwise, only temporary columns will be deleted from the block.
91 ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true);
92
93 /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
94 /// Does not execute subqueries.
95 ExpressionActionsPtr getConstActions();
96
97 /** Sets that require a subquery to be create.
98 * Only the sets needed to perform actions returned from already executed `append*` or `getActions`.
99 * That is, you need to call getSetsWithSubqueries after all calls of `append*` or `getActions`
100 * and create all the returned sets before performing the actions.
101 */
102 const SubqueriesForSets & getSubqueriesForSets() const { return subqueries_for_sets; }
103
104 /// Get intermediates for tests
105 const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
106
107protected:
108 ExpressionAnalyzer(
109 const ASTPtr & query_,
110 const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
111 const Context & context_,
112 size_t subquery_depth_,
113 bool do_global_);
114
115 ASTPtr query;
116 const Context & context;
117 const ExtractedSettings settings;
118 size_t subquery_depth;
119
120 SyntaxAnalyzerResultPtr syntax;
121
122 const StoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
123 const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; }
124 const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
125 const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
126 NamesAndTypesList sourceWithJoinedColumns() const;
127
128 /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
129 void initGlobalSubqueriesAndExternalTables(bool do_global);
130
131 void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const;
132
133 void addJoinAction(ExpressionActionsPtr & actions, JoinPtr = {}) const;
134
135 void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
136
137 /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
138 * Create a set of columns aggregated_columns resulting after the aggregation, if any,
139 * or after all the actions that are normally performed before aggregation.
140 * Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
141 */
142 void analyzeAggregation();
143 bool makeAggregateDescriptions(ExpressionActionsPtr & actions);
144
145 /// columns - the columns that are present before the transformations begin.
146 void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const;
147
148 const ASTSelectQuery * getSelectQuery() const;
149
150 bool isRemoteStorage() const;
151};
152
153/// SelectQuery specific ExpressionAnalyzer part.
154class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer
155{
156public:
157 SelectQueryExpressionAnalyzer(
158 const ASTPtr & query_,
159 const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
160 const Context & context_,
161 const NameSet & required_result_columns_ = {},
162 size_t subquery_depth_ = 0,
163 bool do_global_ = false)
164 : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, subquery_depth_, do_global_)
165 , required_result_columns(required_result_columns_)
166 {}
167
168 /// Does the expression have aggregate functions or a GROUP BY or HAVING section.
169 bool hasAggregation() const { return has_aggregation; }
170 bool hasGlobalSubqueries() { return has_global_subqueries; }
171
172 /// Get a list of aggregation keys and descriptions of aggregate functions if the query contains GROUP BY.
173 void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const;
174
175 const PreparedSets & getPreparedSets() const { return prepared_sets; }
176
177 const ManyExpressionActions & getOrderByActions() const { return order_by_elements_actions; }
178
179 /// Tables that will need to be sent to remote servers for distributed query processing.
180 const Tables & getExternalTables() const { return external_tables; }
181
182 /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query.
183 *
184 * Example usage:
185 * ExpressionActionsChain chain;
186 * analyzer.appendWhere(chain);
187 * chain.addStep();
188 * analyzer.appendSelect(chain);
189 * analyzer.appendOrderBy(chain);
190 * chain.finalize();
191 *
192 * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way
193 * shouldn't be executed, they are only needed to get a list of columns with their types.
194 */
195
196 /// Before aggregation:
197 bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types);
198 bool appendJoin(ExpressionActionsChain & chain, bool only_types);
199 /// Add preliminary rows filtration. Actions are created in other expression analyzer to prevent any possible alias injection.
200 void appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name);
201 /// remove_filter is set in ExpressionActionsChain::finalize();
202 /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier).
203 bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns);
204 bool appendWhere(ExpressionActionsChain & chain, bool only_types);
205 bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
206 void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
207
208 /// After aggregation:
209 bool appendHaving(ExpressionActionsChain & chain, bool only_types);
210 void appendSelect(ExpressionActionsChain & chain, bool only_types);
211 bool appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order);
212 bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
213 /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
214 void appendProjectResult(ExpressionActionsChain & chain) const;
215
216 /// Create Set-s that we can from IN section to use the index on them.
217 void makeSetsForIndex(const ASTPtr & node);
218
219private:
220 /// If non-empty, ignore all expressions not from this list.
221 NameSet required_result_columns;
222
223 /**
224 * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
225 * The set will not be created if its size hits the limit.
226 */
227 void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
228
229 /**
230 * Checks if subquery is not a plain StorageSet.
231 * Because while making set we will read data from StorageSet which is not allowed.
232 * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise.
233 */
234 SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_of_table_name);
235
236 JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element);
237 void makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, NamesWithAliases && required_columns_with_aliases,
238 SubqueryForSet & subquery_for_set) const;
239
240 const ASTSelectQuery * getAggregatingQuery() const;
241};
242
243}
244