1#pragma once
2
3#include <Core/Block.h>
4#include <Core/ColumnWithTypeAndName.h>
5#include <Core/Names.h>
6#include <Core/Settings.h>
7#include <Interpreters/Context.h>
8#include <Common/SipHash.h>
9#include "config_core.h"
10#include <unordered_map>
11#include <unordered_set>
12#include <Parsers/ASTTablesInSelectQuery.h>
13
14
15namespace DB
16{
17
/// Error codes used by the inline code of this header; the constants are defined elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
22
/// Forward declarations: this header refers to these classes only by name
/// or through shared pointers, so their full definitions are not required here.

/// Join description and the join algorithm interface.
class AnalyzedJoin;
class IJoin;
using JoinPtr = std::shared_ptr<IJoin>;

/// Function interfaces: overload resolver, function base and prepared executable function.
class IFunctionOverloadResolver;
using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;

class IFunctionBase;
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;

class IExecutableFunction;
using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>;

/// Data types are shared as pointers to const.
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;

/// Declared ahead of its definition because ExpressionAction refers to it.
class ExpressionActions;
40
41/** Action on the block.
42 */
43struct ExpressionAction
44{
45private:
46 using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
47public:
48 enum Type
49 {
50 ADD_COLUMN,
51 REMOVE_COLUMN,
52 COPY_COLUMN,
53
54 APPLY_FUNCTION,
55
56 /** Replaces the specified columns with arrays into columns with elements.
57 * Duplicates the values in the remaining columns by the number of elements in the arrays.
58 * Arrays must be parallel (have the same lengths).
59 */
60 ARRAY_JOIN,
61
62 JOIN,
63
64 /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result.
65 PROJECT,
66 /// Add columns with alias names. This columns are the same as non-aliased. PROJECT columns if you need to modify them.
67 ADD_ALIASES,
68 };
69
70 Type type{};
71
72 /// For ADD/REMOVE/COPY_COLUMN.
73 std::string source_name;
74 std::string result_name;
75 DataTypePtr result_type;
76
77 /// If COPY_COLUMN can replace the result column.
78 bool can_replace = false;
79
80 /// For ADD_COLUMN.
81 ColumnPtr added_column;
82
83 /// For APPLY_FUNCTION and LEFT ARRAY JOIN.
84 /// OverloadResolver is used before action was added to ExpressionActions (when we don't know types of arguments).
85 FunctionOverloadResolverPtr function_builder;
86
87 /// For unaligned [LEFT] ARRAY JOIN
88 FunctionOverloadResolverPtr function_length;
89 FunctionOverloadResolverPtr function_greatest;
90 FunctionOverloadResolverPtr function_arrayResize;
91
92 /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity.
93 FunctionBasePtr function_base;
94 /// Prepared function which is used in function execution.
95 ExecutableFunctionPtr function;
96 Names argument_names;
97 bool is_function_compiled = false;
98
99 /// For ARRAY_JOIN
100 NameSet array_joined_columns;
101 bool array_join_is_left = false;
102 bool unaligned_array_join = false;
103
104 /// For JOIN
105 std::shared_ptr<const AnalyzedJoin> table_join;
106 JoinPtr join;
107
108 /// For PROJECT.
109 NamesWithAliases projection;
110
111 /// If result_name_ == "", as name "function_name(arguments separated by commas) is used".
112 static ExpressionAction applyFunction(
113 const FunctionOverloadResolverPtr & function_, const std::vector<std::string> & argument_names_, std::string result_name_ = "");
114
115 static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_);
116 static ExpressionAction removeColumn(const std::string & removed_name);
117 static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name, bool can_replace = false);
118 static ExpressionAction project(const NamesWithAliases & projected_columns_);
119 static ExpressionAction project(const Names & projected_columns_);
120 static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_);
121 static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context);
122 static ExpressionAction ordinaryJoin(std::shared_ptr<AnalyzedJoin> table_join, JoinPtr join);
123
124 /// Which columns necessary to perform this action.
125 Names getNeededColumns() const;
126
127 std::string toString() const;
128
129 bool operator==(const ExpressionAction & other) const;
130
131 struct ActionHash
132 {
133 UInt128 operator()(const ExpressionAction & action) const;
134 };
135
136private:
137 friend class ExpressionActions;
138
139 void prepare(Block & sample_block, const Settings & settings, NameSet & names_not_for_constant_folding);
140 void execute(Block & block, bool dry_run) const;
141 void executeOnTotals(Block & block) const;
142};
143
144
145/** Contains a sequence of actions on the block.
146 */
147class ExpressionActions
148{
149public:
150 using Actions = std::vector<ExpressionAction>;
151
152 ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_)
153 : input_columns(input_columns_), settings(context_.getSettingsRef())
154 {
155 for (const auto & input_elem : input_columns)
156 sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name));
157
158#if USE_EMBEDDED_COMPILER
159 compilation_cache = context_.getCompiledExpressionCache();
160#endif
161 }
162
163 /// For constant columns the columns themselves can be contained in `input_columns_`.
164 ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_)
165 : settings(context_.getSettingsRef())
166 {
167 for (const auto & input_elem : input_columns_)
168 {
169 input_columns.emplace_back(input_elem.name, input_elem.type);
170 sample_block.insert(input_elem);
171 }
172#if USE_EMBEDDED_COMPILER
173 compilation_cache = context_.getCompiledExpressionCache();
174#endif
175 }
176
177 /// Add the input column.
178 /// The name of the column must not match the names of the intermediate columns that occur when evaluating the expression.
179 /// The expression must not have any PROJECT actions.
180 void addInput(const ColumnWithTypeAndName & column);
181 void addInput(const NameAndTypePair & column);
182
183 void add(const ExpressionAction & action);
184
185 /// Adds new column names to out_new_columns (formed as a result of the added action).
186 void add(const ExpressionAction & action, Names & out_new_columns);
187
188 /// Adds to the beginning the removal of all extra columns.
189 void prependProjectInput();
190
191 /// Add the specified ARRAY JOIN action to the beginning. Change the appropriate input types to arrays.
192 /// If there are unknown columns in the ARRAY JOIN list, take their types from sample_block, and immediately after ARRAY JOIN remove them.
193 void prependArrayJoin(const ExpressionAction & action, const Block & sample_block_before);
194
195 /// If the last action is ARRAY JOIN, and it does not affect the columns from required_columns, discard and return it.
196 /// Change the corresponding output types to arrays.
197 bool popUnusedArrayJoin(const Names & required_columns, ExpressionAction & out_action);
198
199 /// - Adds actions to delete all but the specified columns.
200 /// - Removes unused input columns.
201 /// - Can somehow optimize the expression.
202 /// - Does not reorder the columns.
203 /// - Does not remove "unexpected" columns (for example, added by functions).
204 /// - If output_columns is empty, leaves one arbitrary column (so that the number of rows in the block is not lost).
205 void finalize(const Names & output_columns);
206
207 const Actions & getActions() const { return actions; }
208
209 /// Get a list of input columns.
210 Names getRequiredColumns() const
211 {
212 Names names;
213 for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it)
214 names.push_back(it->name);
215 return names;
216 }
217
218 const NamesAndTypesList & getRequiredColumnsWithTypes() const { return input_columns; }
219
220 /// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns.
221 void execute(Block & block, bool dry_run = false) const;
222
223 /// Check if joined subquery has totals.
224 bool hasTotalsInJoin() const;
225
226 /** Execute the expression on the block of total values.
227 * Almost the same as `execute`. The difference is only when JOIN is executed.
228 */
229 void executeOnTotals(Block & block) const;
230
231 /// Obtain a sample block that contains the names and types of result columns.
232 const Block & getSampleBlock() const { return sample_block; }
233
234 std::string dumpActions() const;
235
236 static std::string getSmallestColumn(const NamesAndTypesList & columns);
237
238 JoinPtr getTableJoinAlgo() const;
239
240 const Settings & getSettings() const { return settings; }
241
242 /// Check if result block has no rows. True if it's definite, false if we can't say for sure.
243 /// Call it only after subqueries for join were executed.
244 bool resultIsAlwaysEmpty() const;
245
246 /// Check if column is always zero. True if it's definite, false if we can't say for sure.
247 /// Call it only after subqueries for sets were executed.
248 bool checkColumnIsAlwaysFalse(const String & column_name) const;
249
250 struct ActionsHash
251 {
252 UInt128 operator()(const ExpressionActions::Actions & elems) const
253 {
254 SipHash hash;
255 for (const ExpressionAction & act : elems)
256 hash.update(ExpressionAction::ActionHash{}(act));
257 UInt128 result;
258 hash.get128(result.low, result.high);
259 return result;
260 }
261 };
262
263private:
264 /// These columns have to be in input blocks (arguments of execute* methods)
265 NamesAndTypesList input_columns;
266 /// These actions will be executed on input blocks
267 Actions actions;
268 /// The example of result (output) block.
269 Block sample_block;
270 /// Columns which can't be used for constant folding.
271 NameSet names_not_for_constant_folding;
272
273 Settings settings;
274#if USE_EMBEDDED_COMPILER
275 std::shared_ptr<CompiledExpressionCache> compilation_cache;
276#endif
277
278 void checkLimits(Block & block) const;
279
280 void addImpl(ExpressionAction action, Names & new_names);
281
282 /// Move all arrayJoin as close as possible to the end.
283 void optimizeArrayJoin();
284};
285
286using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
287
288
289/** The sequence of transformations over the block.
290 * It is assumed that the result of each step is fed to the input of the next step.
291 * Used to execute parts of the query individually.
292 *
293 * For example, you can create a chain of two steps:
294 * 1) evaluate the expression in the WHERE clause,
295 * 2) calculate the expression in the SELECT section,
296 * and between the two steps do the filtering by value in the WHERE clause.
297 */
298struct ExpressionActionsChain
299{
300 ExpressionActionsChain(const Context & context_)
301 : context(context_) {}
302 struct Step
303 {
304 ExpressionActionsPtr actions;
305 /// Columns were added to the block before current step in addition to prev step output.
306 NameSet additional_input;
307 /// Columns which are required in the result of current step.
308 Names required_output;
309 /// True if column from required_output is needed only for current step and not used in next actions
310 /// (and can be removed from block). Example: filter column for where actions.
311 /// If not empty, has the same size with required_output; is filled in finalize().
312 std::vector<bool> can_remove_required_output;
313
314 Step(const ExpressionActionsPtr & actions_ = nullptr, const Names & required_output_ = Names())
315 : actions(actions_), required_output(required_output_) {}
316 };
317
318 using Steps = std::vector<Step>;
319
320 const Context & context;
321 Steps steps;
322
323 void addStep();
324
325 void finalize();
326
327 void clear()
328 {
329 steps.clear();
330 }
331
332 ExpressionActionsPtr getLastActions()
333 {
334 if (steps.empty())
335 throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR);
336
337 return steps.back().actions;
338 }
339
340 Step & getLastStep()
341 {
342 if (steps.empty())
343 throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR);
344
345 return steps.back();
346 }
347
348 std::string dumpChain();
349};
350
351}
352