1 | #pragma once |
2 | |
3 | #include <Core/Block.h> |
4 | #include <Core/ColumnWithTypeAndName.h> |
5 | #include <Core/Names.h> |
6 | #include <Core/Settings.h> |
7 | #include <Interpreters/Context.h> |
8 | #include <Common/SipHash.h> |
9 | #include "config_core.h" |
10 | #include <unordered_map> |
11 | #include <unordered_set> |
12 | #include <Parsers/ASTTablesInSelectQuery.h> |
13 | |
14 | |
15 | namespace DB |
16 | { |
17 | |
18 | namespace ErrorCodes |
19 | { |
20 | extern const int LOGICAL_ERROR; |
21 | } |
22 | |
23 | class AnalyzedJoin; |
24 | class IJoin; |
25 | using JoinPtr = std::shared_ptr<IJoin>; |
26 | |
27 | class IExecutableFunction; |
28 | using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>; |
29 | |
30 | class IFunctionBase; |
31 | using FunctionBasePtr = std::shared_ptr<IFunctionBase>; |
32 | |
33 | class IFunctionOverloadResolver; |
34 | using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>; |
35 | |
36 | class IDataType; |
37 | using DataTypePtr = std::shared_ptr<const IDataType>; |
38 | |
39 | class ExpressionActions; |
40 | |
41 | /** Action on the block. |
42 | */ |
43 | struct ExpressionAction |
44 | { |
45 | private: |
46 | using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; |
47 | public: |
48 | enum Type |
49 | { |
50 | ADD_COLUMN, |
51 | REMOVE_COLUMN, |
52 | COPY_COLUMN, |
53 | |
54 | APPLY_FUNCTION, |
55 | |
56 | /** Replaces the specified columns with arrays into columns with elements. |
57 | * Duplicates the values in the remaining columns by the number of elements in the arrays. |
58 | * Arrays must be parallel (have the same lengths). |
59 | */ |
60 | ARRAY_JOIN, |
61 | |
62 | JOIN, |
63 | |
64 | /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result. |
65 | PROJECT, |
66 | /// Add columns with alias names. This columns are the same as non-aliased. PROJECT columns if you need to modify them. |
67 | ADD_ALIASES, |
68 | }; |
69 | |
70 | Type type{}; |
71 | |
72 | /// For ADD/REMOVE/COPY_COLUMN. |
73 | std::string source_name; |
74 | std::string result_name; |
75 | DataTypePtr result_type; |
76 | |
77 | /// If COPY_COLUMN can replace the result column. |
78 | bool can_replace = false; |
79 | |
80 | /// For ADD_COLUMN. |
81 | ColumnPtr added_column; |
82 | |
83 | /// For APPLY_FUNCTION and LEFT ARRAY JOIN. |
84 | /// OverloadResolver is used before action was added to ExpressionActions (when we don't know types of arguments). |
85 | FunctionOverloadResolverPtr function_builder; |
86 | |
87 | /// For unaligned [LEFT] ARRAY JOIN |
88 | FunctionOverloadResolverPtr function_length; |
89 | FunctionOverloadResolverPtr function_greatest; |
90 | FunctionOverloadResolverPtr function_arrayResize; |
91 | |
92 | /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. |
93 | FunctionBasePtr function_base; |
94 | /// Prepared function which is used in function execution. |
95 | ExecutableFunctionPtr function; |
96 | Names argument_names; |
97 | bool is_function_compiled = false; |
98 | |
99 | /// For ARRAY_JOIN |
100 | NameSet array_joined_columns; |
101 | bool array_join_is_left = false; |
102 | bool unaligned_array_join = false; |
103 | |
104 | /// For JOIN |
105 | std::shared_ptr<const AnalyzedJoin> table_join; |
106 | JoinPtr join; |
107 | |
108 | /// For PROJECT. |
109 | NamesWithAliases projection; |
110 | |
111 | /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". |
112 | static ExpressionAction applyFunction( |
113 | const FunctionOverloadResolverPtr & function_, const std::vector<std::string> & argument_names_, std::string result_name_ = "" ); |
114 | |
115 | static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_); |
116 | static ExpressionAction removeColumn(const std::string & removed_name); |
117 | static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name, bool can_replace = false); |
118 | static ExpressionAction project(const NamesWithAliases & projected_columns_); |
119 | static ExpressionAction project(const Names & projected_columns_); |
120 | static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); |
121 | static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); |
122 | static ExpressionAction ordinaryJoin(std::shared_ptr<AnalyzedJoin> table_join, JoinPtr join); |
123 | |
124 | /// Which columns necessary to perform this action. |
125 | Names getNeededColumns() const; |
126 | |
127 | std::string toString() const; |
128 | |
129 | bool operator==(const ExpressionAction & other) const; |
130 | |
131 | struct ActionHash |
132 | { |
133 | UInt128 operator()(const ExpressionAction & action) const; |
134 | }; |
135 | |
136 | private: |
137 | friend class ExpressionActions; |
138 | |
139 | void prepare(Block & sample_block, const Settings & settings, NameSet & names_not_for_constant_folding); |
140 | void execute(Block & block, bool dry_run) const; |
141 | void executeOnTotals(Block & block) const; |
142 | }; |
143 | |
144 | |
145 | /** Contains a sequence of actions on the block. |
146 | */ |
147 | class ExpressionActions |
148 | { |
149 | public: |
150 | using Actions = std::vector<ExpressionAction>; |
151 | |
152 | ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_) |
153 | : input_columns(input_columns_), settings(context_.getSettingsRef()) |
154 | { |
155 | for (const auto & input_elem : input_columns) |
156 | sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name)); |
157 | |
158 | #if USE_EMBEDDED_COMPILER |
159 | compilation_cache = context_.getCompiledExpressionCache(); |
160 | #endif |
161 | } |
162 | |
163 | /// For constant columns the columns themselves can be contained in `input_columns_`. |
164 | ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_) |
165 | : settings(context_.getSettingsRef()) |
166 | { |
167 | for (const auto & input_elem : input_columns_) |
168 | { |
169 | input_columns.emplace_back(input_elem.name, input_elem.type); |
170 | sample_block.insert(input_elem); |
171 | } |
172 | #if USE_EMBEDDED_COMPILER |
173 | compilation_cache = context_.getCompiledExpressionCache(); |
174 | #endif |
175 | } |
176 | |
177 | /// Add the input column. |
178 | /// The name of the column must not match the names of the intermediate columns that occur when evaluating the expression. |
179 | /// The expression must not have any PROJECT actions. |
180 | void addInput(const ColumnWithTypeAndName & column); |
181 | void addInput(const NameAndTypePair & column); |
182 | |
183 | void add(const ExpressionAction & action); |
184 | |
185 | /// Adds new column names to out_new_columns (formed as a result of the added action). |
186 | void add(const ExpressionAction & action, Names & out_new_columns); |
187 | |
188 | /// Adds to the beginning the removal of all extra columns. |
189 | void prependProjectInput(); |
190 | |
191 | /// Add the specified ARRAY JOIN action to the beginning. Change the appropriate input types to arrays. |
192 | /// If there are unknown columns in the ARRAY JOIN list, take their types from sample_block, and immediately after ARRAY JOIN remove them. |
193 | void prependArrayJoin(const ExpressionAction & action, const Block & sample_block_before); |
194 | |
195 | /// If the last action is ARRAY JOIN, and it does not affect the columns from required_columns, discard and return it. |
196 | /// Change the corresponding output types to arrays. |
197 | bool popUnusedArrayJoin(const Names & required_columns, ExpressionAction & out_action); |
198 | |
199 | /// - Adds actions to delete all but the specified columns. |
200 | /// - Removes unused input columns. |
201 | /// - Can somehow optimize the expression. |
202 | /// - Does not reorder the columns. |
203 | /// - Does not remove "unexpected" columns (for example, added by functions). |
204 | /// - If output_columns is empty, leaves one arbitrary column (so that the number of rows in the block is not lost). |
205 | void finalize(const Names & output_columns); |
206 | |
207 | const Actions & getActions() const { return actions; } |
208 | |
209 | /// Get a list of input columns. |
210 | Names getRequiredColumns() const |
211 | { |
212 | Names names; |
213 | for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) |
214 | names.push_back(it->name); |
215 | return names; |
216 | } |
217 | |
218 | const NamesAndTypesList & getRequiredColumnsWithTypes() const { return input_columns; } |
219 | |
220 | /// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns. |
221 | void execute(Block & block, bool dry_run = false) const; |
222 | |
223 | /// Check if joined subquery has totals. |
224 | bool hasTotalsInJoin() const; |
225 | |
226 | /** Execute the expression on the block of total values. |
227 | * Almost the same as `execute`. The difference is only when JOIN is executed. |
228 | */ |
229 | void executeOnTotals(Block & block) const; |
230 | |
231 | /// Obtain a sample block that contains the names and types of result columns. |
232 | const Block & getSampleBlock() const { return sample_block; } |
233 | |
234 | std::string dumpActions() const; |
235 | |
236 | static std::string getSmallestColumn(const NamesAndTypesList & columns); |
237 | |
238 | JoinPtr getTableJoinAlgo() const; |
239 | |
240 | const Settings & getSettings() const { return settings; } |
241 | |
242 | /// Check if result block has no rows. True if it's definite, false if we can't say for sure. |
243 | /// Call it only after subqueries for join were executed. |
244 | bool resultIsAlwaysEmpty() const; |
245 | |
246 | /// Check if column is always zero. True if it's definite, false if we can't say for sure. |
247 | /// Call it only after subqueries for sets were executed. |
248 | bool checkColumnIsAlwaysFalse(const String & column_name) const; |
249 | |
250 | struct ActionsHash |
251 | { |
252 | UInt128 operator()(const ExpressionActions::Actions & elems) const |
253 | { |
254 | SipHash hash; |
255 | for (const ExpressionAction & act : elems) |
256 | hash.update(ExpressionAction::ActionHash{}(act)); |
257 | UInt128 result; |
258 | hash.get128(result.low, result.high); |
259 | return result; |
260 | } |
261 | }; |
262 | |
263 | private: |
264 | /// These columns have to be in input blocks (arguments of execute* methods) |
265 | NamesAndTypesList input_columns; |
266 | /// These actions will be executed on input blocks |
267 | Actions actions; |
268 | /// The example of result (output) block. |
269 | Block sample_block; |
270 | /// Columns which can't be used for constant folding. |
271 | NameSet names_not_for_constant_folding; |
272 | |
273 | Settings settings; |
274 | #if USE_EMBEDDED_COMPILER |
275 | std::shared_ptr<CompiledExpressionCache> compilation_cache; |
276 | #endif |
277 | |
278 | void checkLimits(Block & block) const; |
279 | |
280 | void addImpl(ExpressionAction action, Names & new_names); |
281 | |
282 | /// Move all arrayJoin as close as possible to the end. |
283 | void optimizeArrayJoin(); |
284 | }; |
285 | |
286 | using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; |
287 | |
288 | |
289 | /** The sequence of transformations over the block. |
290 | * It is assumed that the result of each step is fed to the input of the next step. |
291 | * Used to execute parts of the query individually. |
292 | * |
293 | * For example, you can create a chain of two steps: |
294 | * 1) evaluate the expression in the WHERE clause, |
295 | * 2) calculate the expression in the SELECT section, |
296 | * and between the two steps do the filtering by value in the WHERE clause. |
297 | */ |
298 | struct ExpressionActionsChain |
299 | { |
300 | ExpressionActionsChain(const Context & context_) |
301 | : context(context_) {} |
302 | struct Step |
303 | { |
304 | ExpressionActionsPtr actions; |
305 | /// Columns were added to the block before current step in addition to prev step output. |
306 | NameSet additional_input; |
307 | /// Columns which are required in the result of current step. |
308 | Names required_output; |
309 | /// True if column from required_output is needed only for current step and not used in next actions |
310 | /// (and can be removed from block). Example: filter column for where actions. |
311 | /// If not empty, has the same size with required_output; is filled in finalize(). |
312 | std::vector<bool> can_remove_required_output; |
313 | |
314 | Step(const ExpressionActionsPtr & actions_ = nullptr, const Names & required_output_ = Names()) |
315 | : actions(actions_), required_output(required_output_) {} |
316 | }; |
317 | |
318 | using Steps = std::vector<Step>; |
319 | |
320 | const Context & context; |
321 | Steps steps; |
322 | |
323 | void addStep(); |
324 | |
325 | void finalize(); |
326 | |
327 | void clear() |
328 | { |
329 | steps.clear(); |
330 | } |
331 | |
332 | ExpressionActionsPtr getLastActions() |
333 | { |
334 | if (steps.empty()) |
335 | throw Exception("Empty ExpressionActionsChain" , ErrorCodes::LOGICAL_ERROR); |
336 | |
337 | return steps.back().actions; |
338 | } |
339 | |
340 | Step & getLastStep() |
341 | { |
342 | if (steps.empty()) |
343 | throw Exception("Empty ExpressionActionsChain" , ErrorCodes::LOGICAL_ERROR); |
344 | |
345 | return steps.back(); |
346 | } |
347 | |
348 | std::string dumpChain(); |
349 | }; |
350 | |
351 | } |
352 | |