1 | #include <Poco/Util/Application.h> |
2 | #include <Poco/String.h> |
3 | |
4 | #include <Core/Block.h> |
5 | |
6 | #include <Parsers/ASTFunction.h> |
7 | #include <Parsers/ASTIdentifier.h> |
8 | #include <Parsers/ASTLiteral.h> |
9 | #include <Parsers/ASTQualifiedAsterisk.h> |
10 | #include <Parsers/ASTExpressionList.h> |
11 | #include <Parsers/ASTSelectQuery.h> |
12 | #include <Parsers/ASTSelectWithUnionQuery.h> |
13 | #include <Parsers/ASTSubquery.h> |
14 | #include <Parsers/ASTOrderByElement.h> |
15 | #include <Parsers/formatAST.h> |
16 | #include <Parsers/DumpASTNode.h> |
17 | |
18 | #include <DataTypes/DataTypeNullable.h> |
19 | #include <DataTypes/NestedUtils.h> |
20 | #include <DataTypes/DataTypesNumber.h> |
21 | #include <DataTypes/DataTypeLowCardinality.h> |
22 | |
23 | #include <Columns/IColumn.h> |
24 | |
25 | #include <Interpreters/ExpressionAnalyzer.h> |
26 | #include <Interpreters/ExpressionActions.h> |
27 | #include <Interpreters/InJoinSubqueriesPreprocessor.h> |
28 | #include <Interpreters/LogicalExpressionsOptimizer.h> |
29 | #include <Interpreters/PredicateExpressionsOptimizer.h> |
30 | #include <Interpreters/ExternalDictionariesLoader.h> |
31 | #include <Interpreters/Set.h> |
32 | #include <Interpreters/AnalyzedJoin.h> |
33 | #include <Interpreters/Join.h> |
34 | |
35 | #include <AggregateFunctions/AggregateFunctionFactory.h> |
36 | #include <AggregateFunctions/parseAggregateFunctionParameters.h> |
37 | |
38 | #include <Storages/StorageDistributed.h> |
39 | #include <Storages/StorageJoin.h> |
40 | |
41 | #include <DataStreams/copyData.h> |
42 | #include <DataStreams/IBlockInputStream.h> |
43 | |
44 | #include <Dictionaries/IDictionary.h> |
45 | |
46 | #include <Common/typeid_cast.h> |
47 | #include <Common/StringUtils/StringUtils.h> |
48 | |
49 | #include <ext/range.h> |
50 | #include <DataTypes/DataTypeFactory.h> |
51 | #include <Functions/FunctionsMiscellaneous.h> |
52 | #include <Parsers/ExpressionListParsers.h> |
53 | #include <Parsers/parseQuery.h> |
54 | #include <Parsers/queryToString.h> |
55 | #include <Interpreters/interpretSubquery.h> |
56 | #include <Interpreters/DatabaseAndTableWithAlias.h> |
57 | #include <Interpreters/misc.h> |
58 | |
59 | #include <Interpreters/ActionsVisitor.h> |
60 | |
61 | #include <Interpreters/ExternalTablesVisitor.h> |
62 | #include <Interpreters/GlobalSubqueriesVisitor.h> |
63 | #include <Interpreters/GetAggregatesVisitor.h> |
64 | |
65 | namespace DB |
66 | { |
67 | |
68 | using LogAST = DebugASTLog<false>; /// set to true to enable logs |
69 | |
70 | |
71 | namespace ErrorCodes |
72 | { |
73 | extern const int UNKNOWN_IDENTIFIER; |
74 | extern const int LOGICAL_ERROR; |
75 | } |
76 | |
77 | ExpressionAnalyzer::ExpressionAnalyzer( |
78 | const ASTPtr & query_, |
79 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
80 | const Context & context_, |
81 | size_t subquery_depth_, |
82 | bool do_global) |
83 | : query(query_), context(context_), settings(context.getSettings()) |
84 | , subquery_depth(subquery_depth_) |
85 | , syntax(syntax_analyzer_result_) |
86 | { |
87 | /// external_tables, subqueries_for_sets for global subqueries. |
88 | /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. |
89 | initGlobalSubqueriesAndExternalTables(do_global); |
90 | |
91 | /// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns. |
92 | /// This analysis should be performed after processing global subqueries, because otherwise, |
93 | /// if the aggregate function contains a global subquery, then `analyzeAggregation` method will save |
94 | /// in `aggregate_descriptions` the information about the parameters of this aggregate function, among which |
95 | /// global subquery. Then, when you call `initGlobalSubqueriesAndExternalTables` method, this |
96 | /// the global subquery will be replaced with a temporary table, resulting in aggregate_descriptions |
97 | /// will contain out-of-date information, which will lead to an error when the query is executed. |
98 | analyzeAggregation(); |
99 | } |
100 | |
101 | bool ExpressionAnalyzer::isRemoteStorage() const |
102 | { |
103 | return storage() && storage()->isRemote(); |
104 | } |
105 | |
106 | |
107 | void ExpressionAnalyzer::analyzeAggregation() |
108 | { |
109 | /** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions), |
110 | * as well as a set of columns obtained after the aggregation, if any, |
111 | * or after all the actions that are usually performed before aggregation (aggregated_columns). |
112 | * |
113 | * Everything below (compiling temporary ExpressionActions) - only for the purpose of query analysis (type output). |
114 | */ |
115 | |
116 | auto * select_query = query->as<ASTSelectQuery>(); |
117 | |
118 | ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context); |
119 | |
120 | if (select_query) |
121 | { |
122 | bool is_array_join_left; |
123 | ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left); |
124 | if (array_join_expression_list) |
125 | { |
126 | getRootActions(array_join_expression_list, true, temp_actions); |
127 | addMultipleArrayJoinAction(temp_actions, is_array_join_left); |
128 | |
129 | array_join_columns.clear(); |
130 | for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList()) |
131 | if (syntax->array_join_result_to_source.count(column.name)) |
132 | array_join_columns.emplace_back(column); |
133 | } |
134 | |
135 | const ASTTablesInSelectQueryElement * join = select_query->join(); |
136 | if (join) |
137 | { |
138 | getRootActions(analyzedJoin().leftKeysList(), true, temp_actions); |
139 | addJoinAction(temp_actions); |
140 | } |
141 | } |
142 | |
143 | has_aggregation = makeAggregateDescriptions(temp_actions); |
144 | if (select_query && (select_query->groupBy() || select_query->having())) |
145 | has_aggregation = true; |
146 | |
147 | if (has_aggregation) |
148 | { |
149 | getSelectQuery(); /// assertSelect() |
150 | |
151 | /// Find out aggregation keys. |
152 | if (select_query->groupBy()) |
153 | { |
154 | NameSet unique_keys; |
155 | ASTs & group_asts = select_query->groupBy()->children; |
156 | for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i) |
157 | { |
158 | ssize_t size = group_asts.size(); |
159 | getRootActions(group_asts[i], true, temp_actions); |
160 | |
161 | const auto & column_name = group_asts[i]->getColumnName(); |
162 | const auto & block = temp_actions->getSampleBlock(); |
163 | |
164 | if (!block.has(column_name)) |
165 | throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); |
166 | |
167 | const auto & col = block.getByName(column_name); |
168 | |
169 | /// Constant expressions have non-null column pointer at this stage. |
170 | if (col.column && isColumnConst(*col.column)) |
171 | { |
172 | /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. |
173 | if (!aggregate_descriptions.empty() || size > 1) |
174 | { |
175 | if (i + 1 < static_cast<ssize_t>(size)) |
176 | group_asts[i] = std::move(group_asts.back()); |
177 | |
178 | group_asts.pop_back(); |
179 | |
180 | --i; |
181 | continue; |
182 | } |
183 | } |
184 | |
185 | NameAndTypePair key{column_name, col.type}; |
186 | |
187 | /// Aggregation keys are uniqued. |
188 | if (!unique_keys.count(key.name)) |
189 | { |
190 | unique_keys.insert(key.name); |
191 | aggregation_keys.push_back(key); |
192 | |
193 | /// Key is no longer needed, therefore we can save a little by moving it. |
194 | aggregated_columns.push_back(std::move(key)); |
195 | } |
196 | } |
197 | |
198 | if (group_asts.empty()) |
199 | { |
200 | select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); |
201 | has_aggregation = select_query->having() || aggregate_descriptions.size(); |
202 | } |
203 | } |
204 | |
205 | for (size_t i = 0; i < aggregate_descriptions.size(); ++i) |
206 | { |
207 | AggregateDescription & desc = aggregate_descriptions[i]; |
208 | aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType()); |
209 | } |
210 | } |
211 | else |
212 | { |
213 | aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); |
214 | } |
215 | } |
216 | |
217 | |
218 | void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) |
219 | { |
220 | /// Adds existing external tables (not subqueries) to the external_tables dictionary. |
221 | ExternalTablesVisitor::Data tables_data{context, external_tables}; |
222 | ExternalTablesVisitor(tables_data).visit(query); |
223 | |
224 | if (do_global) |
225 | { |
226 | GlobalSubqueriesVisitor::Data subqueries_data(context, subquery_depth, isRemoteStorage(), |
227 | external_tables, subqueries_for_sets, has_global_subqueries); |
228 | GlobalSubqueriesVisitor(subqueries_data).visit(query); |
229 | } |
230 | } |
231 | |
232 | |
233 | NamesAndTypesList ExpressionAnalyzer::sourceWithJoinedColumns() const |
234 | { |
235 | auto result_columns = sourceColumns(); |
236 | result_columns.insert(result_columns.end(), array_join_columns.begin(), array_join_columns.end()); |
237 | result_columns.insert(result_columns.end(), |
238 | analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end()); |
239 | return result_columns; |
240 | } |
241 | |
242 | |
243 | void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) |
244 | { |
245 | auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); |
246 | |
247 | if (prepared_sets.count(set_key)) |
248 | return; /// Already prepared. |
249 | |
250 | if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name)) |
251 | { |
252 | prepared_sets.insert({set_key, set_ptr_from_storage_set}); |
253 | return; |
254 | } |
255 | |
256 | auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {}); |
257 | BlockIO res = interpreter_subquery->execute(); |
258 | |
259 | SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true); |
260 | set->setHeader(res.in->getHeader()); |
261 | |
262 | res.in->readPrefix(); |
263 | while (Block block = res.in->read()) |
264 | { |
265 | /// If the limits have been exceeded, give up and let the default subquery processing actions take place. |
266 | if (!set->insertFromBlock(block)) |
267 | return; |
268 | } |
269 | |
270 | set->finishInsert(); |
271 | res.in->readSuffix(); |
272 | |
273 | prepared_sets[set_key] = std::move(set); |
274 | } |
275 | |
276 | SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name) |
277 | { |
278 | const auto * table = subquery_or_table_name->as<ASTIdentifier>(); |
279 | if (!table) |
280 | return nullptr; |
281 | const DatabaseAndTableWithAlias database_table(*table); |
282 | const auto storage = context.getTable(database_table.database, database_table.table); |
283 | if (storage->getName() != "Set" ) |
284 | return nullptr; |
285 | const auto storage_set = std::dynamic_pointer_cast<StorageSet>(storage); |
286 | return storage_set->getSet(); |
287 | } |
288 | |
289 | |
290 | /// Perfomance optimisation for IN() if storage supports it. |
291 | void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) |
292 | { |
293 | if (!node || !storage() || !storage()->supportsIndexForIn()) |
294 | return; |
295 | |
296 | for (auto & child : node->children) |
297 | { |
298 | /// Don't descend into subqueries. |
299 | if (child->as<ASTSubquery>()) |
300 | continue; |
301 | |
302 | /// Don't descend into lambda functions |
303 | const auto * func = child->as<ASTFunction>(); |
304 | if (func && func->name == "lambda" ) |
305 | continue; |
306 | |
307 | makeSetsForIndex(child); |
308 | } |
309 | |
310 | const auto * func = node->as<ASTFunction>(); |
311 | if (func && functionIsInOperator(func->name)) |
312 | { |
313 | const IAST & args = *func->arguments; |
314 | const ASTPtr & left_in_operand = args.children.at(0); |
315 | |
316 | if (storage()->mayBenefitFromIndexForIn(left_in_operand, context)) |
317 | { |
318 | const ASTPtr & arg = args.children.at(1); |
319 | if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>()) |
320 | { |
321 | if (settings.use_index_for_in_with_subqueries) |
322 | tryMakeSetForIndexFromSubquery(arg); |
323 | } |
324 | else |
325 | { |
326 | ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceWithJoinedColumns(), context); |
327 | getRootActions(left_in_operand, true, temp_actions); |
328 | |
329 | Block sample_block_with_calculated_columns = temp_actions->getSampleBlock(); |
330 | if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName())) |
331 | makeExplicitSet(func, sample_block_with_calculated_columns, true, context, |
332 | settings.size_limits_for_set, prepared_sets); |
333 | } |
334 | } |
335 | } |
336 | } |
337 | |
338 | |
339 | void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts) |
340 | { |
341 | LogAST log; |
342 | ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth, |
343 | sourceColumns(), actions, prepared_sets, subqueries_for_sets, |
344 | no_subqueries, only_consts, !isRemoteStorage()); |
345 | ActionsVisitor(visitor_data, log.stream()).visit(ast); |
346 | visitor_data.updateActions(actions); |
347 | } |
348 | |
349 | |
350 | bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions) |
351 | { |
352 | for (const ASTFunction * node : aggregates()) |
353 | { |
354 | AggregateDescription aggregate; |
355 | aggregate.column_name = node->getColumnName(); |
356 | |
357 | const ASTs & arguments = node->arguments->children; |
358 | aggregate.argument_names.resize(arguments.size()); |
359 | DataTypes types(arguments.size()); |
360 | |
361 | for (size_t i = 0; i < arguments.size(); ++i) |
362 | { |
363 | getRootActions(arguments[i], true, actions); |
364 | const std::string & name = arguments[i]->getColumnName(); |
365 | types[i] = actions->getSampleBlock().getByName(name).type; |
366 | aggregate.argument_names[i] = name; |
367 | } |
368 | |
369 | aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array(); |
370 | aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters); |
371 | |
372 | aggregate_descriptions.push_back(aggregate); |
373 | } |
374 | |
375 | return !aggregates().empty(); |
376 | } |
377 | |
378 | |
379 | const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const |
380 | { |
381 | const auto * select_query = query->as<ASTSelectQuery>(); |
382 | if (!select_query) |
383 | throw Exception("Not a select query" , ErrorCodes::LOGICAL_ERROR); |
384 | return select_query; |
385 | } |
386 | |
387 | const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const |
388 | { |
389 | if (!has_aggregation) |
390 | throw Exception("No aggregation" , ErrorCodes::LOGICAL_ERROR); |
391 | return getSelectQuery(); |
392 | } |
393 | |
394 | void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const |
395 | { |
396 | if (chain.steps.empty()) |
397 | { |
398 | chain.steps.emplace_back(std::make_shared<ExpressionActions>(columns, context)); |
399 | } |
400 | } |
401 | |
402 | /// "Big" ARRAY JOIN. |
403 | void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const |
404 | { |
405 | NameSet result_columns; |
406 | for (const auto & result_source : syntax->array_join_result_to_source) |
407 | { |
408 | /// Assign new names to columns, if needed. |
409 | if (result_source.first != result_source.second) |
410 | actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first)); |
411 | |
412 | /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names. |
413 | result_columns.insert(result_source.first); |
414 | } |
415 | |
416 | actions->add(ExpressionAction::arrayJoin(result_columns, array_join_is_left, context)); |
417 | } |
418 | |
419 | bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types) |
420 | { |
421 | const auto * select_query = getSelectQuery(); |
422 | |
423 | bool is_array_join_left; |
424 | ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left); |
425 | if (!array_join_expression_list) |
426 | return false; |
427 | |
428 | initChain(chain, sourceColumns()); |
429 | ExpressionActionsChain::Step & step = chain.steps.back(); |
430 | |
431 | getRootActions(array_join_expression_list, only_types, step.actions); |
432 | |
433 | addMultipleArrayJoinAction(step.actions, is_array_join_left); |
434 | |
435 | return true; |
436 | } |
437 | |
438 | void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, JoinPtr join) const |
439 | { |
440 | actions->add(ExpressionAction::ordinaryJoin(syntax->analyzed_join, join)); |
441 | } |
442 | |
443 | bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) |
444 | { |
445 | const ASTTablesInSelectQueryElement * ast_join = getSelectQuery()->join(); |
446 | if (!ast_join) |
447 | return false; |
448 | |
449 | JoinPtr table_join = makeTableJoin(*ast_join); |
450 | |
451 | initChain(chain, sourceColumns()); |
452 | ExpressionActionsChain::Step & step = chain.steps.back(); |
453 | |
454 | getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions); |
455 | addJoinAction(step.actions, table_join); |
456 | return true; |
457 | } |
458 | |
459 | static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & join_element, std::shared_ptr<AnalyzedJoin> analyzed_join, |
460 | const Context & context) |
461 | { |
462 | const auto & table_to_join = join_element.table_expression->as<ASTTableExpression &>(); |
463 | |
464 | /// TODO This syntax does not support specifying a database name. |
465 | if (table_to_join.database_and_table_name) |
466 | { |
467 | DatabaseAndTableWithAlias database_table(table_to_join.database_and_table_name); |
468 | StoragePtr table = context.tryGetTable(database_table.database, database_table.table); |
469 | |
470 | if (table) |
471 | { |
472 | auto * storage_join = dynamic_cast<StorageJoin *>(table.get()); |
473 | if (storage_join) |
474 | return storage_join->getJoin(analyzed_join); |
475 | } |
476 | } |
477 | |
478 | return {}; |
479 | } |
480 | |
481 | static ExpressionActionsPtr createJoinedBlockActions(const Context & context, const AnalyzedJoin & analyzed_join) |
482 | { |
483 | ASTPtr expression_list = analyzed_join.rightKeysList(); |
484 | auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list, |
485 | analyzed_join.columnsFromJoinedTable(), analyzed_join.requiredJoinedNames()); |
486 | return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false); |
487 | } |
488 | |
489 | JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQueryElement & join_element) |
490 | { |
491 | /// Two JOINs are not supported with the same subquery, but different USINGs. |
492 | auto join_hash = join_element.getTreeHash(); |
493 | String join_subquery_id = toString(join_hash.first) + "_" + toString(join_hash.second); |
494 | |
495 | SubqueryForSet & subquery_for_join = subqueries_for_sets[join_subquery_id]; |
496 | |
497 | /// Special case - if table name is specified on the right of JOIN, then the table has the type Join (the previously prepared mapping). |
498 | if (!subquery_for_join.join) |
499 | subquery_for_join.join = tryGetStorageJoin(join_element, syntax->analyzed_join, context); |
500 | |
501 | if (!subquery_for_join.join) |
502 | { |
503 | /// Actions which need to be calculated on joined block. |
504 | ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin()); |
505 | |
506 | if (!subquery_for_join.source) |
507 | { |
508 | NamesWithAliases required_columns_with_aliases = |
509 | analyzedJoin().getRequiredColumns(joined_block_actions->getSampleBlock(), joined_block_actions->getRequiredColumns()); |
510 | makeSubqueryForJoin(join_element, std::move(required_columns_with_aliases), subquery_for_join); |
511 | } |
512 | |
513 | /// TODO You do not need to set this up when JOIN is only needed on remote servers. |
514 | subquery_for_join.setJoinActions(joined_block_actions); /// changes subquery_for_join.sample_block inside |
515 | subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block); |
516 | } |
517 | |
518 | return subquery_for_join.join; |
519 | } |
520 | |
521 | void SelectQueryExpressionAnalyzer::makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, |
522 | NamesWithAliases && required_columns_with_aliases, |
523 | SubqueryForSet & subquery_for_set) const |
524 | { |
525 | /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs |
526 | * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, |
527 | * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. |
528 | * - this function shows the expression JOIN _data1. |
529 | */ |
530 | Names original_columns; |
531 | for (auto & pr : required_columns_with_aliases) |
532 | original_columns.push_back(pr.first); |
533 | |
534 | auto interpreter = interpretSubquery(join_element.table_expression, context, subquery_depth, original_columns); |
535 | |
536 | subquery_for_set.makeSource(interpreter, std::move(required_columns_with_aliases)); |
537 | } |
538 | |
539 | bool SelectQueryExpressionAnalyzer::appendPrewhere( |
540 | ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) |
541 | { |
542 | const auto * select_query = getSelectQuery(); |
543 | |
544 | if (!select_query->prewhere()) |
545 | return false; |
546 | |
547 | initChain(chain, sourceColumns()); |
548 | auto & step = chain.getLastStep(); |
549 | getRootActions(select_query->prewhere(), only_types, step.actions); |
550 | String prewhere_column_name = select_query->prewhere()->getColumnName(); |
551 | step.required_output.push_back(prewhere_column_name); |
552 | step.can_remove_required_output.push_back(true); |
553 | |
554 | { |
555 | /// Remove unused source_columns from prewhere actions. |
556 | auto tmp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context); |
557 | getRootActions(select_query->prewhere(), only_types, tmp_actions); |
558 | tmp_actions->finalize({prewhere_column_name}); |
559 | auto required_columns = tmp_actions->getRequiredColumns(); |
560 | NameSet required_source_columns(required_columns.begin(), required_columns.end()); |
561 | |
562 | /// Add required columns to required output in order not to remove them after prewhere execution. |
563 | /// TODO: add sampling and final execution to common chain. |
564 | for (const auto & column : additional_required_columns) |
565 | { |
566 | if (required_source_columns.count(column)) |
567 | { |
568 | step.required_output.push_back(column); |
569 | step.can_remove_required_output.push_back(true); |
570 | } |
571 | } |
572 | |
573 | auto names = step.actions->getSampleBlock().getNames(); |
574 | NameSet name_set(names.begin(), names.end()); |
575 | |
576 | for (const auto & column : sourceColumns()) |
577 | if (required_source_columns.count(column.name) == 0) |
578 | name_set.erase(column.name); |
579 | |
580 | Names required_output(name_set.begin(), name_set.end()); |
581 | step.actions->finalize(required_output); |
582 | } |
583 | |
584 | { |
585 | /// Add empty action with input = {prewhere actions output} + {unused source columns} |
586 | /// Reasons: |
587 | /// 1. Remove remove source columns which are used only in prewhere actions during prewhere actions execution. |
588 | /// Example: select A prewhere B > 0. B can be removed at prewhere step. |
589 | /// 2. Store side columns which were calculated during prewhere actions execution if they are used. |
590 | /// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step. |
591 | /// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN. |
592 | ColumnsWithTypeAndName columns = step.actions->getSampleBlock().getColumnsWithTypeAndName(); |
593 | auto required_columns = step.actions->getRequiredColumns(); |
594 | NameSet prewhere_input_names(required_columns.begin(), required_columns.end()); |
595 | NameSet unused_source_columns; |
596 | |
597 | for (const auto & column : sourceColumns()) |
598 | { |
599 | if (prewhere_input_names.count(column.name) == 0) |
600 | { |
601 | columns.emplace_back(column.type, column.name); |
602 | unused_source_columns.emplace(column.name); |
603 | } |
604 | } |
605 | |
606 | chain.steps.emplace_back(std::make_shared<ExpressionActions>(std::move(columns), context)); |
607 | chain.steps.back().additional_input = std::move(unused_source_columns); |
608 | } |
609 | |
610 | return true; |
611 | } |
612 | |
613 | void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name) |
614 | { |
615 | initChain(chain, sourceColumns()); |
616 | ExpressionActionsChain::Step & step = chain.steps.back(); |
617 | |
618 | // FIXME: assert(filter_info); |
619 | step.actions = std::move(actions); |
620 | step.required_output.push_back(std::move(column_name)); |
621 | step.can_remove_required_output = {true}; |
622 | |
623 | chain.addStep(); |
624 | } |
625 | |
626 | bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types) |
627 | { |
628 | const auto * select_query = getSelectQuery(); |
629 | |
630 | if (!select_query->where()) |
631 | return false; |
632 | |
633 | initChain(chain, sourceColumns()); |
634 | ExpressionActionsChain::Step & step = chain.steps.back(); |
635 | |
636 | step.required_output.push_back(select_query->where()->getColumnName()); |
637 | step.can_remove_required_output = {true}; |
638 | |
639 | getRootActions(select_query->where(), only_types, step.actions); |
640 | |
641 | return true; |
642 | } |
643 | |
644 | bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types) |
645 | { |
646 | const auto * select_query = getAggregatingQuery(); |
647 | |
648 | if (!select_query->groupBy()) |
649 | return false; |
650 | |
651 | initChain(chain, sourceColumns()); |
652 | ExpressionActionsChain::Step & step = chain.steps.back(); |
653 | |
654 | ASTs asts = select_query->groupBy()->children; |
655 | for (size_t i = 0; i < asts.size(); ++i) |
656 | { |
657 | step.required_output.push_back(asts[i]->getColumnName()); |
658 | getRootActions(asts[i], only_types, step.actions); |
659 | } |
660 | |
661 | return true; |
662 | } |
663 | |
664 | void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types) |
665 | { |
666 | const auto * select_query = getAggregatingQuery(); |
667 | |
668 | initChain(chain, sourceColumns()); |
669 | ExpressionActionsChain::Step & step = chain.steps.back(); |
670 | |
671 | for (size_t i = 0; i < aggregate_descriptions.size(); ++i) |
672 | { |
673 | for (size_t j = 0; j < aggregate_descriptions[i].argument_names.size(); ++j) |
674 | { |
675 | step.required_output.push_back(aggregate_descriptions[i].argument_names[j]); |
676 | } |
677 | } |
678 | |
679 | /// Collect aggregates removing duplicates by node.getColumnName() |
680 | /// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query) |
681 | /// @note The original recollection logic didn't remove duplicates. |
682 | GetAggregatesVisitor::Data data; |
683 | GetAggregatesVisitor(data).visit(select_query->select()); |
684 | |
685 | if (select_query->having()) |
686 | GetAggregatesVisitor(data).visit(select_query->having()); |
687 | |
688 | if (select_query->orderBy()) |
689 | GetAggregatesVisitor(data).visit(select_query->orderBy()); |
690 | |
691 | /// TODO: data.aggregates -> aggregates() |
692 | for (const ASTFunction * node : data.aggregates) |
693 | for (auto & argument : node->arguments->children) |
694 | getRootActions(argument, only_types, step.actions); |
695 | } |
696 | |
697 | bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types) |
698 | { |
699 | const auto * select_query = getAggregatingQuery(); |
700 | |
701 | if (!select_query->having()) |
702 | return false; |
703 | |
704 | initChain(chain, aggregated_columns); |
705 | ExpressionActionsChain::Step & step = chain.steps.back(); |
706 | |
707 | step.required_output.push_back(select_query->having()->getColumnName()); |
708 | getRootActions(select_query->having(), only_types, step.actions); |
709 | |
710 | return true; |
711 | } |
712 | |
713 | void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types) |
714 | { |
715 | const auto * select_query = getSelectQuery(); |
716 | |
717 | initChain(chain, aggregated_columns); |
718 | ExpressionActionsChain::Step & step = chain.steps.back(); |
719 | |
720 | getRootActions(select_query->select(), only_types, step.actions); |
721 | |
722 | for (const auto & child : select_query->select()->children) |
723 | step.required_output.push_back(child->getColumnName()); |
724 | } |
725 | |
726 | bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order) |
727 | { |
728 | const auto * select_query = getSelectQuery(); |
729 | |
730 | if (!select_query->orderBy()) |
731 | return false; |
732 | |
733 | initChain(chain, aggregated_columns); |
734 | ExpressionActionsChain::Step & step = chain.steps.back(); |
735 | |
736 | getRootActions(select_query->orderBy(), only_types, step.actions); |
737 | |
738 | for (auto & child : select_query->orderBy()->children) |
739 | { |
740 | const auto * ast = child->as<ASTOrderByElement>(); |
741 | if (!ast || ast->children.size() < 1) |
742 | throw Exception("Bad order expression AST" , ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); |
743 | ASTPtr order_expression = ast->children.at(0); |
744 | step.required_output.push_back(order_expression->getColumnName()); |
745 | } |
746 | |
747 | if (optimize_read_in_order) |
748 | { |
749 | auto all_columns = sourceWithJoinedColumns(); |
750 | for (auto & child : select_query->orderBy()->children) |
751 | { |
752 | order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context)); |
753 | getRootActions(child, only_types, order_by_elements_actions.back()); |
754 | } |
755 | } |
756 | |
757 | return true; |
758 | } |
759 | |
760 | bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types) |
761 | { |
762 | const auto * select_query = getSelectQuery(); |
763 | |
764 | if (!select_query->limitBy()) |
765 | return false; |
766 | |
767 | initChain(chain, aggregated_columns); |
768 | ExpressionActionsChain::Step & step = chain.steps.back(); |
769 | |
770 | getRootActions(select_query->limitBy(), only_types, step.actions); |
771 | |
772 | NameSet aggregated_names; |
773 | for (const auto & column : aggregated_columns) |
774 | { |
775 | step.required_output.push_back(column.name); |
776 | aggregated_names.insert(column.name); |
777 | } |
778 | |
779 | for (const auto & child : select_query->limitBy()->children) |
780 | { |
781 | auto child_name = child->getColumnName(); |
782 | if (!aggregated_names.count(child_name)) |
783 | step.required_output.push_back(std::move(child_name)); |
784 | } |
785 | |
786 | return true; |
787 | } |
788 | |
789 | void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const |
790 | { |
791 | const auto * select_query = getSelectQuery(); |
792 | |
793 | initChain(chain, aggregated_columns); |
794 | ExpressionActionsChain::Step & step = chain.steps.back(); |
795 | |
796 | NamesWithAliases result_columns; |
797 | |
798 | ASTs asts = select_query->select()->children; |
799 | for (size_t i = 0; i < asts.size(); ++i) |
800 | { |
801 | String result_name = asts[i]->getAliasOrColumnName(); |
802 | if (required_result_columns.empty() || required_result_columns.count(result_name)) |
803 | { |
804 | result_columns.emplace_back(asts[i]->getColumnName(), result_name); |
805 | step.required_output.push_back(result_columns.back().second); |
806 | } |
807 | } |
808 | |
809 | step.actions->add(ExpressionAction::project(result_columns)); |
810 | } |
811 | |
812 | |
813 | void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types) |
814 | { |
815 | initChain(chain, sourceColumns()); |
816 | ExpressionActionsChain::Step & step = chain.steps.back(); |
817 | getRootActions(expr, only_types, step.actions); |
818 | step.required_output.push_back(expr->getColumnName()); |
819 | } |
820 | |
821 | |
822 | ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) |
823 | { |
824 | ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(sourceColumns(), context); |
825 | NamesWithAliases result_columns; |
826 | Names result_names; |
827 | |
828 | ASTs asts; |
829 | |
830 | if (const auto * node = query->as<ASTExpressionList>()) |
831 | asts = node->children; |
832 | else |
833 | asts = ASTs(1, query); |
834 | |
835 | for (size_t i = 0; i < asts.size(); ++i) |
836 | { |
837 | std::string name = asts[i]->getColumnName(); |
838 | std::string alias; |
839 | if (add_aliases) |
840 | alias = asts[i]->getAliasOrColumnName(); |
841 | else |
842 | alias = name; |
843 | result_columns.emplace_back(name, alias); |
844 | result_names.push_back(alias); |
845 | getRootActions(asts[i], false, actions); |
846 | } |
847 | |
848 | if (add_aliases) |
849 | { |
850 | if (project_result) |
851 | actions->add(ExpressionAction::project(result_columns)); |
852 | else |
853 | actions->add(ExpressionAction::addAliases(result_columns)); |
854 | } |
855 | |
856 | if (!(add_aliases && project_result)) |
857 | { |
858 | /// We will not delete the original columns. |
859 | for (const auto & column_name_type : sourceColumns()) |
860 | result_names.push_back(column_name_type.name); |
861 | } |
862 | |
863 | actions->finalize(result_names); |
864 | |
865 | return actions; |
866 | } |
867 | |
868 | |
869 | ExpressionActionsPtr ExpressionAnalyzer::getConstActions() |
870 | { |
871 | ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context); |
872 | |
873 | getRootActions(query, true, actions, true); |
874 | return actions; |
875 | } |
876 | |
877 | void SelectQueryExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const |
878 | { |
879 | for (const auto & name_and_type : aggregation_keys) |
880 | key_names.emplace_back(name_and_type.name); |
881 | |
882 | aggregates = aggregate_descriptions; |
883 | } |
884 | |
885 | } |
886 | |