1#include <Poco/Util/Application.h>
2#include <Poco/String.h>
3
4#include <Core/Block.h>
5
6#include <Parsers/ASTFunction.h>
7#include <Parsers/ASTIdentifier.h>
8#include <Parsers/ASTLiteral.h>
9#include <Parsers/ASTQualifiedAsterisk.h>
10#include <Parsers/ASTExpressionList.h>
11#include <Parsers/ASTSelectQuery.h>
12#include <Parsers/ASTSelectWithUnionQuery.h>
13#include <Parsers/ASTSubquery.h>
14#include <Parsers/ASTOrderByElement.h>
15#include <Parsers/formatAST.h>
16#include <Parsers/DumpASTNode.h>
17
18#include <DataTypes/DataTypeNullable.h>
19#include <DataTypes/NestedUtils.h>
20#include <DataTypes/DataTypesNumber.h>
21#include <DataTypes/DataTypeLowCardinality.h>
22
23#include <Columns/IColumn.h>
24
25#include <Interpreters/ExpressionAnalyzer.h>
26#include <Interpreters/ExpressionActions.h>
27#include <Interpreters/InJoinSubqueriesPreprocessor.h>
28#include <Interpreters/LogicalExpressionsOptimizer.h>
29#include <Interpreters/PredicateExpressionsOptimizer.h>
30#include <Interpreters/ExternalDictionariesLoader.h>
31#include <Interpreters/Set.h>
32#include <Interpreters/AnalyzedJoin.h>
33#include <Interpreters/Join.h>
34
35#include <AggregateFunctions/AggregateFunctionFactory.h>
36#include <AggregateFunctions/parseAggregateFunctionParameters.h>
37
38#include <Storages/StorageDistributed.h>
39#include <Storages/StorageJoin.h>
40
41#include <DataStreams/copyData.h>
42#include <DataStreams/IBlockInputStream.h>
43
44#include <Dictionaries/IDictionary.h>
45
46#include <Common/typeid_cast.h>
47#include <Common/StringUtils/StringUtils.h>
48
49#include <ext/range.h>
50#include <DataTypes/DataTypeFactory.h>
51#include <Functions/FunctionsMiscellaneous.h>
52#include <Parsers/ExpressionListParsers.h>
53#include <Parsers/parseQuery.h>
54#include <Parsers/queryToString.h>
55#include <Interpreters/interpretSubquery.h>
56#include <Interpreters/DatabaseAndTableWithAlias.h>
57#include <Interpreters/misc.h>
58
59#include <Interpreters/ActionsVisitor.h>
60
61#include <Interpreters/ExternalTablesVisitor.h>
62#include <Interpreters/GlobalSubqueriesVisitor.h>
63#include <Interpreters/GetAggregatesVisitor.h>
64
65namespace DB
66{
67
/// Debug log type for AST traversal: getRootActions creates one and hands its stream to ActionsVisitor.
/// The template argument is `false` here, which (per the note below) leaves logging off.
using LogAST = DebugASTLog<false>; /// set to true to enable logs
69
70
/// Error codes thrown from this translation unit. They are only declared here;
/// the definitions are provided elsewhere in the project.
namespace ErrorCodes
{
    extern const int UNKNOWN_IDENTIFIER;
    extern const int LOGICAL_ERROR;
    /// Thrown by appendOrderBy for a malformed ORDER BY element; previously used without a local declaration.
    extern const int UNKNOWN_TYPE_OF_AST_NODE;
}
76
77ExpressionAnalyzer::ExpressionAnalyzer(
78 const ASTPtr & query_,
79 const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
80 const Context & context_,
81 size_t subquery_depth_,
82 bool do_global)
83 : query(query_), context(context_), settings(context.getSettings())
84 , subquery_depth(subquery_depth_)
85 , syntax(syntax_analyzer_result_)
86{
87 /// external_tables, subqueries_for_sets for global subqueries.
88 /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers.
89 initGlobalSubqueriesAndExternalTables(do_global);
90
91 /// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns.
92 /// This analysis should be performed after processing global subqueries, because otherwise,
93 /// if the aggregate function contains a global subquery, then `analyzeAggregation` method will save
94 /// in `aggregate_descriptions` the information about the parameters of this aggregate function, among which
95 /// global subquery. Then, when you call `initGlobalSubqueriesAndExternalTables` method, this
96 /// the global subquery will be replaced with a temporary table, resulting in aggregate_descriptions
97 /// will contain out-of-date information, which will lead to an error when the query is executed.
98 analyzeAggregation();
99}
100
101bool ExpressionAnalyzer::isRemoteStorage() const
102{
103 return storage() && storage()->isRemote();
104}
105
106
/// Fills has_aggregation, aggregation_keys, aggregate_descriptions and aggregated_columns.
/// May also rewrite the query: constant GROUP BY keys are dropped from the AST, and an emptied GROUP BY is reset.
/// Throws UNKNOWN_IDENTIFIER when a GROUP BY expression is absent from the sample block after its actions are built.
void ExpressionAnalyzer::analyzeAggregation()
{
    /** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions),
     *  as well as a set of columns obtained after the aggregation, if any,
     *  or after all the actions that are usually performed before aggregation (aggregated_columns).
     *
     * Everything below (compiling temporary ExpressionActions) - only for the purpose of query analysis (type output).
     */

    /// Null when the analyzed query is not a SELECT (e.g. a bare expression list).
    auto * select_query = query->as<ASTSelectQuery>();

    /// Scratch actions over the source columns; used only to obtain a sample block with column types.
    ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);

    if (select_query)
    {
        /// Passed by reference to array_join_expression_list; only read when an ARRAY JOIN list is returned.
        bool is_array_join_left;
        ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left);
        if (array_join_expression_list)
        {
            getRootActions(array_join_expression_list, true, temp_actions);
            addMultipleArrayJoinAction(temp_actions, is_array_join_left);

            /// Remember the columns produced by ARRAY JOIN (those named as results in the syntax analysis).
            array_join_columns.clear();
            for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList())
                if (syntax->array_join_result_to_source.count(column.name))
                    array_join_columns.emplace_back(column);
        }

        const ASTTablesInSelectQueryElement * join = select_query->join();
        if (join)
        {
            /// Left-side keys must be computed before the JOIN action itself is appended.
            getRootActions(analyzedJoin().leftKeysList(), true, temp_actions);
            addJoinAction(temp_actions);
        }
    }

    /// Aggregation is present if there is any aggregate function, or an explicit GROUP BY / HAVING.
    has_aggregation = makeAggregateDescriptions(temp_actions);
    if (select_query && (select_query->groupBy() || select_query->having()))
        has_aggregation = true;

    if (has_aggregation)
    {
        /// Throws LOGICAL_ERROR if the query is not a SELECT, so select_query is non-null below.
        getSelectQuery(); /// assertSelect()

        /// Find out aggregation keys.
        if (select_query->groupBy())
        {
            NameSet unique_keys;
            ASTs & group_asts = select_query->groupBy()->children;
            /// The index is signed: after removing an element it is decremented (possibly to -1)
            /// so that the loop increment revisits the same slot.
            for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
            {
                /// Number of keys before a possible removal in this iteration.
                ssize_t size = group_asts.size();
                getRootActions(group_asts[i], true, temp_actions);

                const auto & column_name = group_asts[i]->getColumnName();
                const auto & block = temp_actions->getSampleBlock();

                if (!block.has(column_name))
                    throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);

                const auto & col = block.getByName(column_name);

                /// Constant expressions have non-null column pointer at this stage.
                if (col.column && isColumnConst(*col.column))
                {
                    /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work.
                    if (!aggregate_descriptions.empty() || size > 1)
                    {
                        /// Swap-remove: overwrite the current slot with the last element unless it is the last one.
                        if (i + 1 < static_cast<ssize_t>(size))
                            group_asts[i] = std::move(group_asts.back());

                        group_asts.pop_back();

                        --i;
                        continue;
                    }
                }

                NameAndTypePair key{column_name, col.type};

                /// Aggregation keys are uniqued.
                if (!unique_keys.count(key.name))
                {
                    unique_keys.insert(key.name);
                    aggregation_keys.push_back(key);

                    /// Key is no longer needed, therefore we can save a little by moving it.
                    aggregated_columns.push_back(std::move(key));
                }
            }

            /// All keys were constants and got removed: drop GROUP BY and re-evaluate whether aggregation remains.
            if (group_asts.empty())
            {
                select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
                has_aggregation = select_query->having() || aggregate_descriptions.size();
            }
        }

        /// After the keys, the aggregated columns contain one column per aggregate function result.
        for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
        {
            AggregateDescription & desc = aggregate_descriptions[i];
            aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType());
        }
    }
    else
    {
        /// No aggregation: the "aggregated" columns are simply whatever the temporary actions produce.
        aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList();
    }
}
216
217
218void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
219{
220 /// Adds existing external tables (not subqueries) to the external_tables dictionary.
221 ExternalTablesVisitor::Data tables_data{context, external_tables};
222 ExternalTablesVisitor(tables_data).visit(query);
223
224 if (do_global)
225 {
226 GlobalSubqueriesVisitor::Data subqueries_data(context, subquery_depth, isRemoteStorage(),
227 external_tables, subqueries_for_sets, has_global_subqueries);
228 GlobalSubqueriesVisitor(subqueries_data).visit(query);
229 }
230}
231
232
233NamesAndTypesList ExpressionAnalyzer::sourceWithJoinedColumns() const
234{
235 auto result_columns = sourceColumns();
236 result_columns.insert(result_columns.end(), array_join_columns.begin(), array_join_columns.end());
237 result_columns.insert(result_columns.end(),
238 analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
239 return result_columns;
240}
241
242
243void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
244{
245 auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
246
247 if (prepared_sets.count(set_key))
248 return; /// Already prepared.
249
250 if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
251 {
252 prepared_sets.insert({set_key, set_ptr_from_storage_set});
253 return;
254 }
255
256 auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {});
257 BlockIO res = interpreter_subquery->execute();
258
259 SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true);
260 set->setHeader(res.in->getHeader());
261
262 res.in->readPrefix();
263 while (Block block = res.in->read())
264 {
265 /// If the limits have been exceeded, give up and let the default subquery processing actions take place.
266 if (!set->insertFromBlock(block))
267 return;
268 }
269
270 set->finishInsert();
271 res.in->readSuffix();
272
273 prepared_sets[set_key] = std::move(set);
274}
275
276SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name)
277{
278 const auto * table = subquery_or_table_name->as<ASTIdentifier>();
279 if (!table)
280 return nullptr;
281 const DatabaseAndTableWithAlias database_table(*table);
282 const auto storage = context.getTable(database_table.database, database_table.table);
283 if (storage->getName() != "Set")
284 return nullptr;
285 const auto storage_set = std::dynamic_pointer_cast<StorageSet>(storage);
286 return storage_set->getSet();
287}
288
289
290/// Perfomance optimisation for IN() if storage supports it.
291void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
292{
293 if (!node || !storage() || !storage()->supportsIndexForIn())
294 return;
295
296 for (auto & child : node->children)
297 {
298 /// Don't descend into subqueries.
299 if (child->as<ASTSubquery>())
300 continue;
301
302 /// Don't descend into lambda functions
303 const auto * func = child->as<ASTFunction>();
304 if (func && func->name == "lambda")
305 continue;
306
307 makeSetsForIndex(child);
308 }
309
310 const auto * func = node->as<ASTFunction>();
311 if (func && functionIsInOperator(func->name))
312 {
313 const IAST & args = *func->arguments;
314 const ASTPtr & left_in_operand = args.children.at(0);
315
316 if (storage()->mayBenefitFromIndexForIn(left_in_operand, context))
317 {
318 const ASTPtr & arg = args.children.at(1);
319 if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>())
320 {
321 if (settings.use_index_for_in_with_subqueries)
322 tryMakeSetForIndexFromSubquery(arg);
323 }
324 else
325 {
326 ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceWithJoinedColumns(), context);
327 getRootActions(left_in_operand, true, temp_actions);
328
329 Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
330 if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName()))
331 makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
332 settings.size_limits_for_set, prepared_sets);
333 }
334 }
335 }
336}
337
338
339void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts)
340{
341 LogAST log;
342 ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth,
343 sourceColumns(), actions, prepared_sets, subqueries_for_sets,
344 no_subqueries, only_consts, !isRemoteStorage());
345 ActionsVisitor(visitor_data, log.stream()).visit(ast);
346 visitor_data.updateActions(actions);
347}
348
349
350bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions)
351{
352 for (const ASTFunction * node : aggregates())
353 {
354 AggregateDescription aggregate;
355 aggregate.column_name = node->getColumnName();
356
357 const ASTs & arguments = node->arguments->children;
358 aggregate.argument_names.resize(arguments.size());
359 DataTypes types(arguments.size());
360
361 for (size_t i = 0; i < arguments.size(); ++i)
362 {
363 getRootActions(arguments[i], true, actions);
364 const std::string & name = arguments[i]->getColumnName();
365 types[i] = actions->getSampleBlock().getByName(name).type;
366 aggregate.argument_names[i] = name;
367 }
368
369 aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array();
370 aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters);
371
372 aggregate_descriptions.push_back(aggregate);
373 }
374
375 return !aggregates().empty();
376}
377
378
379const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const
380{
381 const auto * select_query = query->as<ASTSelectQuery>();
382 if (!select_query)
383 throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
384 return select_query;
385}
386
387const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const
388{
389 if (!has_aggregation)
390 throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
391 return getSelectQuery();
392}
393
394void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const
395{
396 if (chain.steps.empty())
397 {
398 chain.steps.emplace_back(std::make_shared<ExpressionActions>(columns, context));
399 }
400}
401
402/// "Big" ARRAY JOIN.
403void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const
404{
405 NameSet result_columns;
406 for (const auto & result_source : syntax->array_join_result_to_source)
407 {
408 /// Assign new names to columns, if needed.
409 if (result_source.first != result_source.second)
410 actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first));
411
412 /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names.
413 result_columns.insert(result_source.first);
414 }
415
416 actions->add(ExpressionAction::arrayJoin(result_columns, array_join_is_left, context));
417}
418
419bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types)
420{
421 const auto * select_query = getSelectQuery();
422
423 bool is_array_join_left;
424 ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left);
425 if (!array_join_expression_list)
426 return false;
427
428 initChain(chain, sourceColumns());
429 ExpressionActionsChain::Step & step = chain.steps.back();
430
431 getRootActions(array_join_expression_list, only_types, step.actions);
432
433 addMultipleArrayJoinAction(step.actions, is_array_join_left);
434
435 return true;
436}
437
/// Appends to `actions` an ordinary JOIN action built from syntax->analyzed_join and the given `join`.
void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, JoinPtr join) const
{
    actions->add(ExpressionAction::ordinaryJoin(syntax->analyzed_join, join));
}
442
443bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
444{
445 const ASTTablesInSelectQueryElement * ast_join = getSelectQuery()->join();
446 if (!ast_join)
447 return false;
448
449 JoinPtr table_join = makeTableJoin(*ast_join);
450
451 initChain(chain, sourceColumns());
452 ExpressionActionsChain::Step & step = chain.steps.back();
453
454 getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions);
455 addJoinAction(step.actions, table_join);
456 return true;
457}
458
459static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & join_element, std::shared_ptr<AnalyzedJoin> analyzed_join,
460 const Context & context)
461{
462 const auto & table_to_join = join_element.table_expression->as<ASTTableExpression &>();
463
464 /// TODO This syntax does not support specifying a database name.
465 if (table_to_join.database_and_table_name)
466 {
467 DatabaseAndTableWithAlias database_table(table_to_join.database_and_table_name);
468 StoragePtr table = context.tryGetTable(database_table.database, database_table.table);
469
470 if (table)
471 {
472 auto * storage_join = dynamic_cast<StorageJoin *>(table.get());
473 if (storage_join)
474 return storage_join->getJoin(analyzed_join);
475 }
476 }
477
478 return {};
479}
480
481static ExpressionActionsPtr createJoinedBlockActions(const Context & context, const AnalyzedJoin & analyzed_join)
482{
483 ASTPtr expression_list = analyzed_join.rightKeysList();
484 auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list,
485 analyzed_join.columnsFromJoinedTable(), analyzed_join.requiredJoinedNames());
486 return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false);
487}
488
489JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQueryElement & join_element)
490{
491 /// Two JOINs are not supported with the same subquery, but different USINGs.
492 auto join_hash = join_element.getTreeHash();
493 String join_subquery_id = toString(join_hash.first) + "_" + toString(join_hash.second);
494
495 SubqueryForSet & subquery_for_join = subqueries_for_sets[join_subquery_id];
496
497 /// Special case - if table name is specified on the right of JOIN, then the table has the type Join (the previously prepared mapping).
498 if (!subquery_for_join.join)
499 subquery_for_join.join = tryGetStorageJoin(join_element, syntax->analyzed_join, context);
500
501 if (!subquery_for_join.join)
502 {
503 /// Actions which need to be calculated on joined block.
504 ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin());
505
506 if (!subquery_for_join.source)
507 {
508 NamesWithAliases required_columns_with_aliases =
509 analyzedJoin().getRequiredColumns(joined_block_actions->getSampleBlock(), joined_block_actions->getRequiredColumns());
510 makeSubqueryForJoin(join_element, std::move(required_columns_with_aliases), subquery_for_join);
511 }
512
513 /// TODO You do not need to set this up when JOIN is only needed on remote servers.
514 subquery_for_join.setJoinActions(joined_block_actions); /// changes subquery_for_join.sample_block inside
515 subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block);
516 }
517
518 return subquery_for_join.join;
519}
520
521void SelectQueryExpressionAnalyzer::makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element,
522 NamesWithAliases && required_columns_with_aliases,
523 SubqueryForSet & subquery_for_set) const
524{
525 /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs
526 * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1,
527 * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
528 * - this function shows the expression JOIN _data1.
529 */
530 Names original_columns;
531 for (auto & pr : required_columns_with_aliases)
532 original_columns.push_back(pr.first);
533
534 auto interpreter = interpretSubquery(join_element.table_expression, context, subquery_depth, original_columns);
535
536 subquery_for_set.makeSource(interpreter, std::move(required_columns_with_aliases));
537}
538
539bool SelectQueryExpressionAnalyzer::appendPrewhere(
540 ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
541{
542 const auto * select_query = getSelectQuery();
543
544 if (!select_query->prewhere())
545 return false;
546
547 initChain(chain, sourceColumns());
548 auto & step = chain.getLastStep();
549 getRootActions(select_query->prewhere(), only_types, step.actions);
550 String prewhere_column_name = select_query->prewhere()->getColumnName();
551 step.required_output.push_back(prewhere_column_name);
552 step.can_remove_required_output.push_back(true);
553
554 {
555 /// Remove unused source_columns from prewhere actions.
556 auto tmp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
557 getRootActions(select_query->prewhere(), only_types, tmp_actions);
558 tmp_actions->finalize({prewhere_column_name});
559 auto required_columns = tmp_actions->getRequiredColumns();
560 NameSet required_source_columns(required_columns.begin(), required_columns.end());
561
562 /// Add required columns to required output in order not to remove them after prewhere execution.
563 /// TODO: add sampling and final execution to common chain.
564 for (const auto & column : additional_required_columns)
565 {
566 if (required_source_columns.count(column))
567 {
568 step.required_output.push_back(column);
569 step.can_remove_required_output.push_back(true);
570 }
571 }
572
573 auto names = step.actions->getSampleBlock().getNames();
574 NameSet name_set(names.begin(), names.end());
575
576 for (const auto & column : sourceColumns())
577 if (required_source_columns.count(column.name) == 0)
578 name_set.erase(column.name);
579
580 Names required_output(name_set.begin(), name_set.end());
581 step.actions->finalize(required_output);
582 }
583
584 {
585 /// Add empty action with input = {prewhere actions output} + {unused source columns}
586 /// Reasons:
587 /// 1. Remove remove source columns which are used only in prewhere actions during prewhere actions execution.
588 /// Example: select A prewhere B > 0. B can be removed at prewhere step.
589 /// 2. Store side columns which were calculated during prewhere actions execution if they are used.
590 /// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step.
591 /// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN.
592 ColumnsWithTypeAndName columns = step.actions->getSampleBlock().getColumnsWithTypeAndName();
593 auto required_columns = step.actions->getRequiredColumns();
594 NameSet prewhere_input_names(required_columns.begin(), required_columns.end());
595 NameSet unused_source_columns;
596
597 for (const auto & column : sourceColumns())
598 {
599 if (prewhere_input_names.count(column.name) == 0)
600 {
601 columns.emplace_back(column.type, column.name);
602 unused_source_columns.emplace(column.name);
603 }
604 }
605
606 chain.steps.emplace_back(std::make_shared<ExpressionActions>(std::move(columns), context));
607 chain.steps.back().additional_input = std::move(unused_source_columns);
608 }
609
610 return true;
611}
612
613void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name)
614{
615 initChain(chain, sourceColumns());
616 ExpressionActionsChain::Step & step = chain.steps.back();
617
618 // FIXME: assert(filter_info);
619 step.actions = std::move(actions);
620 step.required_output.push_back(std::move(column_name));
621 step.can_remove_required_output = {true};
622
623 chain.addStep();
624}
625
626bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
627{
628 const auto * select_query = getSelectQuery();
629
630 if (!select_query->where())
631 return false;
632
633 initChain(chain, sourceColumns());
634 ExpressionActionsChain::Step & step = chain.steps.back();
635
636 step.required_output.push_back(select_query->where()->getColumnName());
637 step.can_remove_required_output = {true};
638
639 getRootActions(select_query->where(), only_types, step.actions);
640
641 return true;
642}
643
644bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types)
645{
646 const auto * select_query = getAggregatingQuery();
647
648 if (!select_query->groupBy())
649 return false;
650
651 initChain(chain, sourceColumns());
652 ExpressionActionsChain::Step & step = chain.steps.back();
653
654 ASTs asts = select_query->groupBy()->children;
655 for (size_t i = 0; i < asts.size(); ++i)
656 {
657 step.required_output.push_back(asts[i]->getColumnName());
658 getRootActions(asts[i], only_types, step.actions);
659 }
660
661 return true;
662}
663
664void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types)
665{
666 const auto * select_query = getAggregatingQuery();
667
668 initChain(chain, sourceColumns());
669 ExpressionActionsChain::Step & step = chain.steps.back();
670
671 for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
672 {
673 for (size_t j = 0; j < aggregate_descriptions[i].argument_names.size(); ++j)
674 {
675 step.required_output.push_back(aggregate_descriptions[i].argument_names[j]);
676 }
677 }
678
679 /// Collect aggregates removing duplicates by node.getColumnName()
680 /// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
681 /// @note The original recollection logic didn't remove duplicates.
682 GetAggregatesVisitor::Data data;
683 GetAggregatesVisitor(data).visit(select_query->select());
684
685 if (select_query->having())
686 GetAggregatesVisitor(data).visit(select_query->having());
687
688 if (select_query->orderBy())
689 GetAggregatesVisitor(data).visit(select_query->orderBy());
690
691 /// TODO: data.aggregates -> aggregates()
692 for (const ASTFunction * node : data.aggregates)
693 for (auto & argument : node->arguments->children)
694 getRootActions(argument, only_types, step.actions);
695}
696
697bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types)
698{
699 const auto * select_query = getAggregatingQuery();
700
701 if (!select_query->having())
702 return false;
703
704 initChain(chain, aggregated_columns);
705 ExpressionActionsChain::Step & step = chain.steps.back();
706
707 step.required_output.push_back(select_query->having()->getColumnName());
708 getRootActions(select_query->having(), only_types, step.actions);
709
710 return true;
711}
712
713void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types)
714{
715 const auto * select_query = getSelectQuery();
716
717 initChain(chain, aggregated_columns);
718 ExpressionActionsChain::Step & step = chain.steps.back();
719
720 getRootActions(select_query->select(), only_types, step.actions);
721
722 for (const auto & child : select_query->select()->children)
723 step.required_output.push_back(child->getColumnName());
724}
725
726bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order)
727{
728 const auto * select_query = getSelectQuery();
729
730 if (!select_query->orderBy())
731 return false;
732
733 initChain(chain, aggregated_columns);
734 ExpressionActionsChain::Step & step = chain.steps.back();
735
736 getRootActions(select_query->orderBy(), only_types, step.actions);
737
738 for (auto & child : select_query->orderBy()->children)
739 {
740 const auto * ast = child->as<ASTOrderByElement>();
741 if (!ast || ast->children.size() < 1)
742 throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
743 ASTPtr order_expression = ast->children.at(0);
744 step.required_output.push_back(order_expression->getColumnName());
745 }
746
747 if (optimize_read_in_order)
748 {
749 auto all_columns = sourceWithJoinedColumns();
750 for (auto & child : select_query->orderBy()->children)
751 {
752 order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context));
753 getRootActions(child, only_types, order_by_elements_actions.back());
754 }
755 }
756
757 return true;
758}
759
760bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
761{
762 const auto * select_query = getSelectQuery();
763
764 if (!select_query->limitBy())
765 return false;
766
767 initChain(chain, aggregated_columns);
768 ExpressionActionsChain::Step & step = chain.steps.back();
769
770 getRootActions(select_query->limitBy(), only_types, step.actions);
771
772 NameSet aggregated_names;
773 for (const auto & column : aggregated_columns)
774 {
775 step.required_output.push_back(column.name);
776 aggregated_names.insert(column.name);
777 }
778
779 for (const auto & child : select_query->limitBy()->children)
780 {
781 auto child_name = child->getColumnName();
782 if (!aggregated_names.count(child_name))
783 step.required_output.push_back(std::move(child_name));
784 }
785
786 return true;
787}
788
789void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
790{
791 const auto * select_query = getSelectQuery();
792
793 initChain(chain, aggregated_columns);
794 ExpressionActionsChain::Step & step = chain.steps.back();
795
796 NamesWithAliases result_columns;
797
798 ASTs asts = select_query->select()->children;
799 for (size_t i = 0; i < asts.size(); ++i)
800 {
801 String result_name = asts[i]->getAliasOrColumnName();
802 if (required_result_columns.empty() || required_result_columns.count(result_name))
803 {
804 result_columns.emplace_back(asts[i]->getColumnName(), result_name);
805 step.required_output.push_back(result_columns.back().second);
806 }
807 }
808
809 step.actions->add(ExpressionAction::project(result_columns));
810}
811
812
813void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types)
814{
815 initChain(chain, sourceColumns());
816 ExpressionActionsChain::Step & step = chain.steps.back();
817 getRootActions(expr, only_types, step.actions);
818 step.required_output.push_back(expr->getColumnName());
819}
820
821
822ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
823{
824 ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
825 NamesWithAliases result_columns;
826 Names result_names;
827
828 ASTs asts;
829
830 if (const auto * node = query->as<ASTExpressionList>())
831 asts = node->children;
832 else
833 asts = ASTs(1, query);
834
835 for (size_t i = 0; i < asts.size(); ++i)
836 {
837 std::string name = asts[i]->getColumnName();
838 std::string alias;
839 if (add_aliases)
840 alias = asts[i]->getAliasOrColumnName();
841 else
842 alias = name;
843 result_columns.emplace_back(name, alias);
844 result_names.push_back(alias);
845 getRootActions(asts[i], false, actions);
846 }
847
848 if (add_aliases)
849 {
850 if (project_result)
851 actions->add(ExpressionAction::project(result_columns));
852 else
853 actions->add(ExpressionAction::addAliases(result_columns));
854 }
855
856 if (!(add_aliases && project_result))
857 {
858 /// We will not delete the original columns.
859 for (const auto & column_name_type : sourceColumns())
860 result_names.push_back(column_name_type.name);
861 }
862
863 actions->finalize(result_names);
864
865 return actions;
866}
867
868
869ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
870{
871 ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context);
872
873 getRootActions(query, true, actions, true);
874 return actions;
875}
876
877void SelectQueryExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const
878{
879 for (const auto & name_and_type : aggregation_keys)
880 key_names.emplace_back(name_and_type.name);
881
882 aggregates = aggregate_descriptions;
883}
884
885}
886