| 1 | #include <Poco/Util/Application.h> |
| 2 | #include <Poco/String.h> |
| 3 | |
| 4 | #include <Core/Block.h> |
| 5 | |
| 6 | #include <Parsers/ASTFunction.h> |
| 7 | #include <Parsers/ASTIdentifier.h> |
| 8 | #include <Parsers/ASTLiteral.h> |
| 9 | #include <Parsers/ASTQualifiedAsterisk.h> |
| 10 | #include <Parsers/ASTExpressionList.h> |
| 11 | #include <Parsers/ASTSelectQuery.h> |
| 12 | #include <Parsers/ASTSelectWithUnionQuery.h> |
| 13 | #include <Parsers/ASTSubquery.h> |
| 14 | #include <Parsers/ASTOrderByElement.h> |
| 15 | #include <Parsers/formatAST.h> |
| 16 | #include <Parsers/DumpASTNode.h> |
| 17 | |
| 18 | #include <DataTypes/DataTypeNullable.h> |
| 19 | #include <DataTypes/NestedUtils.h> |
| 20 | #include <DataTypes/DataTypesNumber.h> |
| 21 | #include <DataTypes/DataTypeLowCardinality.h> |
| 22 | |
| 23 | #include <Columns/IColumn.h> |
| 24 | |
| 25 | #include <Interpreters/ExpressionAnalyzer.h> |
| 26 | #include <Interpreters/ExpressionActions.h> |
| 27 | #include <Interpreters/InJoinSubqueriesPreprocessor.h> |
| 28 | #include <Interpreters/LogicalExpressionsOptimizer.h> |
| 29 | #include <Interpreters/PredicateExpressionsOptimizer.h> |
| 30 | #include <Interpreters/ExternalDictionariesLoader.h> |
| 31 | #include <Interpreters/Set.h> |
| 32 | #include <Interpreters/AnalyzedJoin.h> |
| 33 | #include <Interpreters/Join.h> |
| 34 | |
| 35 | #include <AggregateFunctions/AggregateFunctionFactory.h> |
| 36 | #include <AggregateFunctions/parseAggregateFunctionParameters.h> |
| 37 | |
| 38 | #include <Storages/StorageDistributed.h> |
| 39 | #include <Storages/StorageJoin.h> |
| 40 | |
| 41 | #include <DataStreams/copyData.h> |
| 42 | #include <DataStreams/IBlockInputStream.h> |
| 43 | |
| 44 | #include <Dictionaries/IDictionary.h> |
| 45 | |
| 46 | #include <Common/typeid_cast.h> |
| 47 | #include <Common/StringUtils/StringUtils.h> |
| 48 | |
| 49 | #include <ext/range.h> |
| 50 | #include <DataTypes/DataTypeFactory.h> |
| 51 | #include <Functions/FunctionsMiscellaneous.h> |
| 52 | #include <Parsers/ExpressionListParsers.h> |
| 53 | #include <Parsers/parseQuery.h> |
| 54 | #include <Parsers/queryToString.h> |
| 55 | #include <Interpreters/interpretSubquery.h> |
| 56 | #include <Interpreters/DatabaseAndTableWithAlias.h> |
| 57 | #include <Interpreters/misc.h> |
| 58 | |
| 59 | #include <Interpreters/ActionsVisitor.h> |
| 60 | |
| 61 | #include <Interpreters/ExternalTablesVisitor.h> |
| 62 | #include <Interpreters/GlobalSubqueriesVisitor.h> |
| 63 | #include <Interpreters/GetAggregatesVisitor.h> |
| 64 | |
| 65 | namespace DB |
| 66 | { |
| 67 | |
/// Debug log type instantiated in getRootActions(); its stream is handed to ActionsVisitor.
/// Set the template argument to true to enable logs.
using LogAST = DebugASTLog<false>;
| 69 | |
| 70 | |
/// Error codes used by this translation unit. They are only declared here (extern);
/// every code referenced below is listed explicitly so the file does not depend on
/// declarations leaking in from included headers.
namespace ErrorCodes
{
    extern const int UNKNOWN_IDENTIFIER;
    extern const int UNKNOWN_TYPE_OF_AST_NODE;
    extern const int LOGICAL_ERROR;
}
| 76 | |
| 77 | ExpressionAnalyzer::ExpressionAnalyzer( |
| 78 | const ASTPtr & query_, |
| 79 | const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, |
| 80 | const Context & context_, |
| 81 | size_t subquery_depth_, |
| 82 | bool do_global) |
| 83 | : query(query_), context(context_), settings(context.getSettings()) |
| 84 | , subquery_depth(subquery_depth_) |
| 85 | , syntax(syntax_analyzer_result_) |
| 86 | { |
| 87 | /// external_tables, subqueries_for_sets for global subqueries. |
| 88 | /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. |
| 89 | initGlobalSubqueriesAndExternalTables(do_global); |
| 90 | |
| 91 | /// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns. |
| 92 | /// This analysis should be performed after processing global subqueries, because otherwise, |
| 93 | /// if the aggregate function contains a global subquery, then `analyzeAggregation` method will save |
| 94 | /// in `aggregate_descriptions` the information about the parameters of this aggregate function, among which |
| 95 | /// global subquery. Then, when you call `initGlobalSubqueriesAndExternalTables` method, this |
| 96 | /// the global subquery will be replaced with a temporary table, resulting in aggregate_descriptions |
| 97 | /// will contain out-of-date information, which will lead to an error when the query is executed. |
| 98 | analyzeAggregation(); |
| 99 | } |
| 100 | |
| 101 | bool ExpressionAnalyzer::isRemoteStorage() const |
| 102 | { |
| 103 | return storage() && storage()->isRemote(); |
| 104 | } |
| 105 | |
| 106 | |
| 107 | void ExpressionAnalyzer::analyzeAggregation() |
| 108 | { |
| 109 | /** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions), |
| 110 | * as well as a set of columns obtained after the aggregation, if any, |
| 111 | * or after all the actions that are usually performed before aggregation (aggregated_columns). |
| 112 | * |
| 113 | * Everything below (compiling temporary ExpressionActions) - only for the purpose of query analysis (type output). |
| 114 | */ |
| 115 | |
| 116 | auto * select_query = query->as<ASTSelectQuery>(); |
| 117 | |
| 118 | ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context); |
| 119 | |
| 120 | if (select_query) |
| 121 | { |
| 122 | bool is_array_join_left; |
| 123 | ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left); |
| 124 | if (array_join_expression_list) |
| 125 | { |
| 126 | getRootActions(array_join_expression_list, true, temp_actions); |
| 127 | addMultipleArrayJoinAction(temp_actions, is_array_join_left); |
| 128 | |
| 129 | array_join_columns.clear(); |
| 130 | for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList()) |
| 131 | if (syntax->array_join_result_to_source.count(column.name)) |
| 132 | array_join_columns.emplace_back(column); |
| 133 | } |
| 134 | |
| 135 | const ASTTablesInSelectQueryElement * join = select_query->join(); |
| 136 | if (join) |
| 137 | { |
| 138 | getRootActions(analyzedJoin().leftKeysList(), true, temp_actions); |
| 139 | addJoinAction(temp_actions); |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | has_aggregation = makeAggregateDescriptions(temp_actions); |
| 144 | if (select_query && (select_query->groupBy() || select_query->having())) |
| 145 | has_aggregation = true; |
| 146 | |
| 147 | if (has_aggregation) |
| 148 | { |
| 149 | getSelectQuery(); /// assertSelect() |
| 150 | |
| 151 | /// Find out aggregation keys. |
| 152 | if (select_query->groupBy()) |
| 153 | { |
| 154 | NameSet unique_keys; |
| 155 | ASTs & group_asts = select_query->groupBy()->children; |
| 156 | for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i) |
| 157 | { |
| 158 | ssize_t size = group_asts.size(); |
| 159 | getRootActions(group_asts[i], true, temp_actions); |
| 160 | |
| 161 | const auto & column_name = group_asts[i]->getColumnName(); |
| 162 | const auto & block = temp_actions->getSampleBlock(); |
| 163 | |
| 164 | if (!block.has(column_name)) |
| 165 | throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); |
| 166 | |
| 167 | const auto & col = block.getByName(column_name); |
| 168 | |
| 169 | /// Constant expressions have non-null column pointer at this stage. |
| 170 | if (col.column && isColumnConst(*col.column)) |
| 171 | { |
| 172 | /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. |
| 173 | if (!aggregate_descriptions.empty() || size > 1) |
| 174 | { |
| 175 | if (i + 1 < static_cast<ssize_t>(size)) |
| 176 | group_asts[i] = std::move(group_asts.back()); |
| 177 | |
| 178 | group_asts.pop_back(); |
| 179 | |
| 180 | --i; |
| 181 | continue; |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | NameAndTypePair key{column_name, col.type}; |
| 186 | |
| 187 | /// Aggregation keys are uniqued. |
| 188 | if (!unique_keys.count(key.name)) |
| 189 | { |
| 190 | unique_keys.insert(key.name); |
| 191 | aggregation_keys.push_back(key); |
| 192 | |
| 193 | /// Key is no longer needed, therefore we can save a little by moving it. |
| 194 | aggregated_columns.push_back(std::move(key)); |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | if (group_asts.empty()) |
| 199 | { |
| 200 | select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); |
| 201 | has_aggregation = select_query->having() || aggregate_descriptions.size(); |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | for (size_t i = 0; i < aggregate_descriptions.size(); ++i) |
| 206 | { |
| 207 | AggregateDescription & desc = aggregate_descriptions[i]; |
| 208 | aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType()); |
| 209 | } |
| 210 | } |
| 211 | else |
| 212 | { |
| 213 | aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | |
| 218 | void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) |
| 219 | { |
| 220 | /// Adds existing external tables (not subqueries) to the external_tables dictionary. |
| 221 | ExternalTablesVisitor::Data tables_data{context, external_tables}; |
| 222 | ExternalTablesVisitor(tables_data).visit(query); |
| 223 | |
| 224 | if (do_global) |
| 225 | { |
| 226 | GlobalSubqueriesVisitor::Data subqueries_data(context, subquery_depth, isRemoteStorage(), |
| 227 | external_tables, subqueries_for_sets, has_global_subqueries); |
| 228 | GlobalSubqueriesVisitor(subqueries_data).visit(query); |
| 229 | } |
| 230 | } |
| 231 | |
| 232 | |
| 233 | NamesAndTypesList ExpressionAnalyzer::sourceWithJoinedColumns() const |
| 234 | { |
| 235 | auto result_columns = sourceColumns(); |
| 236 | result_columns.insert(result_columns.end(), array_join_columns.begin(), array_join_columns.end()); |
| 237 | result_columns.insert(result_columns.end(), |
| 238 | analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end()); |
| 239 | return result_columns; |
| 240 | } |
| 241 | |
| 242 | |
| 243 | void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) |
| 244 | { |
| 245 | auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); |
| 246 | |
| 247 | if (prepared_sets.count(set_key)) |
| 248 | return; /// Already prepared. |
| 249 | |
| 250 | if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name)) |
| 251 | { |
| 252 | prepared_sets.insert({set_key, set_ptr_from_storage_set}); |
| 253 | return; |
| 254 | } |
| 255 | |
| 256 | auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {}); |
| 257 | BlockIO res = interpreter_subquery->execute(); |
| 258 | |
| 259 | SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true); |
| 260 | set->setHeader(res.in->getHeader()); |
| 261 | |
| 262 | res.in->readPrefix(); |
| 263 | while (Block block = res.in->read()) |
| 264 | { |
| 265 | /// If the limits have been exceeded, give up and let the default subquery processing actions take place. |
| 266 | if (!set->insertFromBlock(block)) |
| 267 | return; |
| 268 | } |
| 269 | |
| 270 | set->finishInsert(); |
| 271 | res.in->readSuffix(); |
| 272 | |
| 273 | prepared_sets[set_key] = std::move(set); |
| 274 | } |
| 275 | |
| 276 | SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name) |
| 277 | { |
| 278 | const auto * table = subquery_or_table_name->as<ASTIdentifier>(); |
| 279 | if (!table) |
| 280 | return nullptr; |
| 281 | const DatabaseAndTableWithAlias database_table(*table); |
| 282 | const auto storage = context.getTable(database_table.database, database_table.table); |
| 283 | if (storage->getName() != "Set" ) |
| 284 | return nullptr; |
| 285 | const auto storage_set = std::dynamic_pointer_cast<StorageSet>(storage); |
| 286 | return storage_set->getSet(); |
| 287 | } |
| 288 | |
| 289 | |
| 290 | /// Perfomance optimisation for IN() if storage supports it. |
| 291 | void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) |
| 292 | { |
| 293 | if (!node || !storage() || !storage()->supportsIndexForIn()) |
| 294 | return; |
| 295 | |
| 296 | for (auto & child : node->children) |
| 297 | { |
| 298 | /// Don't descend into subqueries. |
| 299 | if (child->as<ASTSubquery>()) |
| 300 | continue; |
| 301 | |
| 302 | /// Don't descend into lambda functions |
| 303 | const auto * func = child->as<ASTFunction>(); |
| 304 | if (func && func->name == "lambda" ) |
| 305 | continue; |
| 306 | |
| 307 | makeSetsForIndex(child); |
| 308 | } |
| 309 | |
| 310 | const auto * func = node->as<ASTFunction>(); |
| 311 | if (func && functionIsInOperator(func->name)) |
| 312 | { |
| 313 | const IAST & args = *func->arguments; |
| 314 | const ASTPtr & left_in_operand = args.children.at(0); |
| 315 | |
| 316 | if (storage()->mayBenefitFromIndexForIn(left_in_operand, context)) |
| 317 | { |
| 318 | const ASTPtr & arg = args.children.at(1); |
| 319 | if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>()) |
| 320 | { |
| 321 | if (settings.use_index_for_in_with_subqueries) |
| 322 | tryMakeSetForIndexFromSubquery(arg); |
| 323 | } |
| 324 | else |
| 325 | { |
| 326 | ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceWithJoinedColumns(), context); |
| 327 | getRootActions(left_in_operand, true, temp_actions); |
| 328 | |
| 329 | Block sample_block_with_calculated_columns = temp_actions->getSampleBlock(); |
| 330 | if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName())) |
| 331 | makeExplicitSet(func, sample_block_with_calculated_columns, true, context, |
| 332 | settings.size_limits_for_set, prepared_sets); |
| 333 | } |
| 334 | } |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | |
| 339 | void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts) |
| 340 | { |
| 341 | LogAST log; |
| 342 | ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth, |
| 343 | sourceColumns(), actions, prepared_sets, subqueries_for_sets, |
| 344 | no_subqueries, only_consts, !isRemoteStorage()); |
| 345 | ActionsVisitor(visitor_data, log.stream()).visit(ast); |
| 346 | visitor_data.updateActions(actions); |
| 347 | } |
| 348 | |
| 349 | |
| 350 | bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions) |
| 351 | { |
| 352 | for (const ASTFunction * node : aggregates()) |
| 353 | { |
| 354 | AggregateDescription aggregate; |
| 355 | aggregate.column_name = node->getColumnName(); |
| 356 | |
| 357 | const ASTs & arguments = node->arguments->children; |
| 358 | aggregate.argument_names.resize(arguments.size()); |
| 359 | DataTypes types(arguments.size()); |
| 360 | |
| 361 | for (size_t i = 0; i < arguments.size(); ++i) |
| 362 | { |
| 363 | getRootActions(arguments[i], true, actions); |
| 364 | const std::string & name = arguments[i]->getColumnName(); |
| 365 | types[i] = actions->getSampleBlock().getByName(name).type; |
| 366 | aggregate.argument_names[i] = name; |
| 367 | } |
| 368 | |
| 369 | aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array(); |
| 370 | aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters); |
| 371 | |
| 372 | aggregate_descriptions.push_back(aggregate); |
| 373 | } |
| 374 | |
| 375 | return !aggregates().empty(); |
| 376 | } |
| 377 | |
| 378 | |
| 379 | const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const |
| 380 | { |
| 381 | const auto * select_query = query->as<ASTSelectQuery>(); |
| 382 | if (!select_query) |
| 383 | throw Exception("Not a select query" , ErrorCodes::LOGICAL_ERROR); |
| 384 | return select_query; |
| 385 | } |
| 386 | |
| 387 | const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const |
| 388 | { |
| 389 | if (!has_aggregation) |
| 390 | throw Exception("No aggregation" , ErrorCodes::LOGICAL_ERROR); |
| 391 | return getSelectQuery(); |
| 392 | } |
| 393 | |
| 394 | void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const |
| 395 | { |
| 396 | if (chain.steps.empty()) |
| 397 | { |
| 398 | chain.steps.emplace_back(std::make_shared<ExpressionActions>(columns, context)); |
| 399 | } |
| 400 | } |
| 401 | |
| 402 | /// "Big" ARRAY JOIN. |
| 403 | void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const |
| 404 | { |
| 405 | NameSet result_columns; |
| 406 | for (const auto & result_source : syntax->array_join_result_to_source) |
| 407 | { |
| 408 | /// Assign new names to columns, if needed. |
| 409 | if (result_source.first != result_source.second) |
| 410 | actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first)); |
| 411 | |
| 412 | /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names. |
| 413 | result_columns.insert(result_source.first); |
| 414 | } |
| 415 | |
| 416 | actions->add(ExpressionAction::arrayJoin(result_columns, array_join_is_left, context)); |
| 417 | } |
| 418 | |
| 419 | bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types) |
| 420 | { |
| 421 | const auto * select_query = getSelectQuery(); |
| 422 | |
| 423 | bool is_array_join_left; |
| 424 | ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left); |
| 425 | if (!array_join_expression_list) |
| 426 | return false; |
| 427 | |
| 428 | initChain(chain, sourceColumns()); |
| 429 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 430 | |
| 431 | getRootActions(array_join_expression_list, only_types, step.actions); |
| 432 | |
| 433 | addMultipleArrayJoinAction(step.actions, is_array_join_left); |
| 434 | |
| 435 | return true; |
| 436 | } |
| 437 | |
| 438 | void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, JoinPtr join) const |
| 439 | { |
| 440 | actions->add(ExpressionAction::ordinaryJoin(syntax->analyzed_join, join)); |
| 441 | } |
| 442 | |
| 443 | bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) |
| 444 | { |
| 445 | const ASTTablesInSelectQueryElement * ast_join = getSelectQuery()->join(); |
| 446 | if (!ast_join) |
| 447 | return false; |
| 448 | |
| 449 | JoinPtr table_join = makeTableJoin(*ast_join); |
| 450 | |
| 451 | initChain(chain, sourceColumns()); |
| 452 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 453 | |
| 454 | getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions); |
| 455 | addJoinAction(step.actions, table_join); |
| 456 | return true; |
| 457 | } |
| 458 | |
| 459 | static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & join_element, std::shared_ptr<AnalyzedJoin> analyzed_join, |
| 460 | const Context & context) |
| 461 | { |
| 462 | const auto & table_to_join = join_element.table_expression->as<ASTTableExpression &>(); |
| 463 | |
| 464 | /// TODO This syntax does not support specifying a database name. |
| 465 | if (table_to_join.database_and_table_name) |
| 466 | { |
| 467 | DatabaseAndTableWithAlias database_table(table_to_join.database_and_table_name); |
| 468 | StoragePtr table = context.tryGetTable(database_table.database, database_table.table); |
| 469 | |
| 470 | if (table) |
| 471 | { |
| 472 | auto * storage_join = dynamic_cast<StorageJoin *>(table.get()); |
| 473 | if (storage_join) |
| 474 | return storage_join->getJoin(analyzed_join); |
| 475 | } |
| 476 | } |
| 477 | |
| 478 | return {}; |
| 479 | } |
| 480 | |
| 481 | static ExpressionActionsPtr createJoinedBlockActions(const Context & context, const AnalyzedJoin & analyzed_join) |
| 482 | { |
| 483 | ASTPtr expression_list = analyzed_join.rightKeysList(); |
| 484 | auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list, |
| 485 | analyzed_join.columnsFromJoinedTable(), analyzed_join.requiredJoinedNames()); |
| 486 | return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false); |
| 487 | } |
| 488 | |
| 489 | JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQueryElement & join_element) |
| 490 | { |
| 491 | /// Two JOINs are not supported with the same subquery, but different USINGs. |
| 492 | auto join_hash = join_element.getTreeHash(); |
| 493 | String join_subquery_id = toString(join_hash.first) + "_" + toString(join_hash.second); |
| 494 | |
| 495 | SubqueryForSet & subquery_for_join = subqueries_for_sets[join_subquery_id]; |
| 496 | |
| 497 | /// Special case - if table name is specified on the right of JOIN, then the table has the type Join (the previously prepared mapping). |
| 498 | if (!subquery_for_join.join) |
| 499 | subquery_for_join.join = tryGetStorageJoin(join_element, syntax->analyzed_join, context); |
| 500 | |
| 501 | if (!subquery_for_join.join) |
| 502 | { |
| 503 | /// Actions which need to be calculated on joined block. |
| 504 | ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin()); |
| 505 | |
| 506 | if (!subquery_for_join.source) |
| 507 | { |
| 508 | NamesWithAliases required_columns_with_aliases = |
| 509 | analyzedJoin().getRequiredColumns(joined_block_actions->getSampleBlock(), joined_block_actions->getRequiredColumns()); |
| 510 | makeSubqueryForJoin(join_element, std::move(required_columns_with_aliases), subquery_for_join); |
| 511 | } |
| 512 | |
| 513 | /// TODO You do not need to set this up when JOIN is only needed on remote servers. |
| 514 | subquery_for_join.setJoinActions(joined_block_actions); /// changes subquery_for_join.sample_block inside |
| 515 | subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block); |
| 516 | } |
| 517 | |
| 518 | return subquery_for_join.join; |
| 519 | } |
| 520 | |
| 521 | void SelectQueryExpressionAnalyzer::makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, |
| 522 | NamesWithAliases && required_columns_with_aliases, |
| 523 | SubqueryForSet & subquery_for_set) const |
| 524 | { |
| 525 | /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs |
| 526 | * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, |
| 527 | * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. |
| 528 | * - this function shows the expression JOIN _data1. |
| 529 | */ |
| 530 | Names original_columns; |
| 531 | for (auto & pr : required_columns_with_aliases) |
| 532 | original_columns.push_back(pr.first); |
| 533 | |
| 534 | auto interpreter = interpretSubquery(join_element.table_expression, context, subquery_depth, original_columns); |
| 535 | |
| 536 | subquery_for_set.makeSource(interpreter, std::move(required_columns_with_aliases)); |
| 537 | } |
| 538 | |
| 539 | bool SelectQueryExpressionAnalyzer::appendPrewhere( |
| 540 | ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) |
| 541 | { |
| 542 | const auto * select_query = getSelectQuery(); |
| 543 | |
| 544 | if (!select_query->prewhere()) |
| 545 | return false; |
| 546 | |
| 547 | initChain(chain, sourceColumns()); |
| 548 | auto & step = chain.getLastStep(); |
| 549 | getRootActions(select_query->prewhere(), only_types, step.actions); |
| 550 | String prewhere_column_name = select_query->prewhere()->getColumnName(); |
| 551 | step.required_output.push_back(prewhere_column_name); |
| 552 | step.can_remove_required_output.push_back(true); |
| 553 | |
| 554 | { |
| 555 | /// Remove unused source_columns from prewhere actions. |
| 556 | auto tmp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context); |
| 557 | getRootActions(select_query->prewhere(), only_types, tmp_actions); |
| 558 | tmp_actions->finalize({prewhere_column_name}); |
| 559 | auto required_columns = tmp_actions->getRequiredColumns(); |
| 560 | NameSet required_source_columns(required_columns.begin(), required_columns.end()); |
| 561 | |
| 562 | /// Add required columns to required output in order not to remove them after prewhere execution. |
| 563 | /// TODO: add sampling and final execution to common chain. |
| 564 | for (const auto & column : additional_required_columns) |
| 565 | { |
| 566 | if (required_source_columns.count(column)) |
| 567 | { |
| 568 | step.required_output.push_back(column); |
| 569 | step.can_remove_required_output.push_back(true); |
| 570 | } |
| 571 | } |
| 572 | |
| 573 | auto names = step.actions->getSampleBlock().getNames(); |
| 574 | NameSet name_set(names.begin(), names.end()); |
| 575 | |
| 576 | for (const auto & column : sourceColumns()) |
| 577 | if (required_source_columns.count(column.name) == 0) |
| 578 | name_set.erase(column.name); |
| 579 | |
| 580 | Names required_output(name_set.begin(), name_set.end()); |
| 581 | step.actions->finalize(required_output); |
| 582 | } |
| 583 | |
| 584 | { |
| 585 | /// Add empty action with input = {prewhere actions output} + {unused source columns} |
| 586 | /// Reasons: |
| 587 | /// 1. Remove remove source columns which are used only in prewhere actions during prewhere actions execution. |
| 588 | /// Example: select A prewhere B > 0. B can be removed at prewhere step. |
| 589 | /// 2. Store side columns which were calculated during prewhere actions execution if they are used. |
| 590 | /// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step. |
| 591 | /// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN. |
| 592 | ColumnsWithTypeAndName columns = step.actions->getSampleBlock().getColumnsWithTypeAndName(); |
| 593 | auto required_columns = step.actions->getRequiredColumns(); |
| 594 | NameSet prewhere_input_names(required_columns.begin(), required_columns.end()); |
| 595 | NameSet unused_source_columns; |
| 596 | |
| 597 | for (const auto & column : sourceColumns()) |
| 598 | { |
| 599 | if (prewhere_input_names.count(column.name) == 0) |
| 600 | { |
| 601 | columns.emplace_back(column.type, column.name); |
| 602 | unused_source_columns.emplace(column.name); |
| 603 | } |
| 604 | } |
| 605 | |
| 606 | chain.steps.emplace_back(std::make_shared<ExpressionActions>(std::move(columns), context)); |
| 607 | chain.steps.back().additional_input = std::move(unused_source_columns); |
| 608 | } |
| 609 | |
| 610 | return true; |
| 611 | } |
| 612 | |
| 613 | void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name) |
| 614 | { |
| 615 | initChain(chain, sourceColumns()); |
| 616 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 617 | |
| 618 | // FIXME: assert(filter_info); |
| 619 | step.actions = std::move(actions); |
| 620 | step.required_output.push_back(std::move(column_name)); |
| 621 | step.can_remove_required_output = {true}; |
| 622 | |
| 623 | chain.addStep(); |
| 624 | } |
| 625 | |
| 626 | bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types) |
| 627 | { |
| 628 | const auto * select_query = getSelectQuery(); |
| 629 | |
| 630 | if (!select_query->where()) |
| 631 | return false; |
| 632 | |
| 633 | initChain(chain, sourceColumns()); |
| 634 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 635 | |
| 636 | step.required_output.push_back(select_query->where()->getColumnName()); |
| 637 | step.can_remove_required_output = {true}; |
| 638 | |
| 639 | getRootActions(select_query->where(), only_types, step.actions); |
| 640 | |
| 641 | return true; |
| 642 | } |
| 643 | |
| 644 | bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types) |
| 645 | { |
| 646 | const auto * select_query = getAggregatingQuery(); |
| 647 | |
| 648 | if (!select_query->groupBy()) |
| 649 | return false; |
| 650 | |
| 651 | initChain(chain, sourceColumns()); |
| 652 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 653 | |
| 654 | ASTs asts = select_query->groupBy()->children; |
| 655 | for (size_t i = 0; i < asts.size(); ++i) |
| 656 | { |
| 657 | step.required_output.push_back(asts[i]->getColumnName()); |
| 658 | getRootActions(asts[i], only_types, step.actions); |
| 659 | } |
| 660 | |
| 661 | return true; |
| 662 | } |
| 663 | |
| 664 | void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types) |
| 665 | { |
| 666 | const auto * select_query = getAggregatingQuery(); |
| 667 | |
| 668 | initChain(chain, sourceColumns()); |
| 669 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 670 | |
| 671 | for (size_t i = 0; i < aggregate_descriptions.size(); ++i) |
| 672 | { |
| 673 | for (size_t j = 0; j < aggregate_descriptions[i].argument_names.size(); ++j) |
| 674 | { |
| 675 | step.required_output.push_back(aggregate_descriptions[i].argument_names[j]); |
| 676 | } |
| 677 | } |
| 678 | |
| 679 | /// Collect aggregates removing duplicates by node.getColumnName() |
| 680 | /// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query) |
| 681 | /// @note The original recollection logic didn't remove duplicates. |
| 682 | GetAggregatesVisitor::Data data; |
| 683 | GetAggregatesVisitor(data).visit(select_query->select()); |
| 684 | |
| 685 | if (select_query->having()) |
| 686 | GetAggregatesVisitor(data).visit(select_query->having()); |
| 687 | |
| 688 | if (select_query->orderBy()) |
| 689 | GetAggregatesVisitor(data).visit(select_query->orderBy()); |
| 690 | |
| 691 | /// TODO: data.aggregates -> aggregates() |
| 692 | for (const ASTFunction * node : data.aggregates) |
| 693 | for (auto & argument : node->arguments->children) |
| 694 | getRootActions(argument, only_types, step.actions); |
| 695 | } |
| 696 | |
| 697 | bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types) |
| 698 | { |
| 699 | const auto * select_query = getAggregatingQuery(); |
| 700 | |
| 701 | if (!select_query->having()) |
| 702 | return false; |
| 703 | |
| 704 | initChain(chain, aggregated_columns); |
| 705 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 706 | |
| 707 | step.required_output.push_back(select_query->having()->getColumnName()); |
| 708 | getRootActions(select_query->having(), only_types, step.actions); |
| 709 | |
| 710 | return true; |
| 711 | } |
| 712 | |
| 713 | void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types) |
| 714 | { |
| 715 | const auto * select_query = getSelectQuery(); |
| 716 | |
| 717 | initChain(chain, aggregated_columns); |
| 718 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 719 | |
| 720 | getRootActions(select_query->select(), only_types, step.actions); |
| 721 | |
| 722 | for (const auto & child : select_query->select()->children) |
| 723 | step.required_output.push_back(child->getColumnName()); |
| 724 | } |
| 725 | |
| 726 | bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order) |
| 727 | { |
| 728 | const auto * select_query = getSelectQuery(); |
| 729 | |
| 730 | if (!select_query->orderBy()) |
| 731 | return false; |
| 732 | |
| 733 | initChain(chain, aggregated_columns); |
| 734 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 735 | |
| 736 | getRootActions(select_query->orderBy(), only_types, step.actions); |
| 737 | |
| 738 | for (auto & child : select_query->orderBy()->children) |
| 739 | { |
| 740 | const auto * ast = child->as<ASTOrderByElement>(); |
| 741 | if (!ast || ast->children.size() < 1) |
| 742 | throw Exception("Bad order expression AST" , ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); |
| 743 | ASTPtr order_expression = ast->children.at(0); |
| 744 | step.required_output.push_back(order_expression->getColumnName()); |
| 745 | } |
| 746 | |
| 747 | if (optimize_read_in_order) |
| 748 | { |
| 749 | auto all_columns = sourceWithJoinedColumns(); |
| 750 | for (auto & child : select_query->orderBy()->children) |
| 751 | { |
| 752 | order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context)); |
| 753 | getRootActions(child, only_types, order_by_elements_actions.back()); |
| 754 | } |
| 755 | } |
| 756 | |
| 757 | return true; |
| 758 | } |
| 759 | |
| 760 | bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types) |
| 761 | { |
| 762 | const auto * select_query = getSelectQuery(); |
| 763 | |
| 764 | if (!select_query->limitBy()) |
| 765 | return false; |
| 766 | |
| 767 | initChain(chain, aggregated_columns); |
| 768 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 769 | |
| 770 | getRootActions(select_query->limitBy(), only_types, step.actions); |
| 771 | |
| 772 | NameSet aggregated_names; |
| 773 | for (const auto & column : aggregated_columns) |
| 774 | { |
| 775 | step.required_output.push_back(column.name); |
| 776 | aggregated_names.insert(column.name); |
| 777 | } |
| 778 | |
| 779 | for (const auto & child : select_query->limitBy()->children) |
| 780 | { |
| 781 | auto child_name = child->getColumnName(); |
| 782 | if (!aggregated_names.count(child_name)) |
| 783 | step.required_output.push_back(std::move(child_name)); |
| 784 | } |
| 785 | |
| 786 | return true; |
| 787 | } |
| 788 | |
| 789 | void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const |
| 790 | { |
| 791 | const auto * select_query = getSelectQuery(); |
| 792 | |
| 793 | initChain(chain, aggregated_columns); |
| 794 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 795 | |
| 796 | NamesWithAliases result_columns; |
| 797 | |
| 798 | ASTs asts = select_query->select()->children; |
| 799 | for (size_t i = 0; i < asts.size(); ++i) |
| 800 | { |
| 801 | String result_name = asts[i]->getAliasOrColumnName(); |
| 802 | if (required_result_columns.empty() || required_result_columns.count(result_name)) |
| 803 | { |
| 804 | result_columns.emplace_back(asts[i]->getColumnName(), result_name); |
| 805 | step.required_output.push_back(result_columns.back().second); |
| 806 | } |
| 807 | } |
| 808 | |
| 809 | step.actions->add(ExpressionAction::project(result_columns)); |
| 810 | } |
| 811 | |
| 812 | |
| 813 | void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types) |
| 814 | { |
| 815 | initChain(chain, sourceColumns()); |
| 816 | ExpressionActionsChain::Step & step = chain.steps.back(); |
| 817 | getRootActions(expr, only_types, step.actions); |
| 818 | step.required_output.push_back(expr->getColumnName()); |
| 819 | } |
| 820 | |
| 821 | |
| 822 | ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) |
| 823 | { |
| 824 | ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(sourceColumns(), context); |
| 825 | NamesWithAliases result_columns; |
| 826 | Names result_names; |
| 827 | |
| 828 | ASTs asts; |
| 829 | |
| 830 | if (const auto * node = query->as<ASTExpressionList>()) |
| 831 | asts = node->children; |
| 832 | else |
| 833 | asts = ASTs(1, query); |
| 834 | |
| 835 | for (size_t i = 0; i < asts.size(); ++i) |
| 836 | { |
| 837 | std::string name = asts[i]->getColumnName(); |
| 838 | std::string alias; |
| 839 | if (add_aliases) |
| 840 | alias = asts[i]->getAliasOrColumnName(); |
| 841 | else |
| 842 | alias = name; |
| 843 | result_columns.emplace_back(name, alias); |
| 844 | result_names.push_back(alias); |
| 845 | getRootActions(asts[i], false, actions); |
| 846 | } |
| 847 | |
| 848 | if (add_aliases) |
| 849 | { |
| 850 | if (project_result) |
| 851 | actions->add(ExpressionAction::project(result_columns)); |
| 852 | else |
| 853 | actions->add(ExpressionAction::addAliases(result_columns)); |
| 854 | } |
| 855 | |
| 856 | if (!(add_aliases && project_result)) |
| 857 | { |
| 858 | /// We will not delete the original columns. |
| 859 | for (const auto & column_name_type : sourceColumns()) |
| 860 | result_names.push_back(column_name_type.name); |
| 861 | } |
| 862 | |
| 863 | actions->finalize(result_names); |
| 864 | |
| 865 | return actions; |
| 866 | } |
| 867 | |
| 868 | |
| 869 | ExpressionActionsPtr ExpressionAnalyzer::getConstActions() |
| 870 | { |
| 871 | ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context); |
| 872 | |
| 873 | getRootActions(query, true, actions, true); |
| 874 | return actions; |
| 875 | } |
| 876 | |
| 877 | void SelectQueryExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const |
| 878 | { |
| 879 | for (const auto & name_and_type : aggregation_keys) |
| 880 | key_names.emplace_back(name_and_type.name); |
| 881 | |
| 882 | aggregates = aggregate_descriptions; |
| 883 | } |
| 884 | |
| 885 | } |
| 886 | |