| 1 | #include <Common/typeid_cast.h> |
| 2 | #include <Common/PODArray.h> |
| 3 | |
| 4 | #include <Functions/FunctionFactory.h> |
| 5 | #include <Functions/FunctionsMiscellaneous.h> |
| 6 | |
| 7 | #include <AggregateFunctions/AggregateFunctionFactory.h> |
| 8 | |
| 9 | #include <DataTypes/DataTypeSet.h> |
| 10 | #include <DataTypes/DataTypesNumber.h> |
| 11 | #include <DataTypes/DataTypeFunction.h> |
| 12 | #include <DataTypes/DataTypeString.h> |
| 13 | #include <DataTypes/DataTypeTuple.h> |
| 14 | #include <DataTypes/DataTypeLowCardinality.h> |
| 15 | #include <DataTypes/FieldToDataType.h> |
| 16 | |
| 17 | #include <DataStreams/LazyBlockInputStream.h> |
| 18 | |
| 19 | #include <Columns/ColumnSet.h> |
| 20 | #include <Columns/ColumnConst.h> |
| 21 | #include <Columns/ColumnsNumber.h> |
| 22 | |
| 23 | #include <Storages/StorageSet.h> |
| 24 | |
| 25 | #include <Parsers/ASTFunction.h> |
| 26 | #include <Parsers/ASTIdentifier.h> |
| 27 | #include <Parsers/ASTLiteral.h> |
| 28 | #include <Parsers/ASTSelectQuery.h> |
| 29 | #include <Parsers/ASTSubquery.h> |
| 30 | #include <Parsers/ASTTablesInSelectQuery.h> |
| 31 | |
| 32 | #include <Interpreters/ExpressionActions.h> |
| 33 | #include <Interpreters/misc.h> |
| 34 | #include <Interpreters/ActionsVisitor.h> |
| 35 | #include <Interpreters/InterpreterSelectWithUnionQuery.h> |
| 36 | #include <Interpreters/Set.h> |
| 37 | #include <Interpreters/evaluateConstantExpression.h> |
| 38 | #include <Interpreters/convertFieldToType.h> |
| 39 | #include <Interpreters/interpretSubquery.h> |
| 40 | #include <Interpreters/DatabaseAndTableWithAlias.h> |
| 41 | #include <Interpreters/IdentifierSemantic.h> |
| 42 | |
| 43 | namespace DB |
| 44 | { |
| 45 | |
/// Error codes thrown from this file. They are only declared here (`extern`); the values are defined elsewhere.
namespace ErrorCodes
{
    extern const int UNKNOWN_IDENTIFIER;
    extern const int NOT_AN_AGGREGATE;
    extern const int UNEXPECTED_EXPRESSION;
    extern const int TYPE_MISMATCH;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    /// Used by makeExplicitSet. Declared explicitly so that this file does not rely on
    /// some other included header happening to declare it.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
| 54 | |
| 55 | static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) |
| 56 | { |
| 57 | return std::find_if(cols.begin(), cols.end(), |
| 58 | [&](const NamesAndTypesList::value_type & val) { return val.name == name; }); |
| 59 | } |
| 60 | |
| 61 | SetPtr makeExplicitSet( |
| 62 | const ASTFunction * node, const Block & sample_block, bool create_ordered_set, |
| 63 | const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets) |
| 64 | { |
| 65 | const IAST & args = *node->arguments; |
| 66 | |
| 67 | if (args.children.size() != 2) |
| 68 | throw Exception("Wrong number of arguments passed to function in" , ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
| 69 | |
| 70 | const ASTPtr & left_arg = args.children.at(0); |
| 71 | const ASTPtr & right_arg = args.children.at(1); |
| 72 | |
| 73 | const DataTypePtr & left_arg_type = sample_block.getByName(left_arg->getColumnName()).type; |
| 74 | |
| 75 | DataTypes set_element_types = {left_arg_type}; |
| 76 | auto left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get()); |
| 77 | if (left_tuple_type && left_tuple_type->getElements().size() != 1) |
| 78 | set_element_types = left_tuple_type->getElements(); |
| 79 | |
| 80 | for (auto & element_type : set_element_types) |
| 81 | if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(element_type.get())) |
| 82 | element_type = low_cardinality_type->getDictionaryType(); |
| 83 | |
| 84 | auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types); |
| 85 | if (prepared_sets.count(set_key)) |
| 86 | return prepared_sets.at(set_key); /// Already prepared. |
| 87 | |
| 88 | auto getTupleTypeFromAst = [&context](const ASTPtr & tuple_ast) -> DataTypePtr |
| 89 | { |
| 90 | const auto * func = tuple_ast->as<ASTFunction>(); |
| 91 | if (func && func->name == "tuple" && !func->arguments->children.empty()) |
| 92 | { |
| 93 | /// Won't parse all values of outer tuple. |
| 94 | auto element = func->arguments->children.at(0); |
| 95 | std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(element, context); |
| 96 | return std::make_shared<DataTypeTuple>(DataTypes({value_raw.second})); |
| 97 | } |
| 98 | |
| 99 | return evaluateConstantExpression(tuple_ast, context).second; |
| 100 | }; |
| 101 | |
| 102 | const DataTypePtr & right_arg_type = getTupleTypeFromAst(right_arg); |
| 103 | |
| 104 | std::function<size_t(const DataTypePtr &)> getTupleDepth; |
| 105 | getTupleDepth = [&getTupleDepth](const DataTypePtr & type) -> size_t |
| 106 | { |
| 107 | if (auto tuple_type = typeid_cast<const DataTypeTuple *>(type.get())) |
| 108 | return 1 + (tuple_type->getElements().empty() ? 0 : getTupleDepth(tuple_type->getElements().at(0))); |
| 109 | |
| 110 | return 0; |
| 111 | }; |
| 112 | |
| 113 | size_t left_tuple_depth = getTupleDepth(left_arg_type); |
| 114 | size_t right_tuple_depth = getTupleDepth(right_arg_type); |
| 115 | |
| 116 | ASTPtr elements_ast = nullptr; |
| 117 | |
| 118 | /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc. |
| 119 | if (left_tuple_depth == right_tuple_depth) |
| 120 | { |
| 121 | ASTPtr exp_list = std::make_shared<ASTExpressionList>(); |
| 122 | exp_list->children.push_back(right_arg); |
| 123 | elements_ast = exp_list; |
| 124 | } |
| 125 | /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. |
| 126 | else if (left_tuple_depth + 1 == right_tuple_depth) |
| 127 | { |
| 128 | const auto * set_func = right_arg->as<ASTFunction>(); |
| 129 | |
| 130 | if (!set_func || set_func->name != "tuple" ) |
| 131 | throw Exception("Incorrect type of 2nd argument for function " + node->name |
| 132 | + ". Must be subquery or set of elements with type " + left_arg_type->getName() + "." , |
| 133 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 134 | |
| 135 | elements_ast = set_func->arguments; |
| 136 | } |
| 137 | else |
| 138 | throw Exception("Invalid types for IN function: " |
| 139 | + left_arg_type->getName() + " and " + right_arg_type->getName() + "." , |
| 140 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
| 141 | |
| 142 | SetPtr set = std::make_shared<Set>(size_limits, create_ordered_set); |
| 143 | set->createFromAST(set_element_types, elements_ast, context); |
| 144 | prepared_sets[set_key] = set; |
| 145 | return set; |
| 146 | } |
| 147 | |
| 148 | static String getUniqueName(const Block & block, const String & prefix) |
| 149 | { |
| 150 | int i = 1; |
| 151 | while (block.has(prefix + toString(i))) |
| 152 | ++i; |
| 153 | return prefix + toString(i); |
| 154 | } |
| 155 | |
| 156 | ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Context & context_) |
| 157 | : context(context_) |
| 158 | { |
| 159 | stack.emplace_back(); |
| 160 | stack.back().actions = actions; |
| 161 | |
| 162 | const Block & sample_block = actions->getSampleBlock(); |
| 163 | for (size_t i = 0, size = sample_block.columns(); i < size; ++i) |
| 164 | stack.back().new_columns.insert(sample_block.getByPosition(i).name); |
| 165 | } |
| 166 | |
| 167 | void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) |
| 168 | { |
| 169 | stack.emplace_back(); |
| 170 | Level & prev = stack[stack.size() - 2]; |
| 171 | |
| 172 | ColumnsWithTypeAndName all_columns; |
| 173 | NameSet new_names; |
| 174 | |
| 175 | for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) |
| 176 | { |
| 177 | all_columns.emplace_back(nullptr, it->type, it->name); |
| 178 | new_names.insert(it->name); |
| 179 | stack.back().new_columns.insert(it->name); |
| 180 | } |
| 181 | |
| 182 | const Block & prev_sample_block = prev.actions->getSampleBlock(); |
| 183 | for (size_t i = 0, size = prev_sample_block.columns(); i < size; ++i) |
| 184 | { |
| 185 | const ColumnWithTypeAndName & col = prev_sample_block.getByPosition(i); |
| 186 | if (!new_names.count(col.name)) |
| 187 | all_columns.push_back(col); |
| 188 | } |
| 189 | |
| 190 | stack.back().actions = std::make_shared<ExpressionActions>(all_columns, context); |
| 191 | } |
| 192 | |
| 193 | size_t ScopeStack::getColumnLevel(const std::string & name) |
| 194 | { |
| 195 | for (int i = static_cast<int>(stack.size()) - 1; i >= 0; --i) |
| 196 | if (stack[i].new_columns.count(name)) |
| 197 | return i; |
| 198 | |
| 199 | throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); |
| 200 | } |
| 201 | |
| 202 | void ScopeStack::addAction(const ExpressionAction & action) |
| 203 | { |
| 204 | size_t level = 0; |
| 205 | Names required = action.getNeededColumns(); |
| 206 | for (size_t i = 0; i < required.size(); ++i) |
| 207 | level = std::max(level, getColumnLevel(required[i])); |
| 208 | |
| 209 | Names added; |
| 210 | stack[level].actions->add(action, added); |
| 211 | |
| 212 | stack[level].new_columns.insert(added.begin(), added.end()); |
| 213 | |
| 214 | for (size_t i = 0; i < added.size(); ++i) |
| 215 | { |
| 216 | const ColumnWithTypeAndName & col = stack[level].actions->getSampleBlock().getByName(added[i]); |
| 217 | for (size_t j = level + 1; j < stack.size(); ++j) |
| 218 | stack[j].actions->addInput(col); |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | ExpressionActionsPtr ScopeStack::popLevel() |
| 223 | { |
| 224 | ExpressionActionsPtr res = stack.back().actions; |
| 225 | stack.pop_back(); |
| 226 | return res; |
| 227 | } |
| 228 | |
| 229 | const Block & ScopeStack::getSampleBlock() const |
| 230 | { |
| 231 | return stack.back().actions->getSampleBlock(); |
| 232 | } |
| 233 | |
| 234 | struct CachedColumnName |
| 235 | { |
| 236 | String cached; |
| 237 | |
| 238 | const String & get(const ASTPtr & ast) |
| 239 | { |
| 240 | if (cached.empty()) |
| 241 | cached = ast->getColumnName(); |
| 242 | return cached; |
| 243 | } |
| 244 | }; |
| 245 | |
| 246 | bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child) |
| 247 | { |
| 248 | /// Visit children themself |
| 249 | if (node->as<ASTIdentifier>() || |
| 250 | node->as<ASTFunction>() || |
| 251 | node->as<ASTLiteral>()) |
| 252 | return false; |
| 253 | |
| 254 | /// Do not go to FROM, JOIN, UNION. |
| 255 | if (child->as<ASTTableExpression>() || |
| 256 | child->as<ASTSelectQuery>()) |
| 257 | return false; |
| 258 | |
| 259 | return true; |
| 260 | } |
| 261 | |
| 262 | void ActionsMatcher::visit(const ASTPtr & ast, Data & data) |
| 263 | { |
| 264 | if (const auto * identifier = ast->as<ASTIdentifier>()) |
| 265 | visit(*identifier, ast, data); |
| 266 | else if (const auto * node = ast->as<ASTFunction>()) |
| 267 | visit(*node, ast, data); |
| 268 | else if (const auto * literal = ast->as<ASTLiteral>()) |
| 269 | visit(*literal, ast, data); |
| 270 | } |
| 271 | |
| 272 | void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, Data & data) |
| 273 | { |
| 274 | CachedColumnName column_name; |
| 275 | if (data.hasColumn(column_name.get(ast))) |
| 276 | return; |
| 277 | |
| 278 | if (!data.only_consts) |
| 279 | { |
| 280 | /// The requested column is not in the block. |
| 281 | /// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY. |
| 282 | |
| 283 | bool found = false; |
| 284 | for (const auto & column_name_type : data.source_columns) |
| 285 | if (column_name_type.name == column_name.get(ast)) |
| 286 | found = true; |
| 287 | |
| 288 | if (found) |
| 289 | throw Exception("Column " + column_name.get(ast) + " is not under aggregate function and not in GROUP BY." , |
| 290 | ErrorCodes::NOT_AN_AGGREGATE); |
| 291 | |
| 292 | /// Special check for WITH statement alias. Add alias action to be able to use this alias. |
| 293 | if (identifier.prefer_alias_to_column_name && !identifier.alias.empty()) |
| 294 | data.addAction(ExpressionAction::addAliases({{identifier.name, identifier.alias}})); |
| 295 | } |
| 296 | } |
| 297 | |
/** Adds the actions needed to compute a function call.
  *
  * Does nothing if a column named like the expression already exists. Special cases, in order:
  *  - a bare `lambda` is rejected (UNEXPECTED_EXPRESSION);
  *  - `arrayJoin` becomes a column copy followed by an array join action;
  *  - IN / GLOBAL IN get their right argument as a prepared set, or are replaced with `ignoreExceptNull`
  *    if no set could be made;
  *  - `indexHint` becomes a constant UInt8 column equal to 1;
  *  - aggregate functions get no action here.
  * For everything else the arguments are visited first (lambda arguments in a second pass, each on
  * its own level of the actions stack) and then an applyFunction action is added.
  *
  * In `only_consts` mode the function is silently skipped if some argument is not available as a column.
  */
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
    CachedColumnName column_name;
    /// The expression has already been computed.
    if (data.hasColumn(column_name.get(ast)))
        return;

    /// Lambdas are only processed as arguments of another function (see the argument loops below).
    if (node.name == "lambda")
        throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION);

    /// Function arrayJoin.
    if (node.name == "arrayJoin")
    {
        if (node.arguments->children.size() != 1)
            throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);

        ASTPtr arg = node.arguments->children.at(0);
        visit(arg, data);
        if (!data.only_consts)
        {
            /// Copy the argument under the name of the whole expression and array-join that copy.
            String result_name = column_name.get(ast);
            data.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name));
            NameSet joined_columns;
            joined_columns.insert(result_name);
            data.addAction(ExpressionAction::arrayJoin(joined_columns, false, data.context));
        }

        return;
    }

    /// Stays null unless the function is IN / GLOBAL IN and a set was made for its right argument.
    SetPtr prepared_set;
    if (functionIsInOrGlobalInOperator(node.name))
    {
        /// Let's find the type of the first argument (it will be visited again in the argument loop below,
        /// which will not affect anything).
        visit(node.arguments->children.at(0), data);

        if ((prepared_set = makeSet(node, data, data.no_subqueries)))
        {
            /// Transform tuple or subquery into a set.
        }
        else
        {
            if (!data.only_consts)
            {
                /// We are in the part of the tree that we are not going to compute. You just need to define types.
                /// Do not subquery and create sets. We treat "IN" as "ignoreExceptNull" function.

                data.addAction(ExpressionAction::applyFunction(
                        FunctionFactory::instance().get("ignoreExceptNull", data.context),
                        { node.arguments->children.at(0)->getColumnName() },
                        column_name.get(ast)));
            }
            return;
        }
    }

    /// A special function `indexHint`. Everything that is inside it is not calculated
    /// (and is used only for index analysis, see KeyCondition).
    if (node.name == "indexHint")
    {
        /// The arguments are not visited; the result is a constant UInt8 column with value 1.
        data.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
            ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared<DataTypeUInt8>(),
                column_name.get(ast))));
        return;
    }

    /// No action is added for aggregate functions here.
    if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name))
        return;

    /// Context object that we pass to function should live during query.
    const Context & function_context = data.context.hasQueryContext()
        ? data.context.getQueryContext()
        : data.context;

    FunctionOverloadResolverPtr function_builder;
    try
    {
        function_builder = FunctionFactory::instance().get(node.name, function_context);
    }
    catch (DB::Exception & e)
    {
        /// The name may be a misspelled aggregate function: extend the message with hints, then rethrow.
        auto hints = AggregateFunctionFactory::instance().getHints(node.name);
        if (!hints.empty())
            e.addMessage("Or unknown aggregate function " + node.name + ". Maybe you meant: " + toString(hints));
        e.rethrow();
    }

    /// Names and types of the argument columns, in argument order. For lambda arguments the name is left
    /// empty and the type is approximate until the second pass below.
    Names argument_names;
    DataTypes argument_types;
    /// Becomes false (only in `only_consts` mode) when some argument is not available as a column.
    bool arguments_present = true;

    /// If the function has an argument-lambda expression, you need to determine its type before the recursive call.
    bool has_lambda_arguments = false;

    for (size_t arg = 0; arg < node.arguments->children.size(); ++arg)
    {
        auto & child = node.arguments->children[arg];
        auto child_column_name = child->getColumnName();

        const auto * lambda = child->as<ASTFunction>();
        const auto * identifier = child->as<ASTIdentifier>();
        if (lambda && lambda->name == "lambda")
        {
            /// If the argument is a lambda expression, just remember its approximate type.
            if (lambda->arguments->children.size() != 2)
                throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

            const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as<ASTFunction>();

            if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
                throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

            has_lambda_arguments = true;
            /// Only the number of lambda arguments is known at this point; their types are left unset.
            argument_types.emplace_back(std::make_shared<DataTypeFunction>(DataTypes(lambda_args_tuple->arguments->children.size())));
            /// Select the name in the next cycle.
            argument_names.emplace_back();
        }
        else if (functionIsInOrGlobalInOperator(node.name) && arg == 1 && prepared_set)
        {
            /// The right argument of IN is passed to the function as a column of type Set.
            ColumnWithTypeAndName column;
            column.type = std::make_shared<DataTypeSet>();

            /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
            ///  so that sets with the same literal representation do not fuse together (they can have different types).
            if (!prepared_set->empty())
                column.name = getUniqueName(data.getSampleBlock(), "__set");
            else
                column.name = child_column_name;

            if (!data.hasColumn(column.name))
            {
                auto column_set = ColumnSet::create(1, prepared_set);
                /// If prepared_set is not empty, we have a set made with literals.
                /// Create a const ColumnSet to make constant folding work
                if (!prepared_set->empty())
                    column.column = ColumnConst::create(std::move(column_set), 1);
                else
                    column.column = std::move(column_set);
                data.addAction(ExpressionAction::addColumn(column));
            }

            argument_types.push_back(column.type);
            argument_names.push_back(column.name);
        }
        else if (identifier && node.name == "joinGet" && arg == 0)
        {
            /// The first argument of joinGet is an identifier naming a table: replace it with a constant
            /// string column "database.table", using the current database if none is specified.
            String database_name;
            String table_name;
            std::tie(database_name, table_name) = IdentifierSemantic::extractDatabaseAndTable(*identifier);
            if (database_name.empty())
                database_name = data.context.getCurrentDatabase();
            auto column_string = ColumnString::create();
            column_string->insert(database_name + "." + table_name);
            ColumnWithTypeAndName column(
                ColumnConst::create(std::move(column_string), 1),
                std::make_shared<DataTypeString>(),
                getUniqueName(data.getSampleBlock(), "__joinGet"));
            data.addAction(ExpressionAction::addColumn(column));
            argument_types.push_back(column.type);
            argument_names.push_back(column.name);
        }
        else
        {
            /// If the argument is not a lambda expression, call it recursively and find out its type.
            visit(child, data);
            std::string name = child_column_name;
            if (data.hasColumn(name))
            {
                argument_types.push_back(data.getSampleBlock().getByName(name).type);
                argument_names.push_back(name);
            }
            else
            {
                /// The argument did not produce a column: tolerated only in `only_consts` mode.
                if (data.only_consts)
                    arguments_present = false;
                else
                    throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
            }
        }
    }

    if (data.only_consts && !arguments_present)
        return;

    if (has_lambda_arguments && !data.only_consts)
    {
        /// Let the function fill in the argument types of its lambda arguments.
        function_builder->getLambdaArgumentTypes(argument_types);

        /// Call recursively for lambda expressions.
        for (size_t i = 0; i < node.arguments->children.size(); ++i)
        {
            ASTPtr child = node.arguments->children[i];

            const auto * lambda = child->as<ASTFunction>();
            if (lambda && lambda->name == "lambda")
            {
                const DataTypeFunction * lambda_type = typeid_cast<const DataTypeFunction *>(argument_types[i].get());
                /// Already validated in the first pass to be a `tuple` function.
                const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as<ASTFunction>();
                const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children;
                NamesAndTypesList lambda_arguments;

                for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
                {
                    auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]);
                    if (!opt_arg_name)
                        throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

                    lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
                }

                /// Build the actions of the lambda body on a separate level of the actions stack.
                data.actions_stack.pushLevel(lambda_arguments);
                visit(lambda->arguments->children.at(1), data);
                ExpressionActionsPtr lambda_actions = data.actions_stack.popLevel();

                String result_name = lambda->arguments->children.at(1)->getColumnName();
                lambda_actions->finalize(Names(1, result_name));
                DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;

                /// Required columns that are not lambda arguments are captured from the enclosing scope.
                Names captured;
                Names required = lambda_actions->getRequiredColumns();
                for (const auto & required_arg : required)
                    if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
                        captured.push_back(required_arg);

                /// We can not name `getColumnName()`,
                ///  because it does not uniquely define the expression (the types of arguments can be different).
                String lambda_name = getUniqueName(data.getSampleBlock(), "__lambda");

                auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
                        lambda_actions, captured, lambda_arguments, result_type, result_name);
                auto function_capture_adapter = std::make_shared<FunctionOverloadResolverAdaptor>(std::move(function_capture));
                data.addAction(ExpressionAction::applyFunction(function_capture_adapter, captured, lambda_name));

                /// Replace the approximate type and the empty name recorded in the first pass.
                argument_types[i] = std::make_shared<DataTypeFunction>(lambda_type->getArgumentTypes(), result_type);
                argument_names[i] = lambda_name;
            }
        }
    }

    if (data.only_consts)
    {
        /// In `only_consts` mode the function is applied only if every argument is available as a column.
        for (const auto & argument_name : argument_names)
        {
            if (!data.hasColumn(argument_name))
            {
                arguments_present = false;
                break;
            }
        }
    }

    if (arguments_present)
    {
        data.addAction(ExpressionAction::applyFunction(function_builder, argument_names, column_name.get(ast)));
    }
}
| 553 | |
| 554 | void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data) |
| 555 | { |
| 556 | CachedColumnName column_name; |
| 557 | if (data.hasColumn(column_name.get(ast))) |
| 558 | return; |
| 559 | |
| 560 | DataTypePtr type = applyVisitor(FieldToDataType(), literal.value); |
| 561 | |
| 562 | ColumnWithTypeAndName column; |
| 563 | column.column = type->createColumnConst(1, convertFieldToType(literal.value, *type)); |
| 564 | column.type = type; |
| 565 | column.name = column_name.get(ast); |
| 566 | |
| 567 | data.addAction(ExpressionAction::addColumn(column)); |
| 568 | } |
| 569 | |
| 570 | SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries) |
| 571 | { |
| 572 | /** You need to convert the right argument to a set. |
| 573 | * This can be a table name, a value, a value enumeration, or a subquery. |
| 574 | * The enumeration of values is parsed as a function `tuple`. |
| 575 | */ |
| 576 | const IAST & args = *node.arguments; |
| 577 | const ASTPtr & left_in_operand = args.children.at(0); |
| 578 | const ASTPtr & right_in_operand = args.children.at(1); |
| 579 | const Block & sample_block = data.getSampleBlock(); |
| 580 | |
| 581 | /// If the subquery or table name for SELECT. |
| 582 | const auto * identifier = right_in_operand->as<ASTIdentifier>(); |
| 583 | if (right_in_operand->as<ASTSubquery>() || identifier) |
| 584 | { |
| 585 | if (no_subqueries) |
| 586 | return {}; |
| 587 | auto set_key = PreparedSetKey::forSubquery(*right_in_operand); |
| 588 | if (data.prepared_sets.count(set_key)) |
| 589 | return data.prepared_sets.at(set_key); |
| 590 | |
| 591 | /// A special case is if the name of the table is specified on the right side of the IN statement, |
| 592 | /// and the table has the type Set (a previously prepared set). |
| 593 | if (identifier) |
| 594 | { |
| 595 | DatabaseAndTableWithAlias database_table(*identifier); |
| 596 | StoragePtr table = data.context.tryGetTable(database_table.database, database_table.table); |
| 597 | |
| 598 | if (table) |
| 599 | { |
| 600 | StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get()); |
| 601 | if (storage_set) |
| 602 | { |
| 603 | data.prepared_sets[set_key] = storage_set->getSet(); |
| 604 | return storage_set->getSet(); |
| 605 | } |
| 606 | } |
| 607 | } |
| 608 | |
| 609 | /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. |
| 610 | String set_id = right_in_operand->getColumnName(); |
| 611 | |
| 612 | SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id]; |
| 613 | |
| 614 | /// If you already created a Set with the same subquery / table. |
| 615 | if (subquery_for_set.set) |
| 616 | { |
| 617 | data.prepared_sets[set_key] = subquery_for_set.set; |
| 618 | return subquery_for_set.set; |
| 619 | } |
| 620 | |
| 621 | SetPtr set = std::make_shared<Set>(data.set_size_limit, false); |
| 622 | |
| 623 | /** The following happens for GLOBAL INs: |
| 624 | * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, |
| 625 | * in the subquery_for_set object, this subquery is set as source and the temporary table _data1 as the table. |
| 626 | * - this function shows the expression IN_data1. |
| 627 | */ |
| 628 | if (!subquery_for_set.source && data.no_storage_or_local) |
| 629 | { |
| 630 | auto interpreter = interpretSubquery(right_in_operand, data.context, data.subquery_depth, {}); |
| 631 | subquery_for_set.source = std::make_shared<LazyBlockInputStream>( |
| 632 | interpreter->getSampleBlock(), [interpreter]() mutable { return interpreter->execute().in; }); |
| 633 | |
| 634 | /** Why is LazyBlockInputStream used? |
| 635 | * |
| 636 | * The fact is that when processing a query of the form |
| 637 | * SELECT ... FROM remote_test WHERE column GLOBAL IN (subquery), |
| 638 | * if the distributed remote_test table contains localhost as one of the servers, |
| 639 | * the query will be interpreted locally again (and not sent over TCP, as in the case of a remote server). |
| 640 | * |
| 641 | * The query execution pipeline will be: |
| 642 | * CreatingSets |
| 643 | * subquery execution, filling the temporary table with _data1 (1) |
| 644 | * CreatingSets |
| 645 | * reading from the table _data1, creating the set (2) |
| 646 | * read from the table subordinate to remote_test. |
| 647 | * |
| 648 | * (The second part of the pipeline under CreateSets is a reinterpretation of the query inside StorageDistributed, |
| 649 | * the query differs in that the database name and tables are replaced with subordinates, and the subquery is replaced with _data1.) |
| 650 | * |
| 651 | * But when creating the pipeline, when creating the source (2), it will be found that the _data1 table is empty |
| 652 | * (because the query has not started yet), and empty source will be returned as the source. |
| 653 | * And then, when the query is executed, an empty set will be created in step (2). |
| 654 | * |
| 655 | * Therefore, we make the initialization of step (2) lazy |
| 656 | * - so that it does not occur until step (1) is completed, on which the table will be populated. |
| 657 | * |
| 658 | * Note: this solution is not very good, you need to think better. |
| 659 | */ |
| 660 | } |
| 661 | |
| 662 | subquery_for_set.set = set; |
| 663 | data.prepared_sets[set_key] = set; |
| 664 | return set; |
| 665 | } |
| 666 | else |
| 667 | { |
| 668 | if (sample_block.has(left_in_operand->getColumnName())) |
| 669 | /// An explicit enumeration of values in parentheses. |
| 670 | return makeExplicitSet(&node, sample_block, false, data.context, data.set_size_limit, data.prepared_sets); |
| 671 | else |
| 672 | return {}; |
| 673 | } |
| 674 | } |
| 675 | |
| 676 | } |
| 677 | |