1 | #include <Common/typeid_cast.h> |
2 | #include <Common/PODArray.h> |
3 | |
4 | #include <Functions/FunctionFactory.h> |
5 | #include <Functions/FunctionsMiscellaneous.h> |
6 | |
7 | #include <AggregateFunctions/AggregateFunctionFactory.h> |
8 | |
9 | #include <DataTypes/DataTypeSet.h> |
10 | #include <DataTypes/DataTypesNumber.h> |
11 | #include <DataTypes/DataTypeFunction.h> |
12 | #include <DataTypes/DataTypeString.h> |
13 | #include <DataTypes/DataTypeTuple.h> |
14 | #include <DataTypes/DataTypeLowCardinality.h> |
15 | #include <DataTypes/FieldToDataType.h> |
16 | |
17 | #include <DataStreams/LazyBlockInputStream.h> |
18 | |
19 | #include <Columns/ColumnSet.h> |
20 | #include <Columns/ColumnConst.h> |
21 | #include <Columns/ColumnsNumber.h> |
22 | |
23 | #include <Storages/StorageSet.h> |
24 | |
25 | #include <Parsers/ASTFunction.h> |
26 | #include <Parsers/ASTIdentifier.h> |
27 | #include <Parsers/ASTLiteral.h> |
28 | #include <Parsers/ASTSelectQuery.h> |
29 | #include <Parsers/ASTSubquery.h> |
30 | #include <Parsers/ASTTablesInSelectQuery.h> |
31 | |
32 | #include <Interpreters/ExpressionActions.h> |
33 | #include <Interpreters/misc.h> |
34 | #include <Interpreters/ActionsVisitor.h> |
35 | #include <Interpreters/InterpreterSelectWithUnionQuery.h> |
36 | #include <Interpreters/Set.h> |
37 | #include <Interpreters/evaluateConstantExpression.h> |
38 | #include <Interpreters/convertFieldToType.h> |
39 | #include <Interpreters/interpretSubquery.h> |
40 | #include <Interpreters/DatabaseAndTableWithAlias.h> |
41 | #include <Interpreters/IdentifierSemantic.h> |
42 | |
43 | namespace DB |
44 | { |
45 | |
/// Error codes thrown from this file. They are only declared here (`extern`);
/// every code referenced below must have a declaration in this list so that the file
/// does not depend on another header happening to declare it.
namespace ErrorCodes
{
    extern const int UNKNOWN_IDENTIFIER;
    extern const int NOT_AN_AGGREGATE;
    extern const int UNEXPECTED_EXPRESSION;
    extern const int TYPE_MISMATCH;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    /// Used by makeExplicitSet when the right-hand side of IN has an unsuitable type.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
54 | |
55 | static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) |
56 | { |
57 | return std::find_if(cols.begin(), cols.end(), |
58 | [&](const NamesAndTypesList::value_type & val) { return val.name == name; }); |
59 | } |
60 | |
61 | SetPtr makeExplicitSet( |
62 | const ASTFunction * node, const Block & sample_block, bool create_ordered_set, |
63 | const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets) |
64 | { |
65 | const IAST & args = *node->arguments; |
66 | |
67 | if (args.children.size() != 2) |
68 | throw Exception("Wrong number of arguments passed to function in" , ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
69 | |
70 | const ASTPtr & left_arg = args.children.at(0); |
71 | const ASTPtr & right_arg = args.children.at(1); |
72 | |
73 | const DataTypePtr & left_arg_type = sample_block.getByName(left_arg->getColumnName()).type; |
74 | |
75 | DataTypes set_element_types = {left_arg_type}; |
76 | auto left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get()); |
77 | if (left_tuple_type && left_tuple_type->getElements().size() != 1) |
78 | set_element_types = left_tuple_type->getElements(); |
79 | |
80 | for (auto & element_type : set_element_types) |
81 | if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(element_type.get())) |
82 | element_type = low_cardinality_type->getDictionaryType(); |
83 | |
84 | auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types); |
85 | if (prepared_sets.count(set_key)) |
86 | return prepared_sets.at(set_key); /// Already prepared. |
87 | |
88 | auto getTupleTypeFromAst = [&context](const ASTPtr & tuple_ast) -> DataTypePtr |
89 | { |
90 | const auto * func = tuple_ast->as<ASTFunction>(); |
91 | if (func && func->name == "tuple" && !func->arguments->children.empty()) |
92 | { |
93 | /// Won't parse all values of outer tuple. |
94 | auto element = func->arguments->children.at(0); |
95 | std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(element, context); |
96 | return std::make_shared<DataTypeTuple>(DataTypes({value_raw.second})); |
97 | } |
98 | |
99 | return evaluateConstantExpression(tuple_ast, context).second; |
100 | }; |
101 | |
102 | const DataTypePtr & right_arg_type = getTupleTypeFromAst(right_arg); |
103 | |
104 | std::function<size_t(const DataTypePtr &)> getTupleDepth; |
105 | getTupleDepth = [&getTupleDepth](const DataTypePtr & type) -> size_t |
106 | { |
107 | if (auto tuple_type = typeid_cast<const DataTypeTuple *>(type.get())) |
108 | return 1 + (tuple_type->getElements().empty() ? 0 : getTupleDepth(tuple_type->getElements().at(0))); |
109 | |
110 | return 0; |
111 | }; |
112 | |
113 | size_t left_tuple_depth = getTupleDepth(left_arg_type); |
114 | size_t right_tuple_depth = getTupleDepth(right_arg_type); |
115 | |
116 | ASTPtr elements_ast = nullptr; |
117 | |
118 | /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc. |
119 | if (left_tuple_depth == right_tuple_depth) |
120 | { |
121 | ASTPtr exp_list = std::make_shared<ASTExpressionList>(); |
122 | exp_list->children.push_back(right_arg); |
123 | elements_ast = exp_list; |
124 | } |
125 | /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. |
126 | else if (left_tuple_depth + 1 == right_tuple_depth) |
127 | { |
128 | const auto * set_func = right_arg->as<ASTFunction>(); |
129 | |
130 | if (!set_func || set_func->name != "tuple" ) |
131 | throw Exception("Incorrect type of 2nd argument for function " + node->name |
132 | + ". Must be subquery or set of elements with type " + left_arg_type->getName() + "." , |
133 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
134 | |
135 | elements_ast = set_func->arguments; |
136 | } |
137 | else |
138 | throw Exception("Invalid types for IN function: " |
139 | + left_arg_type->getName() + " and " + right_arg_type->getName() + "." , |
140 | ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); |
141 | |
142 | SetPtr set = std::make_shared<Set>(size_limits, create_ordered_set); |
143 | set->createFromAST(set_element_types, elements_ast, context); |
144 | prepared_sets[set_key] = set; |
145 | return set; |
146 | } |
147 | |
148 | static String getUniqueName(const Block & block, const String & prefix) |
149 | { |
150 | int i = 1; |
151 | while (block.has(prefix + toString(i))) |
152 | ++i; |
153 | return prefix + toString(i); |
154 | } |
155 | |
156 | ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Context & context_) |
157 | : context(context_) |
158 | { |
159 | stack.emplace_back(); |
160 | stack.back().actions = actions; |
161 | |
162 | const Block & sample_block = actions->getSampleBlock(); |
163 | for (size_t i = 0, size = sample_block.columns(); i < size; ++i) |
164 | stack.back().new_columns.insert(sample_block.getByPosition(i).name); |
165 | } |
166 | |
167 | void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) |
168 | { |
169 | stack.emplace_back(); |
170 | Level & prev = stack[stack.size() - 2]; |
171 | |
172 | ColumnsWithTypeAndName all_columns; |
173 | NameSet new_names; |
174 | |
175 | for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) |
176 | { |
177 | all_columns.emplace_back(nullptr, it->type, it->name); |
178 | new_names.insert(it->name); |
179 | stack.back().new_columns.insert(it->name); |
180 | } |
181 | |
182 | const Block & prev_sample_block = prev.actions->getSampleBlock(); |
183 | for (size_t i = 0, size = prev_sample_block.columns(); i < size; ++i) |
184 | { |
185 | const ColumnWithTypeAndName & col = prev_sample_block.getByPosition(i); |
186 | if (!new_names.count(col.name)) |
187 | all_columns.push_back(col); |
188 | } |
189 | |
190 | stack.back().actions = std::make_shared<ExpressionActions>(all_columns, context); |
191 | } |
192 | |
193 | size_t ScopeStack::getColumnLevel(const std::string & name) |
194 | { |
195 | for (int i = static_cast<int>(stack.size()) - 1; i >= 0; --i) |
196 | if (stack[i].new_columns.count(name)) |
197 | return i; |
198 | |
199 | throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); |
200 | } |
201 | |
202 | void ScopeStack::addAction(const ExpressionAction & action) |
203 | { |
204 | size_t level = 0; |
205 | Names required = action.getNeededColumns(); |
206 | for (size_t i = 0; i < required.size(); ++i) |
207 | level = std::max(level, getColumnLevel(required[i])); |
208 | |
209 | Names added; |
210 | stack[level].actions->add(action, added); |
211 | |
212 | stack[level].new_columns.insert(added.begin(), added.end()); |
213 | |
214 | for (size_t i = 0; i < added.size(); ++i) |
215 | { |
216 | const ColumnWithTypeAndName & col = stack[level].actions->getSampleBlock().getByName(added[i]); |
217 | for (size_t j = level + 1; j < stack.size(); ++j) |
218 | stack[j].actions->addInput(col); |
219 | } |
220 | } |
221 | |
222 | ExpressionActionsPtr ScopeStack::popLevel() |
223 | { |
224 | ExpressionActionsPtr res = stack.back().actions; |
225 | stack.pop_back(); |
226 | return res; |
227 | } |
228 | |
229 | const Block & ScopeStack::getSampleBlock() const |
230 | { |
231 | return stack.back().actions->getSampleBlock(); |
232 | } |
233 | |
234 | struct CachedColumnName |
235 | { |
236 | String cached; |
237 | |
238 | const String & get(const ASTPtr & ast) |
239 | { |
240 | if (cached.empty()) |
241 | cached = ast->getColumnName(); |
242 | return cached; |
243 | } |
244 | }; |
245 | |
246 | bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child) |
247 | { |
248 | /// Visit children themself |
249 | if (node->as<ASTIdentifier>() || |
250 | node->as<ASTFunction>() || |
251 | node->as<ASTLiteral>()) |
252 | return false; |
253 | |
254 | /// Do not go to FROM, JOIN, UNION. |
255 | if (child->as<ASTTableExpression>() || |
256 | child->as<ASTSelectQuery>()) |
257 | return false; |
258 | |
259 | return true; |
260 | } |
261 | |
262 | void ActionsMatcher::visit(const ASTPtr & ast, Data & data) |
263 | { |
264 | if (const auto * identifier = ast->as<ASTIdentifier>()) |
265 | visit(*identifier, ast, data); |
266 | else if (const auto * node = ast->as<ASTFunction>()) |
267 | visit(*node, ast, data); |
268 | else if (const auto * literal = ast->as<ASTLiteral>()) |
269 | visit(*literal, ast, data); |
270 | } |
271 | |
272 | void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, Data & data) |
273 | { |
274 | CachedColumnName column_name; |
275 | if (data.hasColumn(column_name.get(ast))) |
276 | return; |
277 | |
278 | if (!data.only_consts) |
279 | { |
280 | /// The requested column is not in the block. |
281 | /// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY. |
282 | |
283 | bool found = false; |
284 | for (const auto & column_name_type : data.source_columns) |
285 | if (column_name_type.name == column_name.get(ast)) |
286 | found = true; |
287 | |
288 | if (found) |
289 | throw Exception("Column " + column_name.get(ast) + " is not under aggregate function and not in GROUP BY." , |
290 | ErrorCodes::NOT_AN_AGGREGATE); |
291 | |
292 | /// Special check for WITH statement alias. Add alias action to be able to use this alias. |
293 | if (identifier.prefer_alias_to_column_name && !identifier.alias.empty()) |
294 | data.addAction(ExpressionAction::addAliases({{identifier.name, identifier.alias}})); |
295 | } |
296 | } |
297 | |
/// Handles a function node: adds to `data` the actions needed to compute it under the node's column name.
///
/// Nothing is done if a column with that name already exists. Special cases handled before the
/// generic path: a bare `lambda` (error), `arrayJoin`, IN-like operators (the right side is turned
/// into a set via makeSet), `indexHint`, and aggregate function names (skipped).
/// In the generic path every argument is processed first (lambdas in a second pass, on a nested
/// scope level), then the function itself is applied to the collected argument columns.
///
/// When `data.only_consts` is set, the function is applied only if all of its argument columns exist.
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
    CachedColumnName column_name;
    if (data.hasColumn(column_name.get(ast)))
        return;

    /// A lambda is only valid as an argument of another function; those are handled in the argument loop below.
    if (node.name == "lambda")
        throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION);

    /// Function arrayJoin.
    if (node.name == "arrayJoin")
    {
        if (node.arguments->children.size() != 1)
            throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);

        ASTPtr arg = node.arguments->children.at(0);
        visit(arg, data);
        if (!data.only_consts)
        {
            /// Copy the argument column under the result name, then array-join that copy.
            String result_name = column_name.get(ast);
            data.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name));
            NameSet joined_columns;
            joined_columns.insert(result_name);
            data.addAction(ExpressionAction::arrayJoin(joined_columns, false, data.context));
        }

        return;
    }

    /// Stays empty unless this is an IN-like operator for which a set could be made.
    SetPtr prepared_set;
    if (functionIsInOrGlobalInOperator(node.name))
    {
        /// Let's find the type of the first argument (it is visited again in the argument loop below,
        /// which will not affect anything).
        visit(node.arguments->children.at(0), data);

        if ((prepared_set = makeSet(node, data, data.no_subqueries)))
        {
            /// Transform tuple or subquery into a set.
        }
        else
        {
            if (!data.only_consts)
            {
                /// We are in the part of the tree that we are not going to compute. You just need to define types.
                /// Do not subquery and create sets. We treat "IN" as "ignoreExceptNull" function.

                data.addAction(ExpressionAction::applyFunction(
                    FunctionFactory::instance().get("ignoreExceptNull", data.context),
                    { node.arguments->children.at(0)->getColumnName() },
                    column_name.get(ast)));
            }
            return;
        }
    }

    /// A special function `indexHint`. Everything that is inside it is not calculated
    /// (and is used only for index analysis, see KeyCondition).
    /// The result is added as a constant UInt8 column; the arguments are never visited.
    if (node.name == "indexHint")
    {
        data.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
            ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared<DataTypeUInt8>(),
            column_name.get(ast))));
        return;
    }

    /// Aggregate functions get no actions here.
    if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name))
        return;

    /// Context object that we pass to function should live during query.
    const Context & function_context = data.context.hasQueryContext()
        ? data.context.getQueryContext()
        : data.context;

    FunctionOverloadResolverPtr function_builder;
    try
    {
        function_builder = FunctionFactory::instance().get(node.name, function_context);
    }
    catch (DB::Exception & e)
    {
        /// The name is not an ordinary function. If it resembles an aggregate function name,
        /// extend the error message with suggestions before rethrowing.
        auto hints = AggregateFunctionFactory::instance().getHints(node.name);
        if (!hints.empty())
            e.addMessage("Or unknown aggregate function " + node.name + ". Maybe you meant: " + toString(hints));
        e.rethrow();
    }

    /// Names and types of the argument columns, collected in argument order.
    Names argument_names;
    DataTypes argument_types;
    /// Becomes false in only_consts mode when some argument column does not exist.
    bool arguments_present = true;

    /// If the function has an argument-lambda expression, you need to determine its type before the recursive call.
    bool has_lambda_arguments = false;

    for (size_t arg = 0; arg < node.arguments->children.size(); ++arg)
    {
        auto & child = node.arguments->children[arg];
        auto child_column_name = child->getColumnName();

        const auto * lambda = child->as<ASTFunction>();
        const auto * identifier = child->as<ASTIdentifier>();
        if (lambda && lambda->name == "lambda")
        {
            /// If the argument is a lambda expression, just remember its approximate type.
            if (lambda->arguments->children.size() != 2)
                throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

            const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as<ASTFunction>();

            if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
                throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

            has_lambda_arguments = true;
            /// Only the number of lambda parameters is known at this point; their types are placeholders
            /// (presumably filled in by getLambdaArgumentTypes below).
            argument_types.emplace_back(std::make_shared<DataTypeFunction>(DataTypes(lambda_args_tuple->arguments->children.size())));
            /// Select the name in the next cycle.
            argument_names.emplace_back();
        }
        else if (functionIsInOrGlobalInOperator(node.name) && arg == 1 && prepared_set)
        {
            /// Second argument of an IN-like operator: pass the prepared set as a column of type Set.
            ColumnWithTypeAndName column;
            column.type = std::make_shared<DataTypeSet>();

            /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
            /// so that sets with the same literal representation do not fuse together (they can have different types).
            if (!prepared_set->empty())
                column.name = getUniqueName(data.getSampleBlock(), "__set");
            else
                column.name = child_column_name;

            if (!data.hasColumn(column.name))
            {
                auto column_set = ColumnSet::create(1, prepared_set);
                /// If prepared_set is not empty, we have a set made with literals.
                /// Create a const ColumnSet to make constant folding work
                if (!prepared_set->empty())
                    column.column = ColumnConst::create(std::move(column_set), 1);
                else
                    column.column = std::move(column_set);
                data.addAction(ExpressionAction::addColumn(column));
            }

            argument_types.push_back(column.type);
            argument_names.push_back(column.name);
        }
        else if (identifier && node.name == "joinGet" && arg == 0)
        {
            /// First argument of joinGet given as an identifier: it is treated as a table name and
            /// replaced with a constant string column "database.table" (the current database is used
            /// when the identifier has no database part).
            String database_name;
            String table_name;
            std::tie(database_name, table_name) = IdentifierSemantic::extractDatabaseAndTable(*identifier);
            if (database_name.empty())
                database_name = data.context.getCurrentDatabase();
            auto column_string = ColumnString::create();
            column_string->insert(database_name + "." + table_name);
            ColumnWithTypeAndName column(
                ColumnConst::create(std::move(column_string), 1),
                std::make_shared<DataTypeString>(),
                getUniqueName(data.getSampleBlock(), "__joinGet"));
            data.addAction(ExpressionAction::addColumn(column));
            argument_types.push_back(column.type);
            argument_names.push_back(column.name);
        }
        else
        {
            /// If the argument is not a lambda expression, call it recursively and find out its type.
            visit(child, data);
            std::string name = child_column_name;
            if (data.hasColumn(name))
            {
                argument_types.push_back(data.getSampleBlock().getByName(name).type);
                argument_names.push_back(name);
            }
            else
            {
                /// A missing argument column is tolerated only in only_consts mode.
                if (data.only_consts)
                    arguments_present = false;
                else
                    throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
            }
        }
    }

    /// In only_consts mode a function with a missing argument is skipped.
    if (data.only_consts && !arguments_present)
        return;

    /// Lambda arguments are processed only when not in only_consts mode.
    if (has_lambda_arguments && !data.only_consts)
    {
        function_builder->getLambdaArgumentTypes(argument_types);

        /// Call recursively for lambda expressions.
        for (size_t i = 0; i < node.arguments->children.size(); ++i)
        {
            ASTPtr child = node.arguments->children[i];

            const auto * lambda = child->as<ASTFunction>();
            if (lambda && lambda->name == "lambda")
            {
                const DataTypeFunction * lambda_type = typeid_cast<const DataTypeFunction *>(argument_types[i].get());
                const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as<ASTFunction>();
                const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children;
                NamesAndTypesList lambda_arguments;

                /// Pair each declared lambda parameter name with the type at the same position in lambda_type.
                for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
                {
                    auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]);
                    if (!opt_arg_name)
                        throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

                    lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
                }

                /// Build the actions of the lambda body on a separate scope level
                /// whose inputs include the lambda parameters.
                data.actions_stack.pushLevel(lambda_arguments);
                visit(lambda->arguments->children.at(1), data);
                ExpressionActionsPtr lambda_actions = data.actions_stack.popLevel();

                String result_name = lambda->arguments->children.at(1)->getColumnName();
                lambda_actions->finalize(Names(1, result_name));
                DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;

                /// Columns required by the lambda body that are not its own parameters are captured.
                Names captured;
                Names required = lambda_actions->getRequiredColumns();
                for (const auto & required_arg : required)
                    if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
                        captured.push_back(required_arg);

                /// We can not name `getColumnName()`,
                /// because it does not uniquely define the expression (the types of arguments can be different).
                String lambda_name = getUniqueName(data.getSampleBlock(), "__lambda");

                auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
                    lambda_actions, captured, lambda_arguments, result_type, result_name);
                auto function_capture_adapter = std::make_shared<FunctionOverloadResolverAdaptor>(std::move(function_capture));
                data.addAction(ExpressionAction::applyFunction(function_capture_adapter, captured, lambda_name));

                /// Replace the placeholder type and the empty name recorded in the first pass.
                argument_types[i] = std::make_shared<DataTypeFunction>(lambda_type->getArgumentTypes(), result_type);
                argument_names[i] = lambda_name;
            }
        }
    }

    /// In only_consts mode, re-check that every argument column exists
    /// (names of lambda arguments were left empty above, because lambdas are not processed in this mode).
    if (data.only_consts)
    {
        for (const auto & argument_name : argument_names)
        {
            if (!data.hasColumn(argument_name))
            {
                arguments_present = false;
                break;
            }
        }
    }

    if (arguments_present)
    {
        data.addAction(ExpressionAction::applyFunction(function_builder, argument_names, column_name.get(ast)));
    }
}
553 | |
554 | void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data) |
555 | { |
556 | CachedColumnName column_name; |
557 | if (data.hasColumn(column_name.get(ast))) |
558 | return; |
559 | |
560 | DataTypePtr type = applyVisitor(FieldToDataType(), literal.value); |
561 | |
562 | ColumnWithTypeAndName column; |
563 | column.column = type->createColumnConst(1, convertFieldToType(literal.value, *type)); |
564 | column.type = type; |
565 | column.name = column_name.get(ast); |
566 | |
567 | data.addAction(ExpressionAction::addColumn(column)); |
568 | } |
569 | |
570 | SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries) |
571 | { |
572 | /** You need to convert the right argument to a set. |
573 | * This can be a table name, a value, a value enumeration, or a subquery. |
574 | * The enumeration of values is parsed as a function `tuple`. |
575 | */ |
576 | const IAST & args = *node.arguments; |
577 | const ASTPtr & left_in_operand = args.children.at(0); |
578 | const ASTPtr & right_in_operand = args.children.at(1); |
579 | const Block & sample_block = data.getSampleBlock(); |
580 | |
581 | /// If the subquery or table name for SELECT. |
582 | const auto * identifier = right_in_operand->as<ASTIdentifier>(); |
583 | if (right_in_operand->as<ASTSubquery>() || identifier) |
584 | { |
585 | if (no_subqueries) |
586 | return {}; |
587 | auto set_key = PreparedSetKey::forSubquery(*right_in_operand); |
588 | if (data.prepared_sets.count(set_key)) |
589 | return data.prepared_sets.at(set_key); |
590 | |
591 | /// A special case is if the name of the table is specified on the right side of the IN statement, |
592 | /// and the table has the type Set (a previously prepared set). |
593 | if (identifier) |
594 | { |
595 | DatabaseAndTableWithAlias database_table(*identifier); |
596 | StoragePtr table = data.context.tryGetTable(database_table.database, database_table.table); |
597 | |
598 | if (table) |
599 | { |
600 | StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get()); |
601 | if (storage_set) |
602 | { |
603 | data.prepared_sets[set_key] = storage_set->getSet(); |
604 | return storage_set->getSet(); |
605 | } |
606 | } |
607 | } |
608 | |
609 | /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. |
610 | String set_id = right_in_operand->getColumnName(); |
611 | |
612 | SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id]; |
613 | |
614 | /// If you already created a Set with the same subquery / table. |
615 | if (subquery_for_set.set) |
616 | { |
617 | data.prepared_sets[set_key] = subquery_for_set.set; |
618 | return subquery_for_set.set; |
619 | } |
620 | |
621 | SetPtr set = std::make_shared<Set>(data.set_size_limit, false); |
622 | |
623 | /** The following happens for GLOBAL INs: |
624 | * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, |
625 | * in the subquery_for_set object, this subquery is set as source and the temporary table _data1 as the table. |
626 | * - this function shows the expression IN_data1. |
627 | */ |
628 | if (!subquery_for_set.source && data.no_storage_or_local) |
629 | { |
630 | auto interpreter = interpretSubquery(right_in_operand, data.context, data.subquery_depth, {}); |
631 | subquery_for_set.source = std::make_shared<LazyBlockInputStream>( |
632 | interpreter->getSampleBlock(), [interpreter]() mutable { return interpreter->execute().in; }); |
633 | |
634 | /** Why is LazyBlockInputStream used? |
635 | * |
636 | * The fact is that when processing a query of the form |
637 | * SELECT ... FROM remote_test WHERE column GLOBAL IN (subquery), |
638 | * if the distributed remote_test table contains localhost as one of the servers, |
639 | * the query will be interpreted locally again (and not sent over TCP, as in the case of a remote server). |
640 | * |
641 | * The query execution pipeline will be: |
642 | * CreatingSets |
643 | * subquery execution, filling the temporary table with _data1 (1) |
644 | * CreatingSets |
645 | * reading from the table _data1, creating the set (2) |
646 | * read from the table subordinate to remote_test. |
647 | * |
648 | * (The second part of the pipeline under CreateSets is a reinterpretation of the query inside StorageDistributed, |
649 | * the query differs in that the database name and tables are replaced with subordinates, and the subquery is replaced with _data1.) |
650 | * |
651 | * But when creating the pipeline, when creating the source (2), it will be found that the _data1 table is empty |
652 | * (because the query has not started yet), and empty source will be returned as the source. |
653 | * And then, when the query is executed, an empty set will be created in step (2). |
654 | * |
655 | * Therefore, we make the initialization of step (2) lazy |
656 | * - so that it does not occur until step (1) is completed, on which the table will be populated. |
657 | * |
658 | * Note: this solution is not very good, you need to think better. |
659 | */ |
660 | } |
661 | |
662 | subquery_for_set.set = set; |
663 | data.prepared_sets[set_key] = set; |
664 | return set; |
665 | } |
666 | else |
667 | { |
668 | if (sample_block.has(left_in_operand->getColumnName())) |
669 | /// An explicit enumeration of values in parentheses. |
670 | return makeExplicitSet(&node, sample_block, false, data.context, data.set_size_limit, data.prepared_sets); |
671 | else |
672 | return {}; |
673 | } |
674 | } |
675 | |
676 | } |
677 | |