ActionsVisitor.cpp source code [ClickHouse/dbms/src/Interpreters/ActionsVisitor.cpp]

1	#include <Common/typeid_cast.h>
2	#include <Common/PODArray.h>
3
4	#include <Functions/FunctionFactory.h>
5	#include <Functions/FunctionsMiscellaneous.h>
6
7	#include <AggregateFunctions/AggregateFunctionFactory.h>
8
9	#include <DataTypes/DataTypeSet.h>
10	#include <DataTypes/DataTypesNumber.h>
11	#include <DataTypes/DataTypeFunction.h>
12	#include <DataTypes/DataTypeString.h>
13	#include <DataTypes/DataTypeTuple.h>
14	#include <DataTypes/DataTypeLowCardinality.h>
15	#include <DataTypes/FieldToDataType.h>
16
17	#include <DataStreams/LazyBlockInputStream.h>
18
19	#include <Columns/ColumnSet.h>
20	#include <Columns/ColumnConst.h>
21	#include <Columns/ColumnsNumber.h>
22
23	#include <Storages/StorageSet.h>
24
25	#include <Parsers/ASTFunction.h>
26	#include <Parsers/ASTIdentifier.h>
27	#include <Parsers/ASTLiteral.h>
28	#include <Parsers/ASTSelectQuery.h>
29	#include <Parsers/ASTSubquery.h>
30	#include <Parsers/ASTTablesInSelectQuery.h>
31
32	#include <Interpreters/ExpressionActions.h>
33	#include <Interpreters/misc.h>
34	#include <Interpreters/ActionsVisitor.h>
35	#include <Interpreters/InterpreterSelectWithUnionQuery.h>
36	#include <Interpreters/Set.h>
37	#include <Interpreters/evaluateConstantExpression.h>
38	#include <Interpreters/convertFieldToType.h>
39	#include <Interpreters/interpretSubquery.h>
40	#include <Interpreters/DatabaseAndTableWithAlias.h>
41	#include <Interpreters/IdentifierSemantic.h>
42
43	namespace DB
44	{
45
/// Error codes thrown from this file. They are only declared here; the definitions live outside this file.
namespace ErrorCodes
{
    extern const int UNKNOWN_IDENTIFIER;
    extern const int NOT_AN_AGGREGATE;
    extern const int UNEXPECTED_EXPRESSION;
    extern const int TYPE_MISMATCH;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    /// Thrown by makeExplicitSet. Declared explicitly so that this file does not depend on
    /// some included header happening to declare it.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
54
55	static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols)
56	{
57	return std::find_if(cols.begin(), cols.end(),
58	[&](const NamesAndTypesList::value_type & val) { return val.name == name; });
59	}
60
61	SetPtr makeExplicitSet(
62	const ASTFunction * node, const Block & sample_block, bool create_ordered_set,
63	const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets)
64	{
65	const IAST & args = *node->arguments;
66
67	if (args.children.size() != `2`)
68	throw Exception ("Wrong number of arguments passed to function in", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
69
70	const ASTPtr & left_arg = args.children.at(`0`);
71	const ASTPtr & right_arg = args.children.at(`1`);
72
73	const DataTypePtr & left_arg_type = sample_block.getByName(left_arg ->getColumnName()).type;
74
75	DataTypes set_element_types = {left_arg_type};
76	auto left_tuple_type = typeid_cast<const DataTypeTuple *>(left_arg_type.get());
77	if (left_tuple_type && left_tuple_type->getElements().size() != `1`)
78	set_element_types = left_tuple_type->getElements();
79
80	for (auto & element_type : set_element_types)
81	if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(element_type.get()))
82	element_type = low_cardinality_type->getDictionaryType();
83
84	auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
85	if (prepared_sets.count(set_key))
86	return prepared_sets.at(set_key); /// Already prepared.
87
88	auto getTupleTypeFromAst = [&context](const ASTPtr & tuple_ast) -> DataTypePtr
89	{
90	const auto * func = tuple_ast ->as<ASTFunction>();
91	if (func && func->name == "tuple" && !func->arguments ->children.empty())
92	{
93	/// Won't parse all values of outer tuple.
94	auto element = func->arguments ->children.at(`0`);
95	std::pair<Field, DataTypePtr> value_raw = evaluateConstantExpression(element, context);
96	return std::make_shared<DataTypeTuple>(DataTypes ({value_raw.second}));
97	}
98
99	return evaluateConstantExpression(tuple_ast, context).second;
100	};
101
102	const DataTypePtr & right_arg_type = getTupleTypeFromAst (right_arg);
103
104	std::function<size_t(const DataTypePtr &)> getTupleDepth;
105	getTupleDepth = [&getTupleDepth](const DataTypePtr & type) -> size_t
106	{
107	if (auto tuple_type = typeid_cast<const DataTypeTuple *>(type.get()))
108	return `1` + (tuple_type->getElements().empty() ? `0` : getTupleDepth (tuple_type->getElements().at(`0`)));
109
110	return `0`;
111	};
112
113	size_t left_tuple_depth = getTupleDepth (left_arg_type);
114	size_t right_tuple_depth = getTupleDepth (right_arg_type);
115
116	ASTPtr elements_ast = nullptr;
117
118	/// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc.
119	if (left_tuple_depth == right_tuple_depth)
120	{
121	ASTPtr exp_list = std::make_shared<ASTExpressionList>();
122	exp_list ->children.push_back(right_arg);
123	elements_ast = exp_list;
124	}
125	/// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc.
126	else if (left_tuple_depth + `1` == right_tuple_depth)
127	{
128	const auto * set_func = right_arg ->as<ASTFunction>();
129
130	if (!set_func \|\| set_func->name != "tuple")
131	throw Exception ("Incorrect type of 2nd argument for function " + node->name
132	+ ". Must be subquery or set of elements with type " + left_arg_type ->getName() + ".",
133	ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
134
135	elements_ast = set_func->arguments;
136	}
137	else
138	throw Exception ("Invalid types for IN function: "
139	+ left_arg_type ->getName() + " and " + right_arg_type ->getName() + ".",
140	ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
141
142	SetPtr set = std::make_shared<Set>(size_limits, create_ordered_set);
143	set ->createFromAST(set_element_types, elements_ast, context);
144	prepared_sets [set_key] = set;
145	return set;
146	}
147
148	static String getUniqueName(const Block & block, const String & prefix)
149	{
150	int i = `1`;
151	while (block.has(prefix + toString(i)))
152	++i;
153	return prefix + toString(i);
154	}
155
156	ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Context & context_)
157	: context(context_)
158	{
159	stack.emplace_back();
160	stack.back().actions = actions;
161
162	const Block & sample_block = actions ->getSampleBlock();
163	for (size_t i = `0`, size = sample_block.columns(); i < size; ++i)
164	stack.back().new_columns.insert(sample_block.getByPosition(i).name);
165	}
166
167	void ScopeStack::pushLevel(const NamesAndTypesList & input_columns)
168	{
169	stack.emplace_back();
170	Level & prev = stack [stack.size() - `2`];
171
172	ColumnsWithTypeAndName all_columns;
173	NameSet new_names;
174
175	for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it)
176	{
177	all_columns.emplace_back(nullptr, it ->type, it ->name);
178	new_names.insert(it ->name);
179	stack.back().new_columns.insert(it ->name);
180	}
181
182	const Block & prev_sample_block = prev.actions ->getSampleBlock();
183	for (size_t i = `0`, size = prev_sample_block.columns(); i < size; ++i)
184	{
185	const ColumnWithTypeAndName & col = prev_sample_block.getByPosition(i);
186	if (!new_names.count(col.name))
187	all_columns.push_back(col);
188	}
189
190	stack.back().actions = std::make_shared<ExpressionActions>(all_columns, context);
191	}
192
193	size_t ScopeStack::getColumnLevel(const std::string & name)
194	{
195	for (int i = static_cast<int>(stack.size()) - `1`; i >= `0`; --i)
196	if (stack [i].new_columns.count(name))
197	return i;
198
199	throw Exception ("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
200	}
201
202	void ScopeStack::addAction(const ExpressionAction & action)
203	{
204	size_t level = `0`;
205	Names required = action.getNeededColumns();
206	for (size_t i = `0`; i < required.size(); ++i)
207	level = std::max(level, getColumnLevel(required [i]));
208
209	Names added;
210	stack [level].actions ->add(action, added);
211
212	stack [level].new_columns.insert(added.begin(), added.end());
213
214	for (size_t i = `0`; i < added.size(); ++i)
215	{
216	const ColumnWithTypeAndName & col = stack [level].actions ->getSampleBlock().getByName(added [i]);
217	for (size_t j = level + `1`; j < stack.size(); ++j)
218	stack [j].actions ->addInput(col);
219	}
220	}
221
222	ExpressionActionsPtr ScopeStack::popLevel()
223	{
224	ExpressionActionsPtr res = stack.back().actions;
225	stack.pop_back();
226	return res;
227	}
228
229	const Block & ScopeStack::getSampleBlock() const
230	{
231	return stack.back().actions ->getSampleBlock();
232	}
233
234	struct CachedColumnName
235	{
236	String cached;
237
238	const String & get(const ASTPtr & ast)
239	{
240	if (cached.empty())
241	cached = ast ->getColumnName();
242	return cached;
243	}
244	};
245
246	bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
247	{
248	/// Visit children themself
249	if (node ->as<ASTIdentifier>() \|\|
250	node ->as<ASTFunction>() \|\|
251	node ->as<ASTLiteral>())
252	return false;
253
254	/// Do not go to FROM, JOIN, UNION.
255	if (child ->as<ASTTableExpression>() \|\|
256	child ->as<ASTSelectQuery>())
257	return false;
258
259	return true;
260	}
261
262	void ActionsMatcher::visit(const ASTPtr & ast, Data & data)
263	{
264	if (const auto * identifier = ast ->as<ASTIdentifier>())
265	visit(*identifier, ast, data);
266	else if (const auto * node = ast ->as<ASTFunction>())
267	visit(*node, ast, data);
268	else if (const auto * literal = ast ->as<ASTLiteral>())
269	visit(*literal, ast, data);
270	}
271
272	void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, Data & data)
273	{
274	CachedColumnName column_name;
275	if (data.hasColumn(column_name.get(ast)))
276	return;
277
278	if (!data.only_consts)
279	{
280	/// The requested column is not in the block.
281	/// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY.
282
283	bool found = false;
284	for (const auto & column_name_type : data.source_columns)
285	if (column_name_type.name == column_name.get(ast))
286	found = true;
287
288	if (found)
289	throw Exception ("Column " + column_name.get(ast) + " is not under aggregate function and not in GROUP BY.",
290	ErrorCodes::NOT_AN_AGGREGATE);
291
292	/// Special check for WITH statement alias. Add alias action to be able to use this alias.
293	if (identifier.prefer_alias_to_column_name && !identifier.alias.empty())
294	data.addAction(ExpressionAction::addAliases({{identifier.name, identifier.alias}}));
295	}
296	}
297
298	void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
299	{
300	CachedColumnName column_name;
301	if (data.hasColumn(column_name.get(ast)))
302	return;
303
304	if (node.name == "lambda")
305	throw Exception ("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION);
306
307	/// Function arrayJoin.
308	if (node.name == "arrayJoin")
309	{
310	if (node.arguments ->children.size() != `1`)
311	throw Exception ("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);
312
313	ASTPtr arg = node.arguments ->children.at(`0`);
314	visit(arg, data);
315	if (!data.only_consts)
316	{
317	String result_name = column_name.get(ast);
318	data.addAction(ExpressionAction::copyColumn(arg ->getColumnName(), result_name));
319	NameSet joined_columns;
320	joined_columns.insert(result_name);
321	data.addAction(ExpressionAction::arrayJoin(joined_columns, false, data.context));
322	}
323
324	return;
325	}
326
327	SetPtr prepared_set;
328	if (functionIsInOrGlobalInOperator(node.name))
329	{
330	/// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything).
331	visit(node.arguments ->children.at(`0`), data);
332
333	if ((prepared_set = makeSet(node, data, data.no_subqueries)))
334	{
335	/// Transform tuple or subquery into a set.
336	}
337	else
338	{
339	if (!data.only_consts)
340	{
341	/// We are in the part of the tree that we are not going to compute. You just need to define types.
342	/// Do not subquery and create sets. We treat "IN" as "ignoreExceptNull" function.
343
344	data.addAction(ExpressionAction::applyFunction(
345	FunctionFactory::instance().get("ignoreExceptNull", data.context),
346	{ node.arguments ->children.at(`0`)->getColumnName() },
347	column_name.get(ast)));
348	}
349	return;
350	}
351	}
352
353	/// A special function `indexHint`. Everything that is inside it is not calculated
354	/// (and is used only for index analysis, see KeyCondition).
355	if (node.name == "indexHint")
356	{
357	data.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName (
358	ColumnConst::create(ColumnUInt8::create(`1`, `1`), `1`), std::make_shared<DataTypeUInt8>(),
359	column_name.get(ast))));
360	return;
361	}
362
363	if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name))
364	return;
365
366	/// Context object that we pass to function should live during query.
367	const Context & function_context = data.context.hasQueryContext()
368	? data.context.getQueryContext()
369	: data.context;
370
371	FunctionOverloadResolverPtr function_builder;
372	try
373	{
374	function_builder = FunctionFactory::instance().get(node.name, function_context);
375	}
376	catch (DB::Exception & e)
377	{
378	auto hints = AggregateFunctionFactory::instance().getHints(node.name);
379	if (!hints.empty())
380	e.addMessage("Or unknown aggregate function " + node.name + ". Maybe you meant: " + toString(hints));
381	e.rethrow();
382	}
383
384	Names argument_names;
385	DataTypes argument_types;
386	bool arguments_present = true;
387
388	/// If the function has an argument-lambda expression, you need to determine its type before the recursive call.
389	bool has_lambda_arguments = false;
390
391	for (size_t arg = `0`; arg < node.arguments ->children.size(); ++arg)
392	{
393	auto & child = node.arguments ->children [arg];
394	auto child_column_name = child ->getColumnName();
395
396	const auto * lambda = child ->as<ASTFunction>();
397	const auto * identifier = child ->as<ASTIdentifier>();
398	if (lambda && lambda->name == "lambda")
399	{
400	/// If the argument is a lambda expression, just remember its approximate type.
401	if (lambda->arguments ->children.size() != `2`)
402	throw Exception ("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
403
404	const auto * lambda_args_tuple = lambda->arguments ->children.at(`0`)->as<ASTFunction>();
405
406	if (!lambda_args_tuple \|\| lambda_args_tuple->name != "tuple")
407	throw Exception ("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);
408
409	has_lambda_arguments = true;
410	argument_types.emplace_back(std::make_shared<DataTypeFunction>(DataTypes (lambda_args_tuple->arguments ->children.size())));
411	/// Select the name in the next cycle.
412	argument_names.emplace_back();
413	}
414	else if (functionIsInOrGlobalInOperator(node.name) && arg == `1` && prepared_set)
415	{
416	ColumnWithTypeAndName column;
417	column.type = std::make_shared<DataTypeSet>();
418
419	/// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name,
420	/// so that sets with the same literal representation do not fuse together (they can have different types).
421	if (!prepared_set ->empty())
422	column.name = getUniqueName(data.getSampleBlock(), "__set");
423	else
424	column.name = child_column_name;
425
426	if (!data.hasColumn(column.name))
427	{
428	auto column_set = ColumnSet::create(`1`, prepared_set);
429	/// If prepared_set is not empty, we have a set made with literals.
430	/// Create a const ColumnSet to make constant folding work
431	if (!prepared_set ->empty())
432	column.column = ColumnConst::create(std::move(column_set), `1`);
433	else
434	column.column = std::move(column_set);
435	data.addAction(ExpressionAction::addColumn(column));
436	}
437
438	argument_types.push_back(column.type);
439	argument_names.push_back(column.name);
440	}
441	else if (identifier && node.name == "joinGet" && arg == `0`)
442	{
443	String database_name;
444	String table_name;
445	std::tie(database_name, table_name) = IdentifierSemantic::extractDatabaseAndTable(*identifier);
446	if (database_name.empty())
447	database_name = data.context.getCurrentDatabase();
448	auto column_string = ColumnString::create();
449	column_string ->insert(database_name + "." + table_name);
450	ColumnWithTypeAndName column(
451	ColumnConst::create(std::move(column_string), `1`),
452	std::make_shared<DataTypeString>(),
453	getUniqueName(data.getSampleBlock(), "__joinGet"));
454	data.addAction(ExpressionAction::addColumn(column));
455	argument_types.push_back(column.type);
456	argument_names.push_back(column.name);
457	}
458	else
459	{
460	/// If the argument is not a lambda expression, call it recursively and find out its type.
461	visit(child, data);
462	std::string name = child_column_name;
463	if (data.hasColumn(name))
464	{
465	argument_types.push_back(data.getSampleBlock().getByName(name).type);
466	argument_names.push_back(name);
467	}
468	else
469	{
470	if (data.only_consts)
471	arguments_present = false;
472	else
473	throw Exception ("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER);
474	}
475	}
476	}
477
478	if (data.only_consts && !arguments_present)
479	return;
480
481	if (has_lambda_arguments && !data.only_consts)
482	{
483	function_builder ->getLambdaArgumentTypes(argument_types);
484
485	/// Call recursively for lambda expressions.
486	for (size_t i = `0`; i < node.arguments ->children.size(); ++i)
487	{
488	ASTPtr child = node.arguments ->children [i];
489
490	const auto * lambda = child ->as<ASTFunction>();
491	if (lambda && lambda->name == "lambda")
492	{
493	const DataTypeFunction * lambda_type = typeid_cast<const DataTypeFunction *>(argument_types [i].get());
494	const auto * lambda_args_tuple = lambda->arguments ->children.at(`0`)->as<ASTFunction>();
495	const ASTs & lambda_arg_asts = lambda_args_tuple->arguments ->children;
496	NamesAndTypesList lambda_arguments;
497
498	for (size_t j = `0`; j < lambda_arg_asts.size(); ++j)
499	{
500	auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts [j]);
501	if (!opt_arg_name)
502	throw Exception ("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
503
504	lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]);
505	}
506
507	data.actions_stack.pushLevel(lambda_arguments);
508	visit(lambda->arguments ->children.at(`1`), data);
509	ExpressionActionsPtr lambda_actions = data.actions_stack.popLevel();
510
511	String result_name = lambda->arguments ->children.at(`1`)->getColumnName();
512	lambda_actions ->finalize(Names (`1`, result_name));
513	DataTypePtr result_type = lambda_actions ->getSampleBlock().getByName(result_name).type;
514
515	Names captured;
516	Names required = lambda_actions ->getRequiredColumns();
517	for (const auto & required_arg : required)
518	if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
519	captured.push_back(required_arg);
520
521	/// We can not name `getColumnName()`,
522	/// because it does not uniquely define the expression (the types of arguments can be different).
523	String lambda_name = getUniqueName(data.getSampleBlock(), "__lambda");
524
525	auto function_capture = std::make_unique<FunctionCaptureOverloadResolver>(
526	lambda_actions, captured, lambda_arguments, result_type, result_name);
527	auto function_capture_adapter = std::make_shared<FunctionOverloadResolverAdaptor>(std::move(function_capture));
528	data.addAction(ExpressionAction::applyFunction(function_capture_adapter, captured, lambda_name));
529
530	argument_types [i] = std::make_shared<DataTypeFunction>(lambda_type->getArgumentTypes(), result_type);
531	argument_names [i] = lambda_name;
532	}
533	}
534	}
535
536	if (data.only_consts)
537	{
538	for (const auto & argument_name : argument_names)
539	{
540	if (!data.hasColumn(argument_name))
541	{
542	arguments_present = false;
543	break;
544	}
545	}
546	}
547
548	if (arguments_present)
549	{
550	data.addAction(ExpressionAction::applyFunction(function_builder, argument_names, column_name.get(ast)));
551	}
552	}
553
554	void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data)
555	{
556	CachedColumnName column_name;
557	if (data.hasColumn(column_name.get(ast)))
558	return;
559
560	DataTypePtr type = applyVisitor(FieldToDataType (), literal.value);
561
562	ColumnWithTypeAndName column;
563	column.column = type ->createColumnConst(`1`, convertFieldToType(literal.value, *type));
564	column.type = type;
565	column.name = column_name.get(ast);
566
567	data.addAction(ExpressionAction::addColumn(column));
568	}
569
570	SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries)
571	{
572	/* You need to convert the right argument to a set.*
573	* This can be a table name, a value, a value enumeration, or a subquery.
574	* The enumeration of values is parsed as a function `tuple`.
575	*/
576	const IAST & args = *node.arguments;
577	const ASTPtr & left_in_operand = args.children.at(`0`);
578	const ASTPtr & right_in_operand = args.children.at(`1`);
579	const Block & sample_block = data.getSampleBlock();
580
581	/// If the subquery or table name for SELECT.
582	const auto * identifier = right_in_operand ->as<ASTIdentifier>();
583	if (right_in_operand ->as<ASTSubquery>() \|\| identifier)
584	{
585	if (no_subqueries)
586	return {};
587	auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
588	if (data.prepared_sets.count(set_key))
589	return data.prepared_sets.at(set_key);
590
591	/// A special case is if the name of the table is specified on the right side of the IN statement,
592	/// and the table has the type Set (a previously prepared set).
593	if (identifier)
594	{
595	DatabaseAndTableWithAlias database_table(*identifier);
596	StoragePtr table = data.context.tryGetTable(database_table.database, database_table.table);
597
598	if (table)
599	{
600	StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
601	if (storage_set)
602	{
603	data.prepared_sets [set_key] = storage_set->getSet();
604	return storage_set->getSet();
605	}
606	}
607	}
608
609	/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
610	String set_id = right_in_operand ->getColumnName();
611
612	SubqueryForSet & subquery_for_set = data.subqueries_for_sets [set_id];
613
614	/// If you already created a Set with the same subquery / table.
615	if (subquery_for_set.set)
616	{
617	data.prepared_sets [set_key] = subquery_for_set.set;
618	return subquery_for_set.set;
619	}
620
621	SetPtr set = std::make_shared<Set>(data.set_size_limit, false);
622
623	/* The following happens for GLOBAL INs:*
624	* - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1,
625	* in the subquery_for_set object, this subquery is set as source and the temporary table _data1 as the table.
626	* - this function shows the expression IN_data1.
627	*/
628	if (!subquery_for_set.source && data.no_storage_or_local)
629	{
630	auto interpreter = interpretSubquery(right_in_operand, data.context, data.subquery_depth, {});
631	subquery_for_set.source = std::make_shared<LazyBlockInputStream>(
632	interpreter ->getSampleBlock(), [interpreter]() mutable { return interpreter ->execute().in; });
633
634	/* Why is LazyBlockInputStream used?*
635	*
636	* The fact is that when processing a query of the form
637	* SELECT ... FROM remote_test WHERE column GLOBAL IN (subquery),
638	* if the distributed remote_test table contains localhost as one of the servers,
639	* the query will be interpreted locally again (and not sent over TCP, as in the case of a remote server).
640	*
641	* The query execution pipeline will be:
642	* CreatingSets
643	* subquery execution, filling the temporary table with _data1 (1)
644	* CreatingSets
645	* reading from the table _data1, creating the set (2)
646	* read from the table subordinate to remote_test.
647	*
648	* (The second part of the pipeline under CreateSets is a reinterpretation of the query inside StorageDistributed,
649	* the query differs in that the database name and tables are replaced with subordinates, and the subquery is replaced with _data1.)
650	*
651	* But when creating the pipeline, when creating the source (2), it will be found that the _data1 table is empty
652	* (because the query has not started yet), and empty source will be returned as the source.
653	* And then, when the query is executed, an empty set will be created in step (2).
654	*
655	* Therefore, we make the initialization of step (2) lazy
656	* - so that it does not occur until step (1) is completed, on which the table will be populated.
657	*
658	* Note: this solution is not very good, you need to think better.
659	*/
660	}
661
662	subquery_for_set.set = set;
663	data.prepared_sets [set_key] = set;
664	return set;
665	}
666	else
667	{
668	if (sample_block.has(left_in_operand ->getColumnName()))
669	/// An explicit enumeration of values in parentheses.
670	return makeExplicitSet(&node, sample_block, false, data.context, data.set_size_limit, data.prepared_sets);
671	else
672	return {};
673	}
674	}
675
676	}
677

Browse the source code of ClickHouse/dbms/src/Interpreters/ActionsVisitor.cpp