1#include <Common/typeid_cast.h>
2#include <Core/NamesAndTypes.h>
3#include <Interpreters/JoinToSubqueryTransformVisitor.h>
4#include <Interpreters/IdentifierSemantic.h>
5#include <Interpreters/AsteriskSemantic.h>
6#include <Interpreters/DatabaseAndTableWithAlias.h>
7#include <Interpreters/Context.h>
8#include <Interpreters/getTableExpressions.h>
9#include <Parsers/ASTSelectQuery.h>
10#include <Parsers/ASTSubquery.h>
11#include <Parsers/ASTTablesInSelectQuery.h>
12#include <Parsers/ASTIdentifier.h>
13#include <Parsers/ASTExpressionList.h>
14#include <Parsers/ParserTablesInSelectQuery.h>
15#include <Parsers/ExpressionListParsers.h>
16#include <Parsers/parseQuery.h>
17#include <IO/WriteHelpers.h>
18
19
20namespace DB
21{
22
/// Error codes referenced in this file. They are only declared here (`extern`);
/// the definitions are not part of this file.
namespace ErrorCodes
{
    extern const int AMBIGUOUS_COLUMN_NAME;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int TOO_DEEP_AST;
    extern const int UNKNOWN_IDENTIFIER;
}
31
32namespace
33{
34
35/// Replace asterisks in select_expression_list with column identifiers
36class ExtractAsterisksMatcher
37{
38public:
39 struct Data
40 {
41 std::unordered_map<String, NamesAndTypesList> table_columns;
42 std::vector<String> tables_order;
43 std::shared_ptr<ASTExpressionList> new_select_expression_list;
44
45 Data(const Context & context, const std::vector<const ASTTableExpression *> & table_expressions)
46 {
47 tables_order.reserve(table_expressions.size());
48 for (const auto & expr : table_expressions)
49 {
50 if (expr->subquery)
51 {
52 table_columns.clear();
53 tables_order.clear();
54 break;
55 }
56
57 String table_name = DatabaseAndTableWithAlias(*expr, context.getCurrentDatabase()).getQualifiedNamePrefix(false);
58 NamesAndTypesList columns = getColumnsFromTableExpression(*expr, context);
59 tables_order.push_back(table_name);
60 table_columns.emplace(std::move(table_name), std::move(columns));
61 }
62 }
63
64 void addTableColumns(const String & table_name)
65 {
66 auto it = table_columns.find(table_name);
67 if (it == table_columns.end())
68 throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER);
69
70 for (const auto & column : it->second)
71 new_select_expression_list->children.push_back(
72 std::make_shared<ASTIdentifier>(std::vector<String>{it->first, column.name}));
73 }
74 };
75
76 static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return false; }
77
78 static void visit(const ASTPtr & ast, Data & data)
79 {
80 if (auto * t = ast->as<ASTExpressionList>())
81 visit(*t, ast, data);
82 }
83
84private:
85 static void visit(const ASTExpressionList & node, const ASTPtr &, Data & data)
86 {
87 bool has_asterisks = false;
88 data.new_select_expression_list = std::make_shared<ASTExpressionList>();
89 data.new_select_expression_list->children.reserve(node.children.size());
90
91 for (auto & child : node.children)
92 {
93 if (child->as<ASTAsterisk>())
94 {
95 has_asterisks = true;
96
97 for (auto & table_name : data.tables_order)
98 data.addTableColumns(table_name);
99 }
100 else if (child->as<ASTQualifiedAsterisk>())
101 {
102 has_asterisks = true;
103
104 if (child->children.size() != 1)
105 throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
106 ASTIdentifier & identifier = child->children[0]->as<ASTIdentifier &>();
107
108 data.addTableColumns(identifier.name);
109 }
110 else
111 data.new_select_expression_list->children.push_back(child);
112 }
113
114 if (!has_asterisks)
115 data.new_select_expression_list.reset();
116 }
117};
118
119/// Find columns with aliases to push them into rewritten subselects.
120/// Normalize table aliases: table_name.column_name -> table_alias.column_name
121/// Make aliases maps (alias -> column_name, column_name -> alias)
122struct ColumnAliasesMatcher
123{
124 struct Data
125 {
126 const std::vector<DatabaseAndTableWithAlias> tables;
127 bool public_names;
128 AsteriskSemantic::RevertedAliases rev_aliases; /// long_name -> aliases
129 std::unordered_map<String, String> aliases; /// alias -> long_name
130 std::vector<std::pair<ASTIdentifier *, bool>> compound_identifiers;
131 std::set<String> allowed_long_names; /// original names allowed as aliases '--t.x as t.x' (select expressions only).
132
133 Data(const std::vector<DatabaseAndTableWithAlias> && tables_)
134 : tables(tables_)
135 , public_names(false)
136 {}
137
138 void replaceIdentifiersWithAliases()
139 {
140 String hide_prefix = "--"; /// @note restriction: user should not use alises like `--table.column`
141
142 for (auto & [identifier, is_public] : compound_identifiers)
143 {
144 String long_name = identifier->name;
145
146 auto it = rev_aliases.find(long_name);
147 if (it == rev_aliases.end())
148 {
149 bool last_table = false;
150 {
151 size_t best_table_pos = 0;
152 if (IdentifierSemantic::chooseTable(*identifier, tables, best_table_pos))
153 last_table = (best_table_pos + 1 == tables.size());
154 }
155
156 if (!last_table)
157 {
158 String alias = hide_prefix + long_name;
159 aliases[alias] = long_name;
160 rev_aliases[long_name].push_back(alias);
161
162 IdentifierSemantic::coverName(*identifier, alias);
163 if (is_public)
164 {
165 identifier->setAlias(long_name);
166 allowed_long_names.insert(long_name);
167 }
168 }
169 else if (is_public)
170 identifier->setAlias(long_name); /// prevent crop long to short name
171 }
172 else
173 {
174 if (it->second.empty())
175 throw Exception("No alias for '" + long_name + "'", ErrorCodes::LOGICAL_ERROR);
176
177 if (is_public && allowed_long_names.count(long_name))
178 ; /// leave original name unchanged for correct output
179 else
180 IdentifierSemantic::coverName(*identifier, it->second[0]);
181 }
182 }
183 }
184 };
185
186 static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
187 {
188 if (node->as<ASTQualifiedAsterisk>())
189 return false;
190 return true;
191 }
192
193 static void visit(const ASTPtr & ast, Data & data)
194 {
195 if (auto * t = ast->as<ASTIdentifier>())
196 visit(*t, ast, data);
197
198 if (ast->as<ASTAsterisk>() || ast->as<ASTQualifiedAsterisk>())
199 throw Exception("Multiple JOIN do not support asterisks for complex queries yet", ErrorCodes::NOT_IMPLEMENTED);
200 }
201
202 static void visit(const ASTIdentifier & const_node, const ASTPtr &, Data & data)
203 {
204 ASTIdentifier & node = const_cast<ASTIdentifier &>(const_node); /// we know it's not const
205 if (node.isShort())
206 return;
207
208 bool last_table = false;
209 String long_name;
210
211 size_t table_pos = 0;
212 if (IdentifierSemantic::chooseTable(node, data.tables, table_pos))
213 {
214 auto & table = data.tables[table_pos];
215 IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name
216 long_name = node.name;
217 if (&table == &data.tables.back())
218 last_table = true;
219 }
220
221 if (long_name.empty())
222 throw Exception("Cannot refer column '" + node.name + "' to table", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
223
224 String alias = node.tryGetAlias();
225 if (!alias.empty())
226 {
227 data.aliases[alias] = long_name;
228 data.rev_aliases[long_name].push_back(alias);
229
230 if (!last_table)
231 {
232 IdentifierSemantic::coverName(node, alias);
233 node.setAlias("");
234 }
235 }
236 else if (node.compound())
237 data.compound_identifiers.emplace_back(&node, data.public_names);
238 }
239};
240
241/// Attach additional semantic info to generated selects.
242struct AppendSemanticVisitorData
243{
244 using TypeToVisit = ASTSelectQuery;
245
246 AsteriskSemantic::RevertedAliasesPtr rev_aliases = {};
247 bool done = false;
248
249 void visit(ASTSelectQuery & select, ASTPtr &)
250 {
251 if (done || !rev_aliases || !select.select())
252 return;
253
254 for (auto & child : select.select()->children)
255 {
256 if (auto * node = child->as<ASTAsterisk>())
257 AsteriskSemantic::setAliases(*node, rev_aliases);
258 if (auto * node = child->as<ASTQualifiedAsterisk>())
259 AsteriskSemantic::setAliases(*node, rev_aliases);
260 }
261
262 done = true;
263 }
264};
265
266
267/// Replaces table elements with pair.
268struct RewriteTablesVisitorData
269{
270 using TypeToVisit = ASTTablesInSelectQuery;
271
272 ASTPtr left;
273 ASTPtr right;
274 bool done = false;
275
276 /// @note Do not change ASTTablesInSelectQuery itself. No need to change select.tables.
277 void visit(ASTTablesInSelectQuery &, ASTPtr & ast)
278 {
279 if (done)
280 return;
281 std::vector<ASTPtr> new_tables{left, right};
282 ast->children.swap(new_tables);
283 done = true;
284 }
285};
286
287/// Attach alias to the first visited subquery
288struct SetSubqueryAliasVisitorData
289{
290 using TypeToVisit = ASTSubquery;
291
292 const String & alias;
293 bool done = false;
294
295 void visit(ASTSubquery &, ASTPtr & ast)
296 {
297 if (done)
298 return;
299 ast->setAlias(alias);
300 done = true;
301 }
302};
303
304bool needRewrite(ASTSelectQuery & select, std::vector<const ASTTableExpression *> & table_expressions)
305{
306 if (!select.tables())
307 return false;
308
309 const auto * tables = select.tables()->as<ASTTablesInSelectQuery>();
310 if (!tables)
311 return false;
312
313 size_t num_tables = tables->children.size();
314 if (num_tables <= 2)
315 return false;
316
317 size_t num_array_join = 0;
318 size_t num_using = 0;
319
320 table_expressions.reserve(num_tables);
321 for (size_t i = 0; i < num_tables; ++i)
322 {
323 const auto * table = tables->children[i]->as<ASTTablesInSelectQueryElement>();
324 if (!table)
325 throw Exception("Table expected", ErrorCodes::LOGICAL_ERROR);
326
327 if (table->table_expression)
328 if (const auto * expression = table->table_expression->as<ASTTableExpression>())
329 table_expressions.push_back(expression);
330 if (!i)
331 continue;
332
333 if (!table->table_join && !table->array_join)
334 throw Exception("Joined table expected", ErrorCodes::LOGICAL_ERROR);
335
336 if (table->array_join)
337 {
338 ++num_array_join;
339 continue;
340 }
341
342 const auto & join = table->table_join->as<ASTTableJoin &>();
343 if (isComma(join.kind))
344 throw Exception("COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query", ErrorCodes::NOT_IMPLEMENTED);
345
346 if (join.using_expression_list)
347 ++num_using;
348 }
349
350 if (num_tables - num_array_join <= 2)
351 return false;
352
353 /// it's not trivial to support mix of JOIN ON & JOIN USING cause of short names
354 if (num_using)
355 throw Exception("Multiple JOIN does not support USING", ErrorCodes::NOT_IMPLEMENTED);
356 if (num_array_join)
357 throw Exception("Multiple JOIN does not support mix with ARRAY JOINs", ErrorCodes::NOT_IMPLEMENTED);
358 return true;
359}
360
361using RewriteMatcher = OneTypeMatcher<RewriteTablesVisitorData>;
362using RewriteVisitor = InDepthNodeVisitor<RewriteMatcher, true>;
363using SetSubqueryAliasMatcher = OneTypeMatcher<SetSubqueryAliasVisitorData>;
364using SetSubqueryAliasVisitor = InDepthNodeVisitor<SetSubqueryAliasMatcher, true>;
365using ExtractAsterisksVisitor = ConstInDepthNodeVisitor<ExtractAsterisksMatcher, true>;
366using ColumnAliasesVisitor = ConstInDepthNodeVisitor<ColumnAliasesMatcher, true>;
367using AppendSemanticMatcher = OneTypeMatcher<AppendSemanticVisitorData>;
368using AppendSemanticVisitor = InDepthNodeVisitor<AppendSemanticMatcher, true>;
369
370} /// namelesspace
371
372
373void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data)
374{
375 if (auto * t = ast->as<ASTSelectQuery>())
376 visit(*t, ast, data);
377}
378
379void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data)
380{
381 using RevertedAliases = AsteriskSemantic::RevertedAliases;
382
383 std::vector<const ASTTableExpression *> table_expressions;
384 if (!needRewrite(select, table_expressions))
385 return;
386
387 ExtractAsterisksVisitor::Data asterisks_data(data.context, table_expressions);
388 if (!asterisks_data.table_columns.empty())
389 {
390 ExtractAsterisksVisitor(asterisks_data).visit(select.select());
391 if (asterisks_data.new_select_expression_list)
392 select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(asterisks_data.new_select_expression_list));
393 }
394
395 ColumnAliasesVisitor::Data aliases_data(getDatabaseAndTables(select, ""));
396 if (select.select())
397 {
398 aliases_data.public_names = true;
399 ColumnAliasesVisitor(aliases_data).visit(select.select());
400 aliases_data.public_names = false;
401 }
402 if (select.where())
403 ColumnAliasesVisitor(aliases_data).visit(select.where());
404 if (select.prewhere())
405 ColumnAliasesVisitor(aliases_data).visit(select.prewhere());
406 if (select.orderBy())
407 ColumnAliasesVisitor(aliases_data).visit(select.orderBy());
408 if (select.groupBy())
409 ColumnAliasesVisitor(aliases_data).visit(select.groupBy());
410 if (select.having())
411 ColumnAliasesVisitor(aliases_data).visit(select.having());
412
413 /// JOIN sections
414 for (auto & child : select.tables()->children)
415 {
416 auto * table = child->as<ASTTablesInSelectQueryElement>();
417 if (table->table_join)
418 {
419 auto & join = table->table_join->as<ASTTableJoin &>();
420 if (join.on_expression)
421 ColumnAliasesVisitor(aliases_data).visit(join.on_expression);
422 }
423 }
424
425 aliases_data.replaceIdentifiersWithAliases();
426
427 auto rev_aliases = std::make_shared<RevertedAliases>();
428 rev_aliases->swap(aliases_data.rev_aliases);
429
430 auto & src_tables = select.tables()->children;
431 ASTPtr left_table = src_tables[0];
432
433 for (size_t i = 1; i < src_tables.size() - 1; ++i)
434 {
435 left_table = replaceJoin(left_table, src_tables[i]);
436 if (!left_table)
437 throw Exception("Cannot replace tables with subselect", ErrorCodes::LOGICAL_ERROR);
438
439 /// attach an alias to subquery.
440 /// TODO: remove setting check after testing period
441 if (data.context.getSettingsRef().joined_subquery_requires_alias)
442 {
443 SetSubqueryAliasVisitor::Data alias_data{String("--.join") + std::to_string(i)};
444 SetSubqueryAliasVisitor(alias_data).visit(left_table);
445 }
446
447 /// attach data to generated asterisk
448 AppendSemanticVisitor::Data semantic_data{rev_aliases, false};
449 AppendSemanticVisitor(semantic_data).visit(left_table);
450 }
451
452 /// replace tables in select with generated two-table join
453 RewriteVisitor::Data visitor_data{left_table, src_tables.back()};
454 RewriteVisitor(visitor_data).visit(select.refTables());
455
456 data.done = true;
457}
458
459static ASTPtr makeSubqueryTemplate()
460{
461 ParserTablesInSelectQueryElement parser(true);
462 ASTPtr subquery_template = parseQuery(parser, "(select * from _t)", 0);
463 if (!subquery_template)
464 throw Exception("Cannot parse subquery template", ErrorCodes::LOGICAL_ERROR);
465 return subquery_template;
466}
467
468ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_right)
469{
470 const auto * left = ast_left->as<ASTTablesInSelectQueryElement>();
471 const auto * right = ast_right->as<ASTTablesInSelectQueryElement>();
472 if (!left || !right)
473 throw Exception("Two TablesInSelectQueryElements expected", ErrorCodes::LOGICAL_ERROR);
474
475 if (!right->table_join)
476 throw Exception("Table join expected", ErrorCodes::LOGICAL_ERROR);
477
478 static ASTPtr subquery_template = makeSubqueryTemplate();
479
480 /// replace '_t' with pair of joined tables
481 ASTPtr res = subquery_template->clone();
482 RewriteVisitor::Data visitor_data{ast_left, ast_right};
483 RewriteVisitor(visitor_data).visit(res);
484 return res;
485}
486
487}
488