1 | #include <Common/typeid_cast.h> |
2 | #include <Core/NamesAndTypes.h> |
3 | #include <Interpreters/JoinToSubqueryTransformVisitor.h> |
4 | #include <Interpreters/IdentifierSemantic.h> |
5 | #include <Interpreters/AsteriskSemantic.h> |
6 | #include <Interpreters/DatabaseAndTableWithAlias.h> |
7 | #include <Interpreters/Context.h> |
8 | #include <Interpreters/getTableExpressions.h> |
9 | #include <Parsers/ASTSelectQuery.h> |
10 | #include <Parsers/ASTSubquery.h> |
11 | #include <Parsers/ASTTablesInSelectQuery.h> |
12 | #include <Parsers/ASTIdentifier.h> |
13 | #include <Parsers/ASTExpressionList.h> |
14 | #include <Parsers/ParserTablesInSelectQuery.h> |
15 | #include <Parsers/ExpressionListParsers.h> |
16 | #include <Parsers/parseQuery.h> |
17 | #include <IO/WriteHelpers.h> |
18 | |
19 | |
20 | namespace DB |
21 | { |
22 | |
23 | namespace ErrorCodes |
24 | { |
25 | extern const int LOGICAL_ERROR; |
26 | extern const int TOO_DEEP_AST; |
27 | extern const int AMBIGUOUS_COLUMN_NAME; |
28 | extern const int NOT_IMPLEMENTED; |
29 | extern const int UNKNOWN_IDENTIFIER; |
30 | } |
31 | |
32 | namespace |
33 | { |
34 | |
35 | /// Replace asterisks in select_expression_list with column identifiers |
36 | class |
37 | { |
38 | public: |
39 | struct |
40 | { |
41 | std::unordered_map<String, NamesAndTypesList> table_columns; |
42 | std::vector<String> ; |
43 | std::shared_ptr<ASTExpressionList> ; |
44 | |
45 | (const Context & context, const std::vector<const ASTTableExpression *> & table_expressions) |
46 | { |
47 | tables_order.reserve(table_expressions.size()); |
48 | for (const auto & expr : table_expressions) |
49 | { |
50 | if (expr->subquery) |
51 | { |
52 | table_columns.clear(); |
53 | tables_order.clear(); |
54 | break; |
55 | } |
56 | |
57 | String table_name = DatabaseAndTableWithAlias(*expr, context.getCurrentDatabase()).getQualifiedNamePrefix(false); |
58 | NamesAndTypesList columns = getColumnsFromTableExpression(*expr, context); |
59 | tables_order.push_back(table_name); |
60 | table_columns.emplace(std::move(table_name), std::move(columns)); |
61 | } |
62 | } |
63 | |
64 | void addTableColumns(const String & table_name) |
65 | { |
66 | auto it = table_columns.find(table_name); |
67 | if (it == table_columns.end()) |
68 | throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER); |
69 | |
70 | for (const auto & column : it->second) |
71 | new_select_expression_list->children.push_back( |
72 | std::make_shared<ASTIdentifier>(std::vector<String>{it->first, column.name})); |
73 | } |
74 | }; |
75 | |
76 | static bool (const ASTPtr &, const ASTPtr &) { return false; } |
77 | |
78 | static void (const ASTPtr & ast, Data & data) |
79 | { |
80 | if (auto * t = ast->as<ASTExpressionList>()) |
81 | visit(*t, ast, data); |
82 | } |
83 | |
84 | private: |
85 | static void (const ASTExpressionList & node, const ASTPtr &, Data & data) |
86 | { |
87 | bool has_asterisks = false; |
88 | data.new_select_expression_list = std::make_shared<ASTExpressionList>(); |
89 | data.new_select_expression_list->children.reserve(node.children.size()); |
90 | |
91 | for (auto & child : node.children) |
92 | { |
93 | if (child->as<ASTAsterisk>()) |
94 | { |
95 | has_asterisks = true; |
96 | |
97 | for (auto & table_name : data.tables_order) |
98 | data.addTableColumns(table_name); |
99 | } |
100 | else if (child->as<ASTQualifiedAsterisk>()) |
101 | { |
102 | has_asterisks = true; |
103 | |
104 | if (child->children.size() != 1) |
105 | throw Exception("Logical error: qualified asterisk must have exactly one child" , ErrorCodes::LOGICAL_ERROR); |
106 | ASTIdentifier & identifier = child->children[0]->as<ASTIdentifier &>(); |
107 | |
108 | data.addTableColumns(identifier.name); |
109 | } |
110 | else |
111 | data.new_select_expression_list->children.push_back(child); |
112 | } |
113 | |
114 | if (!has_asterisks) |
115 | data.new_select_expression_list.reset(); |
116 | } |
117 | }; |
118 | |
119 | /// Find columns with aliases to push them into rewritten subselects. |
120 | /// Normalize table aliases: table_name.column_name -> table_alias.column_name |
121 | /// Make aliases maps (alias -> column_name, column_name -> alias) |
122 | struct ColumnAliasesMatcher |
123 | { |
124 | struct Data |
125 | { |
126 | const std::vector<DatabaseAndTableWithAlias> tables; |
127 | bool public_names; |
128 | AsteriskSemantic::RevertedAliases rev_aliases; /// long_name -> aliases |
129 | std::unordered_map<String, String> aliases; /// alias -> long_name |
130 | std::vector<std::pair<ASTIdentifier *, bool>> compound_identifiers; |
131 | std::set<String> allowed_long_names; /// original names allowed as aliases '--t.x as t.x' (select expressions only). |
132 | |
133 | Data(const std::vector<DatabaseAndTableWithAlias> && tables_) |
134 | : tables(tables_) |
135 | , public_names(false) |
136 | {} |
137 | |
138 | void replaceIdentifiersWithAliases() |
139 | { |
140 | String hide_prefix = "--" ; /// @note restriction: user should not use alises like `--table.column` |
141 | |
142 | for (auto & [identifier, is_public] : compound_identifiers) |
143 | { |
144 | String long_name = identifier->name; |
145 | |
146 | auto it = rev_aliases.find(long_name); |
147 | if (it == rev_aliases.end()) |
148 | { |
149 | bool last_table = false; |
150 | { |
151 | size_t best_table_pos = 0; |
152 | if (IdentifierSemantic::chooseTable(*identifier, tables, best_table_pos)) |
153 | last_table = (best_table_pos + 1 == tables.size()); |
154 | } |
155 | |
156 | if (!last_table) |
157 | { |
158 | String alias = hide_prefix + long_name; |
159 | aliases[alias] = long_name; |
160 | rev_aliases[long_name].push_back(alias); |
161 | |
162 | IdentifierSemantic::coverName(*identifier, alias); |
163 | if (is_public) |
164 | { |
165 | identifier->setAlias(long_name); |
166 | allowed_long_names.insert(long_name); |
167 | } |
168 | } |
169 | else if (is_public) |
170 | identifier->setAlias(long_name); /// prevent crop long to short name |
171 | } |
172 | else |
173 | { |
174 | if (it->second.empty()) |
175 | throw Exception("No alias for '" + long_name + "'" , ErrorCodes::LOGICAL_ERROR); |
176 | |
177 | if (is_public && allowed_long_names.count(long_name)) |
178 | ; /// leave original name unchanged for correct output |
179 | else |
180 | IdentifierSemantic::coverName(*identifier, it->second[0]); |
181 | } |
182 | } |
183 | } |
184 | }; |
185 | |
186 | static bool needChildVisit(const ASTPtr & node, const ASTPtr &) |
187 | { |
188 | if (node->as<ASTQualifiedAsterisk>()) |
189 | return false; |
190 | return true; |
191 | } |
192 | |
193 | static void visit(const ASTPtr & ast, Data & data) |
194 | { |
195 | if (auto * t = ast->as<ASTIdentifier>()) |
196 | visit(*t, ast, data); |
197 | |
198 | if (ast->as<ASTAsterisk>() || ast->as<ASTQualifiedAsterisk>()) |
199 | throw Exception("Multiple JOIN do not support asterisks for complex queries yet" , ErrorCodes::NOT_IMPLEMENTED); |
200 | } |
201 | |
202 | static void visit(const ASTIdentifier & const_node, const ASTPtr &, Data & data) |
203 | { |
204 | ASTIdentifier & node = const_cast<ASTIdentifier &>(const_node); /// we know it's not const |
205 | if (node.isShort()) |
206 | return; |
207 | |
208 | bool last_table = false; |
209 | String long_name; |
210 | |
211 | size_t table_pos = 0; |
212 | if (IdentifierSemantic::chooseTable(node, data.tables, table_pos)) |
213 | { |
214 | auto & table = data.tables[table_pos]; |
215 | IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name |
216 | long_name = node.name; |
217 | if (&table == &data.tables.back()) |
218 | last_table = true; |
219 | } |
220 | |
221 | if (long_name.empty()) |
222 | throw Exception("Cannot refer column '" + node.name + "' to table" , ErrorCodes::AMBIGUOUS_COLUMN_NAME); |
223 | |
224 | String alias = node.tryGetAlias(); |
225 | if (!alias.empty()) |
226 | { |
227 | data.aliases[alias] = long_name; |
228 | data.rev_aliases[long_name].push_back(alias); |
229 | |
230 | if (!last_table) |
231 | { |
232 | IdentifierSemantic::coverName(node, alias); |
233 | node.setAlias("" ); |
234 | } |
235 | } |
236 | else if (node.compound()) |
237 | data.compound_identifiers.emplace_back(&node, data.public_names); |
238 | } |
239 | }; |
240 | |
241 | /// Attach additional semantic info to generated selects. |
242 | struct AppendSemanticVisitorData |
243 | { |
244 | using TypeToVisit = ASTSelectQuery; |
245 | |
246 | AsteriskSemantic::RevertedAliasesPtr rev_aliases = {}; |
247 | bool done = false; |
248 | |
249 | void visit(ASTSelectQuery & select, ASTPtr &) |
250 | { |
251 | if (done || !rev_aliases || !select.select()) |
252 | return; |
253 | |
254 | for (auto & child : select.select()->children) |
255 | { |
256 | if (auto * node = child->as<ASTAsterisk>()) |
257 | AsteriskSemantic::setAliases(*node, rev_aliases); |
258 | if (auto * node = child->as<ASTQualifiedAsterisk>()) |
259 | AsteriskSemantic::setAliases(*node, rev_aliases); |
260 | } |
261 | |
262 | done = true; |
263 | } |
264 | }; |
265 | |
266 | |
267 | /// Replaces table elements with pair. |
268 | struct RewriteTablesVisitorData |
269 | { |
270 | using TypeToVisit = ASTTablesInSelectQuery; |
271 | |
272 | ASTPtr left; |
273 | ASTPtr right; |
274 | bool done = false; |
275 | |
276 | /// @note Do not change ASTTablesInSelectQuery itself. No need to change select.tables. |
277 | void visit(ASTTablesInSelectQuery &, ASTPtr & ast) |
278 | { |
279 | if (done) |
280 | return; |
281 | std::vector<ASTPtr> new_tables{left, right}; |
282 | ast->children.swap(new_tables); |
283 | done = true; |
284 | } |
285 | }; |
286 | |
287 | /// Attach alias to the first visited subquery |
288 | struct SetSubqueryAliasVisitorData |
289 | { |
290 | using TypeToVisit = ASTSubquery; |
291 | |
292 | const String & alias; |
293 | bool done = false; |
294 | |
295 | void visit(ASTSubquery &, ASTPtr & ast) |
296 | { |
297 | if (done) |
298 | return; |
299 | ast->setAlias(alias); |
300 | done = true; |
301 | } |
302 | }; |
303 | |
304 | bool needRewrite(ASTSelectQuery & select, std::vector<const ASTTableExpression *> & table_expressions) |
305 | { |
306 | if (!select.tables()) |
307 | return false; |
308 | |
309 | const auto * tables = select.tables()->as<ASTTablesInSelectQuery>(); |
310 | if (!tables) |
311 | return false; |
312 | |
313 | size_t num_tables = tables->children.size(); |
314 | if (num_tables <= 2) |
315 | return false; |
316 | |
317 | size_t num_array_join = 0; |
318 | size_t num_using = 0; |
319 | |
320 | table_expressions.reserve(num_tables); |
321 | for (size_t i = 0; i < num_tables; ++i) |
322 | { |
323 | const auto * table = tables->children[i]->as<ASTTablesInSelectQueryElement>(); |
324 | if (!table) |
325 | throw Exception("Table expected" , ErrorCodes::LOGICAL_ERROR); |
326 | |
327 | if (table->table_expression) |
328 | if (const auto * expression = table->table_expression->as<ASTTableExpression>()) |
329 | table_expressions.push_back(expression); |
330 | if (!i) |
331 | continue; |
332 | |
333 | if (!table->table_join && !table->array_join) |
334 | throw Exception("Joined table expected" , ErrorCodes::LOGICAL_ERROR); |
335 | |
336 | if (table->array_join) |
337 | { |
338 | ++num_array_join; |
339 | continue; |
340 | } |
341 | |
342 | const auto & join = table->table_join->as<ASTTableJoin &>(); |
343 | if (isComma(join.kind)) |
344 | throw Exception("COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query" , ErrorCodes::NOT_IMPLEMENTED); |
345 | |
346 | if (join.using_expression_list) |
347 | ++num_using; |
348 | } |
349 | |
350 | if (num_tables - num_array_join <= 2) |
351 | return false; |
352 | |
353 | /// it's not trivial to support mix of JOIN ON & JOIN USING cause of short names |
354 | if (num_using) |
355 | throw Exception("Multiple JOIN does not support USING" , ErrorCodes::NOT_IMPLEMENTED); |
356 | if (num_array_join) |
357 | throw Exception("Multiple JOIN does not support mix with ARRAY JOINs" , ErrorCodes::NOT_IMPLEMENTED); |
358 | return true; |
359 | } |
360 | |
361 | using RewriteMatcher = OneTypeMatcher<RewriteTablesVisitorData>; |
362 | using RewriteVisitor = InDepthNodeVisitor<RewriteMatcher, true>; |
363 | using SetSubqueryAliasMatcher = OneTypeMatcher<SetSubqueryAliasVisitorData>; |
364 | using SetSubqueryAliasVisitor = InDepthNodeVisitor<SetSubqueryAliasMatcher, true>; |
365 | using = ConstInDepthNodeVisitor<ExtractAsterisksMatcher, true>; |
366 | using ColumnAliasesVisitor = ConstInDepthNodeVisitor<ColumnAliasesMatcher, true>; |
367 | using AppendSemanticMatcher = OneTypeMatcher<AppendSemanticVisitorData>; |
368 | using AppendSemanticVisitor = InDepthNodeVisitor<AppendSemanticMatcher, true>; |
369 | |
370 | } /// namelesspace |
371 | |
372 | |
373 | void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data) |
374 | { |
375 | if (auto * t = ast->as<ASTSelectQuery>()) |
376 | visit(*t, ast, data); |
377 | } |
378 | |
379 | void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data) |
380 | { |
381 | using RevertedAliases = AsteriskSemantic::RevertedAliases; |
382 | |
383 | std::vector<const ASTTableExpression *> table_expressions; |
384 | if (!needRewrite(select, table_expressions)) |
385 | return; |
386 | |
387 | ExtractAsterisksVisitor::Data asterisks_data(data.context, table_expressions); |
388 | if (!asterisks_data.table_columns.empty()) |
389 | { |
390 | ExtractAsterisksVisitor(asterisks_data).visit(select.select()); |
391 | if (asterisks_data.new_select_expression_list) |
392 | select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(asterisks_data.new_select_expression_list)); |
393 | } |
394 | |
395 | ColumnAliasesVisitor::Data aliases_data(getDatabaseAndTables(select, "" )); |
396 | if (select.select()) |
397 | { |
398 | aliases_data.public_names = true; |
399 | ColumnAliasesVisitor(aliases_data).visit(select.select()); |
400 | aliases_data.public_names = false; |
401 | } |
402 | if (select.where()) |
403 | ColumnAliasesVisitor(aliases_data).visit(select.where()); |
404 | if (select.prewhere()) |
405 | ColumnAliasesVisitor(aliases_data).visit(select.prewhere()); |
406 | if (select.orderBy()) |
407 | ColumnAliasesVisitor(aliases_data).visit(select.orderBy()); |
408 | if (select.groupBy()) |
409 | ColumnAliasesVisitor(aliases_data).visit(select.groupBy()); |
410 | if (select.having()) |
411 | ColumnAliasesVisitor(aliases_data).visit(select.having()); |
412 | |
413 | /// JOIN sections |
414 | for (auto & child : select.tables()->children) |
415 | { |
416 | auto * table = child->as<ASTTablesInSelectQueryElement>(); |
417 | if (table->table_join) |
418 | { |
419 | auto & join = table->table_join->as<ASTTableJoin &>(); |
420 | if (join.on_expression) |
421 | ColumnAliasesVisitor(aliases_data).visit(join.on_expression); |
422 | } |
423 | } |
424 | |
425 | aliases_data.replaceIdentifiersWithAliases(); |
426 | |
427 | auto rev_aliases = std::make_shared<RevertedAliases>(); |
428 | rev_aliases->swap(aliases_data.rev_aliases); |
429 | |
430 | auto & src_tables = select.tables()->children; |
431 | ASTPtr left_table = src_tables[0]; |
432 | |
433 | for (size_t i = 1; i < src_tables.size() - 1; ++i) |
434 | { |
435 | left_table = replaceJoin(left_table, src_tables[i]); |
436 | if (!left_table) |
437 | throw Exception("Cannot replace tables with subselect" , ErrorCodes::LOGICAL_ERROR); |
438 | |
439 | /// attach an alias to subquery. |
440 | /// TODO: remove setting check after testing period |
441 | if (data.context.getSettingsRef().joined_subquery_requires_alias) |
442 | { |
443 | SetSubqueryAliasVisitor::Data alias_data{String("--.join" ) + std::to_string(i)}; |
444 | SetSubqueryAliasVisitor(alias_data).visit(left_table); |
445 | } |
446 | |
447 | /// attach data to generated asterisk |
448 | AppendSemanticVisitor::Data semantic_data{rev_aliases, false}; |
449 | AppendSemanticVisitor(semantic_data).visit(left_table); |
450 | } |
451 | |
452 | /// replace tables in select with generated two-table join |
453 | RewriteVisitor::Data visitor_data{left_table, src_tables.back()}; |
454 | RewriteVisitor(visitor_data).visit(select.refTables()); |
455 | |
456 | data.done = true; |
457 | } |
458 | |
459 | static ASTPtr makeSubqueryTemplate() |
460 | { |
461 | ParserTablesInSelectQueryElement parser(true); |
462 | ASTPtr subquery_template = parseQuery(parser, "(select * from _t)" , 0); |
463 | if (!subquery_template) |
464 | throw Exception("Cannot parse subquery template" , ErrorCodes::LOGICAL_ERROR); |
465 | return subquery_template; |
466 | } |
467 | |
468 | ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_right) |
469 | { |
470 | const auto * left = ast_left->as<ASTTablesInSelectQueryElement>(); |
471 | const auto * right = ast_right->as<ASTTablesInSelectQueryElement>(); |
472 | if (!left || !right) |
473 | throw Exception("Two TablesInSelectQueryElements expected" , ErrorCodes::LOGICAL_ERROR); |
474 | |
475 | if (!right->table_join) |
476 | throw Exception("Table join expected" , ErrorCodes::LOGICAL_ERROR); |
477 | |
478 | static ASTPtr subquery_template = makeSubqueryTemplate(); |
479 | |
480 | /// replace '_t' with pair of joined tables |
481 | ASTPtr res = subquery_template->clone(); |
482 | RewriteVisitor::Data visitor_data{ast_left, ast_right}; |
483 | RewriteVisitor(visitor_data).visit(res); |
484 | return res; |
485 | } |
486 | |
487 | } |
488 | |