| 1 | #include <Common/typeid_cast.h> |
| 2 | #include <Core/NamesAndTypes.h> |
| 3 | #include <Interpreters/JoinToSubqueryTransformVisitor.h> |
| 4 | #include <Interpreters/IdentifierSemantic.h> |
| 5 | #include <Interpreters/AsteriskSemantic.h> |
| 6 | #include <Interpreters/DatabaseAndTableWithAlias.h> |
| 7 | #include <Interpreters/Context.h> |
| 8 | #include <Interpreters/getTableExpressions.h> |
| 9 | #include <Parsers/ASTSelectQuery.h> |
| 10 | #include <Parsers/ASTSubquery.h> |
| 11 | #include <Parsers/ASTTablesInSelectQuery.h> |
| 12 | #include <Parsers/ASTIdentifier.h> |
| 13 | #include <Parsers/ASTExpressionList.h> |
| 14 | #include <Parsers/ParserTablesInSelectQuery.h> |
| 15 | #include <Parsers/ExpressionListParsers.h> |
| 16 | #include <Parsers/parseQuery.h> |
| 17 | #include <IO/WriteHelpers.h> |
| 18 | |
| 19 | |
| 20 | namespace DB |
| 21 | { |
| 22 | |
| 23 | namespace ErrorCodes |
| 24 | { |
| 25 | extern const int LOGICAL_ERROR; |
| 26 | extern const int TOO_DEEP_AST; |
| 27 | extern const int AMBIGUOUS_COLUMN_NAME; |
| 28 | extern const int NOT_IMPLEMENTED; |
| 29 | extern const int UNKNOWN_IDENTIFIER; |
| 30 | } |
| 31 | |
| 32 | namespace |
| 33 | { |
| 34 | |
| 35 | /// Replace asterisks in select_expression_list with column identifiers |
| 36 | class |
| 37 | { |
| 38 | public: |
| 39 | struct |
| 40 | { |
| 41 | std::unordered_map<String, NamesAndTypesList> table_columns; |
| 42 | std::vector<String> ; |
| 43 | std::shared_ptr<ASTExpressionList> ; |
| 44 | |
| 45 | (const Context & context, const std::vector<const ASTTableExpression *> & table_expressions) |
| 46 | { |
| 47 | tables_order.reserve(table_expressions.size()); |
| 48 | for (const auto & expr : table_expressions) |
| 49 | { |
| 50 | if (expr->subquery) |
| 51 | { |
| 52 | table_columns.clear(); |
| 53 | tables_order.clear(); |
| 54 | break; |
| 55 | } |
| 56 | |
| 57 | String table_name = DatabaseAndTableWithAlias(*expr, context.getCurrentDatabase()).getQualifiedNamePrefix(false); |
| 58 | NamesAndTypesList columns = getColumnsFromTableExpression(*expr, context); |
| 59 | tables_order.push_back(table_name); |
| 60 | table_columns.emplace(std::move(table_name), std::move(columns)); |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | void addTableColumns(const String & table_name) |
| 65 | { |
| 66 | auto it = table_columns.find(table_name); |
| 67 | if (it == table_columns.end()) |
| 68 | throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER); |
| 69 | |
| 70 | for (const auto & column : it->second) |
| 71 | new_select_expression_list->children.push_back( |
| 72 | std::make_shared<ASTIdentifier>(std::vector<String>{it->first, column.name})); |
| 73 | } |
| 74 | }; |
| 75 | |
| 76 | static bool (const ASTPtr &, const ASTPtr &) { return false; } |
| 77 | |
| 78 | static void (const ASTPtr & ast, Data & data) |
| 79 | { |
| 80 | if (auto * t = ast->as<ASTExpressionList>()) |
| 81 | visit(*t, ast, data); |
| 82 | } |
| 83 | |
| 84 | private: |
| 85 | static void (const ASTExpressionList & node, const ASTPtr &, Data & data) |
| 86 | { |
| 87 | bool has_asterisks = false; |
| 88 | data.new_select_expression_list = std::make_shared<ASTExpressionList>(); |
| 89 | data.new_select_expression_list->children.reserve(node.children.size()); |
| 90 | |
| 91 | for (auto & child : node.children) |
| 92 | { |
| 93 | if (child->as<ASTAsterisk>()) |
| 94 | { |
| 95 | has_asterisks = true; |
| 96 | |
| 97 | for (auto & table_name : data.tables_order) |
| 98 | data.addTableColumns(table_name); |
| 99 | } |
| 100 | else if (child->as<ASTQualifiedAsterisk>()) |
| 101 | { |
| 102 | has_asterisks = true; |
| 103 | |
| 104 | if (child->children.size() != 1) |
| 105 | throw Exception("Logical error: qualified asterisk must have exactly one child" , ErrorCodes::LOGICAL_ERROR); |
| 106 | ASTIdentifier & identifier = child->children[0]->as<ASTIdentifier &>(); |
| 107 | |
| 108 | data.addTableColumns(identifier.name); |
| 109 | } |
| 110 | else |
| 111 | data.new_select_expression_list->children.push_back(child); |
| 112 | } |
| 113 | |
| 114 | if (!has_asterisks) |
| 115 | data.new_select_expression_list.reset(); |
| 116 | } |
| 117 | }; |
| 118 | |
| 119 | /// Find columns with aliases to push them into rewritten subselects. |
| 120 | /// Normalize table aliases: table_name.column_name -> table_alias.column_name |
| 121 | /// Make aliases maps (alias -> column_name, column_name -> alias) |
| 122 | struct ColumnAliasesMatcher |
| 123 | { |
| 124 | struct Data |
| 125 | { |
| 126 | const std::vector<DatabaseAndTableWithAlias> tables; |
| 127 | bool public_names; |
| 128 | AsteriskSemantic::RevertedAliases rev_aliases; /// long_name -> aliases |
| 129 | std::unordered_map<String, String> aliases; /// alias -> long_name |
| 130 | std::vector<std::pair<ASTIdentifier *, bool>> compound_identifiers; |
| 131 | std::set<String> allowed_long_names; /// original names allowed as aliases '--t.x as t.x' (select expressions only). |
| 132 | |
| 133 | Data(const std::vector<DatabaseAndTableWithAlias> && tables_) |
| 134 | : tables(tables_) |
| 135 | , public_names(false) |
| 136 | {} |
| 137 | |
| 138 | void replaceIdentifiersWithAliases() |
| 139 | { |
| 140 | String hide_prefix = "--" ; /// @note restriction: user should not use alises like `--table.column` |
| 141 | |
| 142 | for (auto & [identifier, is_public] : compound_identifiers) |
| 143 | { |
| 144 | String long_name = identifier->name; |
| 145 | |
| 146 | auto it = rev_aliases.find(long_name); |
| 147 | if (it == rev_aliases.end()) |
| 148 | { |
| 149 | bool last_table = false; |
| 150 | { |
| 151 | size_t best_table_pos = 0; |
| 152 | if (IdentifierSemantic::chooseTable(*identifier, tables, best_table_pos)) |
| 153 | last_table = (best_table_pos + 1 == tables.size()); |
| 154 | } |
| 155 | |
| 156 | if (!last_table) |
| 157 | { |
| 158 | String alias = hide_prefix + long_name; |
| 159 | aliases[alias] = long_name; |
| 160 | rev_aliases[long_name].push_back(alias); |
| 161 | |
| 162 | IdentifierSemantic::coverName(*identifier, alias); |
| 163 | if (is_public) |
| 164 | { |
| 165 | identifier->setAlias(long_name); |
| 166 | allowed_long_names.insert(long_name); |
| 167 | } |
| 168 | } |
| 169 | else if (is_public) |
| 170 | identifier->setAlias(long_name); /// prevent crop long to short name |
| 171 | } |
| 172 | else |
| 173 | { |
| 174 | if (it->second.empty()) |
| 175 | throw Exception("No alias for '" + long_name + "'" , ErrorCodes::LOGICAL_ERROR); |
| 176 | |
| 177 | if (is_public && allowed_long_names.count(long_name)) |
| 178 | ; /// leave original name unchanged for correct output |
| 179 | else |
| 180 | IdentifierSemantic::coverName(*identifier, it->second[0]); |
| 181 | } |
| 182 | } |
| 183 | } |
| 184 | }; |
| 185 | |
| 186 | static bool needChildVisit(const ASTPtr & node, const ASTPtr &) |
| 187 | { |
| 188 | if (node->as<ASTQualifiedAsterisk>()) |
| 189 | return false; |
| 190 | return true; |
| 191 | } |
| 192 | |
| 193 | static void visit(const ASTPtr & ast, Data & data) |
| 194 | { |
| 195 | if (auto * t = ast->as<ASTIdentifier>()) |
| 196 | visit(*t, ast, data); |
| 197 | |
| 198 | if (ast->as<ASTAsterisk>() || ast->as<ASTQualifiedAsterisk>()) |
| 199 | throw Exception("Multiple JOIN do not support asterisks for complex queries yet" , ErrorCodes::NOT_IMPLEMENTED); |
| 200 | } |
| 201 | |
| 202 | static void visit(const ASTIdentifier & const_node, const ASTPtr &, Data & data) |
| 203 | { |
| 204 | ASTIdentifier & node = const_cast<ASTIdentifier &>(const_node); /// we know it's not const |
| 205 | if (node.isShort()) |
| 206 | return; |
| 207 | |
| 208 | bool last_table = false; |
| 209 | String long_name; |
| 210 | |
| 211 | size_t table_pos = 0; |
| 212 | if (IdentifierSemantic::chooseTable(node, data.tables, table_pos)) |
| 213 | { |
| 214 | auto & table = data.tables[table_pos]; |
| 215 | IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name |
| 216 | long_name = node.name; |
| 217 | if (&table == &data.tables.back()) |
| 218 | last_table = true; |
| 219 | } |
| 220 | |
| 221 | if (long_name.empty()) |
| 222 | throw Exception("Cannot refer column '" + node.name + "' to table" , ErrorCodes::AMBIGUOUS_COLUMN_NAME); |
| 223 | |
| 224 | String alias = node.tryGetAlias(); |
| 225 | if (!alias.empty()) |
| 226 | { |
| 227 | data.aliases[alias] = long_name; |
| 228 | data.rev_aliases[long_name].push_back(alias); |
| 229 | |
| 230 | if (!last_table) |
| 231 | { |
| 232 | IdentifierSemantic::coverName(node, alias); |
| 233 | node.setAlias("" ); |
| 234 | } |
| 235 | } |
| 236 | else if (node.compound()) |
| 237 | data.compound_identifiers.emplace_back(&node, data.public_names); |
| 238 | } |
| 239 | }; |
| 240 | |
| 241 | /// Attach additional semantic info to generated selects. |
| 242 | struct AppendSemanticVisitorData |
| 243 | { |
| 244 | using TypeToVisit = ASTSelectQuery; |
| 245 | |
| 246 | AsteriskSemantic::RevertedAliasesPtr rev_aliases = {}; |
| 247 | bool done = false; |
| 248 | |
| 249 | void visit(ASTSelectQuery & select, ASTPtr &) |
| 250 | { |
| 251 | if (done || !rev_aliases || !select.select()) |
| 252 | return; |
| 253 | |
| 254 | for (auto & child : select.select()->children) |
| 255 | { |
| 256 | if (auto * node = child->as<ASTAsterisk>()) |
| 257 | AsteriskSemantic::setAliases(*node, rev_aliases); |
| 258 | if (auto * node = child->as<ASTQualifiedAsterisk>()) |
| 259 | AsteriskSemantic::setAliases(*node, rev_aliases); |
| 260 | } |
| 261 | |
| 262 | done = true; |
| 263 | } |
| 264 | }; |
| 265 | |
| 266 | |
| 267 | /// Replaces table elements with pair. |
| 268 | struct RewriteTablesVisitorData |
| 269 | { |
| 270 | using TypeToVisit = ASTTablesInSelectQuery; |
| 271 | |
| 272 | ASTPtr left; |
| 273 | ASTPtr right; |
| 274 | bool done = false; |
| 275 | |
| 276 | /// @note Do not change ASTTablesInSelectQuery itself. No need to change select.tables. |
| 277 | void visit(ASTTablesInSelectQuery &, ASTPtr & ast) |
| 278 | { |
| 279 | if (done) |
| 280 | return; |
| 281 | std::vector<ASTPtr> new_tables{left, right}; |
| 282 | ast->children.swap(new_tables); |
| 283 | done = true; |
| 284 | } |
| 285 | }; |
| 286 | |
| 287 | /// Attach alias to the first visited subquery |
| 288 | struct SetSubqueryAliasVisitorData |
| 289 | { |
| 290 | using TypeToVisit = ASTSubquery; |
| 291 | |
| 292 | const String & alias; |
| 293 | bool done = false; |
| 294 | |
| 295 | void visit(ASTSubquery &, ASTPtr & ast) |
| 296 | { |
| 297 | if (done) |
| 298 | return; |
| 299 | ast->setAlias(alias); |
| 300 | done = true; |
| 301 | } |
| 302 | }; |
| 303 | |
| 304 | bool needRewrite(ASTSelectQuery & select, std::vector<const ASTTableExpression *> & table_expressions) |
| 305 | { |
| 306 | if (!select.tables()) |
| 307 | return false; |
| 308 | |
| 309 | const auto * tables = select.tables()->as<ASTTablesInSelectQuery>(); |
| 310 | if (!tables) |
| 311 | return false; |
| 312 | |
| 313 | size_t num_tables = tables->children.size(); |
| 314 | if (num_tables <= 2) |
| 315 | return false; |
| 316 | |
| 317 | size_t num_array_join = 0; |
| 318 | size_t num_using = 0; |
| 319 | |
| 320 | table_expressions.reserve(num_tables); |
| 321 | for (size_t i = 0; i < num_tables; ++i) |
| 322 | { |
| 323 | const auto * table = tables->children[i]->as<ASTTablesInSelectQueryElement>(); |
| 324 | if (!table) |
| 325 | throw Exception("Table expected" , ErrorCodes::LOGICAL_ERROR); |
| 326 | |
| 327 | if (table->table_expression) |
| 328 | if (const auto * expression = table->table_expression->as<ASTTableExpression>()) |
| 329 | table_expressions.push_back(expression); |
| 330 | if (!i) |
| 331 | continue; |
| 332 | |
| 333 | if (!table->table_join && !table->array_join) |
| 334 | throw Exception("Joined table expected" , ErrorCodes::LOGICAL_ERROR); |
| 335 | |
| 336 | if (table->array_join) |
| 337 | { |
| 338 | ++num_array_join; |
| 339 | continue; |
| 340 | } |
| 341 | |
| 342 | const auto & join = table->table_join->as<ASTTableJoin &>(); |
| 343 | if (isComma(join.kind)) |
| 344 | throw Exception("COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query" , ErrorCodes::NOT_IMPLEMENTED); |
| 345 | |
| 346 | if (join.using_expression_list) |
| 347 | ++num_using; |
| 348 | } |
| 349 | |
| 350 | if (num_tables - num_array_join <= 2) |
| 351 | return false; |
| 352 | |
| 353 | /// it's not trivial to support mix of JOIN ON & JOIN USING cause of short names |
| 354 | if (num_using) |
| 355 | throw Exception("Multiple JOIN does not support USING" , ErrorCodes::NOT_IMPLEMENTED); |
| 356 | if (num_array_join) |
| 357 | throw Exception("Multiple JOIN does not support mix with ARRAY JOINs" , ErrorCodes::NOT_IMPLEMENTED); |
| 358 | return true; |
| 359 | } |
| 360 | |
| 361 | using RewriteMatcher = OneTypeMatcher<RewriteTablesVisitorData>; |
| 362 | using RewriteVisitor = InDepthNodeVisitor<RewriteMatcher, true>; |
| 363 | using SetSubqueryAliasMatcher = OneTypeMatcher<SetSubqueryAliasVisitorData>; |
| 364 | using SetSubqueryAliasVisitor = InDepthNodeVisitor<SetSubqueryAliasMatcher, true>; |
| 365 | using = ConstInDepthNodeVisitor<ExtractAsterisksMatcher, true>; |
| 366 | using ColumnAliasesVisitor = ConstInDepthNodeVisitor<ColumnAliasesMatcher, true>; |
| 367 | using AppendSemanticMatcher = OneTypeMatcher<AppendSemanticVisitorData>; |
| 368 | using AppendSemanticVisitor = InDepthNodeVisitor<AppendSemanticMatcher, true>; |
| 369 | |
| 370 | } /// namelesspace |
| 371 | |
| 372 | |
| 373 | void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data) |
| 374 | { |
| 375 | if (auto * t = ast->as<ASTSelectQuery>()) |
| 376 | visit(*t, ast, data); |
| 377 | } |
| 378 | |
| 379 | void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data) |
| 380 | { |
| 381 | using RevertedAliases = AsteriskSemantic::RevertedAliases; |
| 382 | |
| 383 | std::vector<const ASTTableExpression *> table_expressions; |
| 384 | if (!needRewrite(select, table_expressions)) |
| 385 | return; |
| 386 | |
| 387 | ExtractAsterisksVisitor::Data asterisks_data(data.context, table_expressions); |
| 388 | if (!asterisks_data.table_columns.empty()) |
| 389 | { |
| 390 | ExtractAsterisksVisitor(asterisks_data).visit(select.select()); |
| 391 | if (asterisks_data.new_select_expression_list) |
| 392 | select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(asterisks_data.new_select_expression_list)); |
| 393 | } |
| 394 | |
| 395 | ColumnAliasesVisitor::Data aliases_data(getDatabaseAndTables(select, "" )); |
| 396 | if (select.select()) |
| 397 | { |
| 398 | aliases_data.public_names = true; |
| 399 | ColumnAliasesVisitor(aliases_data).visit(select.select()); |
| 400 | aliases_data.public_names = false; |
| 401 | } |
| 402 | if (select.where()) |
| 403 | ColumnAliasesVisitor(aliases_data).visit(select.where()); |
| 404 | if (select.prewhere()) |
| 405 | ColumnAliasesVisitor(aliases_data).visit(select.prewhere()); |
| 406 | if (select.orderBy()) |
| 407 | ColumnAliasesVisitor(aliases_data).visit(select.orderBy()); |
| 408 | if (select.groupBy()) |
| 409 | ColumnAliasesVisitor(aliases_data).visit(select.groupBy()); |
| 410 | if (select.having()) |
| 411 | ColumnAliasesVisitor(aliases_data).visit(select.having()); |
| 412 | |
| 413 | /// JOIN sections |
| 414 | for (auto & child : select.tables()->children) |
| 415 | { |
| 416 | auto * table = child->as<ASTTablesInSelectQueryElement>(); |
| 417 | if (table->table_join) |
| 418 | { |
| 419 | auto & join = table->table_join->as<ASTTableJoin &>(); |
| 420 | if (join.on_expression) |
| 421 | ColumnAliasesVisitor(aliases_data).visit(join.on_expression); |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | aliases_data.replaceIdentifiersWithAliases(); |
| 426 | |
| 427 | auto rev_aliases = std::make_shared<RevertedAliases>(); |
| 428 | rev_aliases->swap(aliases_data.rev_aliases); |
| 429 | |
| 430 | auto & src_tables = select.tables()->children; |
| 431 | ASTPtr left_table = src_tables[0]; |
| 432 | |
| 433 | for (size_t i = 1; i < src_tables.size() - 1; ++i) |
| 434 | { |
| 435 | left_table = replaceJoin(left_table, src_tables[i]); |
| 436 | if (!left_table) |
| 437 | throw Exception("Cannot replace tables with subselect" , ErrorCodes::LOGICAL_ERROR); |
| 438 | |
| 439 | /// attach an alias to subquery. |
| 440 | /// TODO: remove setting check after testing period |
| 441 | if (data.context.getSettingsRef().joined_subquery_requires_alias) |
| 442 | { |
| 443 | SetSubqueryAliasVisitor::Data alias_data{String("--.join" ) + std::to_string(i)}; |
| 444 | SetSubqueryAliasVisitor(alias_data).visit(left_table); |
| 445 | } |
| 446 | |
| 447 | /// attach data to generated asterisk |
| 448 | AppendSemanticVisitor::Data semantic_data{rev_aliases, false}; |
| 449 | AppendSemanticVisitor(semantic_data).visit(left_table); |
| 450 | } |
| 451 | |
| 452 | /// replace tables in select with generated two-table join |
| 453 | RewriteVisitor::Data visitor_data{left_table, src_tables.back()}; |
| 454 | RewriteVisitor(visitor_data).visit(select.refTables()); |
| 455 | |
| 456 | data.done = true; |
| 457 | } |
| 458 | |
| 459 | static ASTPtr makeSubqueryTemplate() |
| 460 | { |
| 461 | ParserTablesInSelectQueryElement parser(true); |
| 462 | ASTPtr subquery_template = parseQuery(parser, "(select * from _t)" , 0); |
| 463 | if (!subquery_template) |
| 464 | throw Exception("Cannot parse subquery template" , ErrorCodes::LOGICAL_ERROR); |
| 465 | return subquery_template; |
| 466 | } |
| 467 | |
| 468 | ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_right) |
| 469 | { |
| 470 | const auto * left = ast_left->as<ASTTablesInSelectQueryElement>(); |
| 471 | const auto * right = ast_right->as<ASTTablesInSelectQueryElement>(); |
| 472 | if (!left || !right) |
| 473 | throw Exception("Two TablesInSelectQueryElements expected" , ErrorCodes::LOGICAL_ERROR); |
| 474 | |
| 475 | if (!right->table_join) |
| 476 | throw Exception("Table join expected" , ErrorCodes::LOGICAL_ERROR); |
| 477 | |
| 478 | static ASTPtr subquery_template = makeSubqueryTemplate(); |
| 479 | |
| 480 | /// replace '_t' with pair of joined tables |
| 481 | ASTPtr res = subquery_template->clone(); |
| 482 | RewriteVisitor::Data visitor_data{ast_left, ast_right}; |
| 483 | RewriteVisitor(visitor_data).visit(res); |
| 484 | return res; |
| 485 | } |
| 486 | |
| 487 | } |
| 488 | |