| 1 | #include <Parsers/queryToString.h> |
| 2 | |
| 3 | #include <Interpreters/CollectJoinOnKeysVisitor.h> |
| 4 | #include <Interpreters/IdentifierSemantic.h> |
| 5 | #include <Interpreters/AnalyzedJoin.h> |
| 6 | |
| 7 | namespace DB |
| 8 | { |
| 9 | |
| 10 | namespace ErrorCodes |
| 11 | { |
| 12 | extern const int INVALID_JOIN_ON_EXPRESSION; |
| 13 | extern const int AMBIGUOUS_COLUMN_NAME; |
| 14 | extern const int NOT_IMPLEMENTED; |
| 15 | extern const int LOGICAL_ERROR; |
| 16 | } |
| 17 | |
| 18 | void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, |
| 19 | const std::pair<size_t, size_t> & table_no) |
| 20 | { |
| 21 | ASTPtr left = left_ast->clone(); |
| 22 | ASTPtr right = right_ast->clone(); |
| 23 | |
| 24 | if (table_no.first == 1 || table_no.second == 2) |
| 25 | analyzed_join.addOnKeys(left, right); |
| 26 | else if (table_no.first == 2 || table_no.second == 1) |
| 27 | analyzed_join.addOnKeys(right, left); |
| 28 | else |
| 29 | throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous." , |
| 30 | ErrorCodes::AMBIGUOUS_COLUMN_NAME); |
| 31 | has_some = true; |
| 32 | } |
| 33 | |
| 34 | void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, |
| 35 | const std::pair<size_t, size_t> & table_no, const ASOF::Inequality & inequality) |
| 36 | { |
| 37 | if (table_no.first == 1 || table_no.second == 2) |
| 38 | { |
| 39 | asof_left_key = left_ast->clone(); |
| 40 | asof_right_key = right_ast->clone(); |
| 41 | analyzed_join.setAsofInequality(inequality); |
| 42 | } |
| 43 | else if (table_no.first == 2 || table_no.second == 1) |
| 44 | { |
| 45 | asof_left_key = right_ast->clone(); |
| 46 | asof_right_key = left_ast->clone(); |
| 47 | analyzed_join.setAsofInequality(ASOF::reverseInequality(inequality)); |
| 48 | } |
| 49 | } |
| 50 | |
| 51 | void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() |
| 52 | { |
| 53 | if (!asof_left_key || !asof_right_key) |
| 54 | throw Exception("No inequality in ASOF JOIN ON section." , ErrorCodes::INVALID_JOIN_ON_EXPRESSION); |
| 55 | addJoinKeys(asof_left_key, asof_right_key, {1, 2}); |
| 56 | } |
| 57 | |
| 58 | |
| 59 | void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data) |
| 60 | { |
| 61 | if (func.name == "and" ) |
| 62 | return; /// go into children |
| 63 | |
| 64 | if (func.name == "equals" ) |
| 65 | { |
| 66 | ASTPtr left = func.arguments->children.at(0); |
| 67 | ASTPtr right = func.arguments->children.at(1); |
| 68 | auto table_numbers = getTableNumbers(ast, left, right, data); |
| 69 | data.addJoinKeys(left, right, table_numbers); |
| 70 | return; |
| 71 | } |
| 72 | |
| 73 | ASOF::Inequality inequality = ASOF::getInequality(func.name); |
| 74 | |
| 75 | if (data.is_asof && (inequality != ASOF::Inequality::None)) |
| 76 | { |
| 77 | if (data.asof_left_key || data.asof_right_key) |
| 78 | throwSyntaxException("ASOF JOIN expects exactly one inequality in ON section, unexpected " + queryToString(ast) + "." ); |
| 79 | |
| 80 | ASTPtr left = func.arguments->children.at(0); |
| 81 | ASTPtr right = func.arguments->children.at(1); |
| 82 | auto table_numbers = getTableNumbers(ast, left, right, data); |
| 83 | |
| 84 | data.addAsofJoinKeys(left, right, table_numbers, inequality); |
| 85 | return; |
| 86 | } |
| 87 | |
| 88 | throwSyntaxException("Expected equals expression, got " + queryToString(ast) + "." ); |
| 89 | } |
| 90 | |
| 91 | void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out) |
| 92 | { |
| 93 | if (const auto * ident = ast->as<ASTIdentifier>()) |
| 94 | { |
| 95 | if (IdentifierSemantic::getColumnName(*ident)) |
| 96 | out.push_back(ident); |
| 97 | return; |
| 98 | } |
| 99 | |
| 100 | for (const auto & child : ast->children) |
| 101 | getIdentifiers(child, out); |
| 102 | } |
| 103 | |
| 104 | std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, |
| 105 | Data & data) |
| 106 | { |
| 107 | std::vector<const ASTIdentifier *> left_identifiers; |
| 108 | std::vector<const ASTIdentifier *> right_identifiers; |
| 109 | |
| 110 | getIdentifiers(left_ast, left_identifiers); |
| 111 | getIdentifiers(right_ast, right_identifiers); |
| 112 | |
| 113 | size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); |
| 114 | size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); |
| 115 | |
| 116 | if (left_idents_table && left_idents_table == right_idents_table) |
| 117 | { |
| 118 | auto left_name = queryToString(*left_identifiers[0]); |
| 119 | auto right_name = queryToString(*right_identifiers[0]); |
| 120 | |
| 121 | throwSyntaxException("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name |
| 122 | + " are from the same table but from different arguments of equal function." ); |
| 123 | } |
| 124 | |
| 125 | return std::make_pair(left_idents_table, right_idents_table); |
| 126 | } |
| 127 | |
| 128 | const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases) |
| 129 | { |
| 130 | if (identifier->compound()) |
| 131 | return identifier; |
| 132 | |
| 133 | UInt32 max_attempts = 100; |
| 134 | for (auto it = aliases.find(identifier->name); it != aliases.end();) |
| 135 | { |
| 136 | const ASTIdentifier * parent = identifier; |
| 137 | identifier = it->second->as<ASTIdentifier>(); |
| 138 | if (!identifier) |
| 139 | break; /// not a column alias |
| 140 | if (identifier == parent) |
| 141 | break; /// alias to itself with the same name: 'a as a' |
| 142 | if (identifier->compound()) |
| 143 | break; /// not an alias. Break to prevent cycle through short names: 'a as b, t1.b as a' |
| 144 | |
| 145 | it = aliases.find(identifier->name); |
| 146 | if (!max_attempts--) |
| 147 | throw Exception("Cannot unroll aliases for '" + identifier->name + "'" , ErrorCodes::LOGICAL_ERROR); |
| 148 | } |
| 149 | |
| 150 | return identifier; |
| 151 | } |
| 152 | |
| 153 | /// @returns 1 if identifiers belongs to left table, 2 for right table and 0 if unknown. Throws on table mix. |
| 154 | /// Place detected identifier into identifiers[0] if any. |
| 155 | size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data) |
| 156 | { |
| 157 | size_t table_number = 0; |
| 158 | |
| 159 | for (auto & ident : identifiers) |
| 160 | { |
| 161 | const ASTIdentifier * identifier = unrollAliases(ident, data.aliases); |
| 162 | if (!identifier) |
| 163 | continue; |
| 164 | |
| 165 | /// Column name could be cropped to a short form in TranslateQualifiedNamesVisitor. |
| 166 | /// In this case it saves membership in IdentifierSemantic. |
| 167 | auto opt = IdentifierSemantic::getMembership(*identifier); |
| 168 | size_t membership = opt ? (*opt + 1) : 0; |
| 169 | |
| 170 | if (!membership) |
| 171 | { |
| 172 | const String & name = identifier->name; |
| 173 | bool in_left_table = data.left_table.hasColumn(name); |
| 174 | bool in_right_table = data.right_table.hasColumn(name); |
| 175 | |
| 176 | if (in_left_table && in_right_table) |
| 177 | { |
| 178 | /// Relax ambiguous check for multiple JOINs |
| 179 | if (auto original_name = IdentifierSemantic::uncover(*identifier)) |
| 180 | { |
| 181 | auto match = IdentifierSemantic::canReferColumnToTable(*original_name, data.right_table.table); |
| 182 | if (match == IdentifierSemantic::ColumnMatch::NoMatch) |
| 183 | in_right_table = false; |
| 184 | in_left_table = !in_right_table; |
| 185 | } |
| 186 | else |
| 187 | throw Exception("Column '" + name + "' is ambiguous" , ErrorCodes::AMBIGUOUS_COLUMN_NAME); |
| 188 | } |
| 189 | |
| 190 | if (in_left_table) |
| 191 | membership = 1; |
| 192 | if (in_right_table) |
| 193 | membership = 2; |
| 194 | } |
| 195 | |
| 196 | if (membership && table_number == 0) |
| 197 | { |
| 198 | table_number = membership; |
| 199 | std::swap(ident, identifiers[0]); /// move first detected identifier to the first position |
| 200 | } |
| 201 | |
| 202 | if (membership && membership != table_number) |
| 203 | { |
| 204 | throw Exception("Invalid columns in JOIN ON section. Columns " |
| 205 | + identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName() |
| 206 | + " are from different tables." , ErrorCodes::INVALID_JOIN_ON_EXPRESSION); |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | return table_number; |
| 211 | } |
| 212 | |
| 213 | [[noreturn]] void CollectJoinOnKeysMatcher::throwSyntaxException(const String & msg) |
| 214 | { |
| 215 | throw Exception("Invalid expression for JOIN ON. " + msg + |
| 216 | " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) " |
| 217 | "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]" , |
| 218 | ErrorCodes::INVALID_JOIN_ON_EXPRESSION); |
| 219 | } |
| 220 | |
| 221 | } |
| 222 | |