| 1 | #include <Parsers/queryToString.h> | 
|---|
| 2 |  | 
|---|
| 3 | #include <Interpreters/CollectJoinOnKeysVisitor.h> | 
|---|
| 4 | #include <Interpreters/IdentifierSemantic.h> | 
|---|
| 5 | #include <Interpreters/AnalyzedJoin.h> | 
|---|
| 6 |  | 
|---|
| 7 | namespace DB | 
|---|
| 8 | { | 
|---|
| 9 |  | 
|---|
| 10 | namespace ErrorCodes | 
|---|
| 11 | { | 
|---|
| 12 | extern const int INVALID_JOIN_ON_EXPRESSION; | 
|---|
| 13 | extern const int AMBIGUOUS_COLUMN_NAME; | 
|---|
| 14 | extern const int NOT_IMPLEMENTED; | 
|---|
| 15 | extern const int LOGICAL_ERROR; | 
|---|
| 16 | } | 
|---|
| 17 |  | 
|---|
| 18 | void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, | 
|---|
| 19 | const std::pair<size_t, size_t> & table_no) | 
|---|
| 20 | { | 
|---|
| 21 | ASTPtr left = left_ast->clone(); | 
|---|
| 22 | ASTPtr right = right_ast->clone(); | 
|---|
| 23 |  | 
|---|
| 24 | if (table_no.first == 1 || table_no.second == 2) | 
|---|
| 25 | analyzed_join.addOnKeys(left, right); | 
|---|
| 26 | else if (table_no.first == 2 || table_no.second == 1) | 
|---|
| 27 | analyzed_join.addOnKeys(right, left); | 
|---|
| 28 | else | 
|---|
| 29 | throw Exception( "Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.", | 
|---|
| 30 | ErrorCodes::AMBIGUOUS_COLUMN_NAME); | 
|---|
| 31 | has_some = true; | 
|---|
| 32 | } | 
|---|
| 33 |  | 
|---|
| 34 | void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, | 
|---|
| 35 | const std::pair<size_t, size_t> & table_no, const ASOF::Inequality & inequality) | 
|---|
| 36 | { | 
|---|
| 37 | if (table_no.first == 1 || table_no.second == 2) | 
|---|
| 38 | { | 
|---|
| 39 | asof_left_key = left_ast->clone(); | 
|---|
| 40 | asof_right_key = right_ast->clone(); | 
|---|
| 41 | analyzed_join.setAsofInequality(inequality); | 
|---|
| 42 | } | 
|---|
| 43 | else if (table_no.first == 2 || table_no.second == 1) | 
|---|
| 44 | { | 
|---|
| 45 | asof_left_key = right_ast->clone(); | 
|---|
| 46 | asof_right_key = left_ast->clone(); | 
|---|
| 47 | analyzed_join.setAsofInequality(ASOF::reverseInequality(inequality)); | 
|---|
| 48 | } | 
|---|
| 49 | } | 
|---|
| 50 |  | 
|---|
| 51 | void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() | 
|---|
| 52 | { | 
|---|
| 53 | if (!asof_left_key || !asof_right_key) | 
|---|
| 54 | throw Exception( "No inequality in ASOF JOIN ON section.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); | 
|---|
| 55 | addJoinKeys(asof_left_key, asof_right_key, {1, 2}); | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 |  | 
|---|
| 59 | void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data) | 
|---|
| 60 | { | 
|---|
| 61 | if (func.name == "and") | 
|---|
| 62 | return; /// go into children | 
|---|
| 63 |  | 
|---|
| 64 | if (func.name == "equals") | 
|---|
| 65 | { | 
|---|
| 66 | ASTPtr left = func.arguments->children.at(0); | 
|---|
| 67 | ASTPtr right = func.arguments->children.at(1); | 
|---|
| 68 | auto table_numbers = getTableNumbers(ast, left, right, data); | 
|---|
| 69 | data.addJoinKeys(left, right, table_numbers); | 
|---|
| 70 | return; | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | ASOF::Inequality inequality = ASOF::getInequality(func.name); | 
|---|
| 74 |  | 
|---|
| 75 | if (data.is_asof && (inequality != ASOF::Inequality::None)) | 
|---|
| 76 | { | 
|---|
| 77 | if (data.asof_left_key || data.asof_right_key) | 
|---|
| 78 | throwSyntaxException( "ASOF JOIN expects exactly one inequality in ON section, unexpected "+ queryToString(ast) + "."); | 
|---|
| 79 |  | 
|---|
| 80 | ASTPtr left = func.arguments->children.at(0); | 
|---|
| 81 | ASTPtr right = func.arguments->children.at(1); | 
|---|
| 82 | auto table_numbers = getTableNumbers(ast, left, right, data); | 
|---|
| 83 |  | 
|---|
| 84 | data.addAsofJoinKeys(left, right, table_numbers, inequality); | 
|---|
| 85 | return; | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | throwSyntaxException( "Expected equals expression, got "+ queryToString(ast) + "."); | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out) | 
|---|
| 92 | { | 
|---|
| 93 | if (const auto * ident = ast->as<ASTIdentifier>()) | 
|---|
| 94 | { | 
|---|
| 95 | if (IdentifierSemantic::getColumnName(*ident)) | 
|---|
| 96 | out.push_back(ident); | 
|---|
| 97 | return; | 
|---|
| 98 | } | 
|---|
| 99 |  | 
|---|
| 100 | for (const auto & child : ast->children) | 
|---|
| 101 | getIdentifiers(child, out); | 
|---|
| 102 | } | 
|---|
| 103 |  | 
|---|
| 104 | std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, | 
|---|
| 105 | Data & data) | 
|---|
| 106 | { | 
|---|
| 107 | std::vector<const ASTIdentifier *> left_identifiers; | 
|---|
| 108 | std::vector<const ASTIdentifier *> right_identifiers; | 
|---|
| 109 |  | 
|---|
| 110 | getIdentifiers(left_ast, left_identifiers); | 
|---|
| 111 | getIdentifiers(right_ast, right_identifiers); | 
|---|
| 112 |  | 
|---|
| 113 | size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); | 
|---|
| 114 | size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); | 
|---|
| 115 |  | 
|---|
| 116 | if (left_idents_table && left_idents_table == right_idents_table) | 
|---|
| 117 | { | 
|---|
| 118 | auto left_name = queryToString(*left_identifiers[0]); | 
|---|
| 119 | auto right_name = queryToString(*right_identifiers[0]); | 
|---|
| 120 |  | 
|---|
| 121 | throwSyntaxException( "In expression "+ queryToString(expr) + " columns "+ left_name + " and "+ right_name | 
|---|
| 122 | + " are from the same table but from different arguments of equal function."); | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
| 125 | return std::make_pair(left_idents_table, right_idents_table); | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases) | 
|---|
| 129 | { | 
|---|
| 130 | if (identifier->compound()) | 
|---|
| 131 | return identifier; | 
|---|
| 132 |  | 
|---|
| 133 | UInt32 max_attempts = 100; | 
|---|
| 134 | for (auto it = aliases.find(identifier->name); it != aliases.end();) | 
|---|
| 135 | { | 
|---|
| 136 | const ASTIdentifier * parent = identifier; | 
|---|
| 137 | identifier = it->second->as<ASTIdentifier>(); | 
|---|
| 138 | if (!identifier) | 
|---|
| 139 | break; /// not a column alias | 
|---|
| 140 | if (identifier == parent) | 
|---|
| 141 | break; /// alias to itself with the same name: 'a as a' | 
|---|
| 142 | if (identifier->compound()) | 
|---|
| 143 | break; /// not an alias. Break to prevent cycle through short names: 'a as b, t1.b as a' | 
|---|
| 144 |  | 
|---|
| 145 | it = aliases.find(identifier->name); | 
|---|
| 146 | if (!max_attempts--) | 
|---|
| 147 | throw Exception( "Cannot unroll aliases for '"+ identifier->name + "'", ErrorCodes::LOGICAL_ERROR); | 
|---|
| 148 | } | 
|---|
| 149 |  | 
|---|
| 150 | return identifier; | 
|---|
| 151 | } | 
|---|
| 152 |  | 
|---|
| 153 | /// @returns 1 if identifiers belongs to left table, 2 for right table and 0 if unknown. Throws on table mix. | 
|---|
| 154 | /// Place detected identifier into identifiers[0] if any. | 
|---|
| 155 | size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data) | 
|---|
| 156 | { | 
|---|
| 157 | size_t table_number = 0; | 
|---|
| 158 |  | 
|---|
| 159 | for (auto & ident : identifiers) | 
|---|
| 160 | { | 
|---|
| 161 | const ASTIdentifier * identifier = unrollAliases(ident, data.aliases); | 
|---|
| 162 | if (!identifier) | 
|---|
| 163 | continue; | 
|---|
| 164 |  | 
|---|
| 165 | /// Column name could be cropped to a short form in TranslateQualifiedNamesVisitor. | 
|---|
| 166 | /// In this case it saves membership in IdentifierSemantic. | 
|---|
| 167 | auto opt = IdentifierSemantic::getMembership(*identifier); | 
|---|
| 168 | size_t membership = opt ? (*opt + 1) : 0; | 
|---|
| 169 |  | 
|---|
| 170 | if (!membership) | 
|---|
| 171 | { | 
|---|
| 172 | const String & name = identifier->name; | 
|---|
| 173 | bool in_left_table = data.left_table.hasColumn(name); | 
|---|
| 174 | bool in_right_table = data.right_table.hasColumn(name); | 
|---|
| 175 |  | 
|---|
| 176 | if (in_left_table && in_right_table) | 
|---|
| 177 | { | 
|---|
| 178 | /// Relax ambiguous check for multiple JOINs | 
|---|
| 179 | if (auto original_name = IdentifierSemantic::uncover(*identifier)) | 
|---|
| 180 | { | 
|---|
| 181 | auto match = IdentifierSemantic::canReferColumnToTable(*original_name, data.right_table.table); | 
|---|
| 182 | if (match == IdentifierSemantic::ColumnMatch::NoMatch) | 
|---|
| 183 | in_right_table = false; | 
|---|
| 184 | in_left_table = !in_right_table; | 
|---|
| 185 | } | 
|---|
| 186 | else | 
|---|
| 187 | throw Exception( "Column '"+ name + "' is ambiguous", ErrorCodes::AMBIGUOUS_COLUMN_NAME); | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | if (in_left_table) | 
|---|
| 191 | membership = 1; | 
|---|
| 192 | if (in_right_table) | 
|---|
| 193 | membership = 2; | 
|---|
| 194 | } | 
|---|
| 195 |  | 
|---|
| 196 | if (membership && table_number == 0) | 
|---|
| 197 | { | 
|---|
| 198 | table_number = membership; | 
|---|
| 199 | std::swap(ident, identifiers[0]); /// move first detected identifier to the first position | 
|---|
| 200 | } | 
|---|
| 201 |  | 
|---|
| 202 | if (membership && membership != table_number) | 
|---|
| 203 | { | 
|---|
| 204 | throw Exception( "Invalid columns in JOIN ON section. Columns " | 
|---|
| 205 | + identifiers[0]->getAliasOrColumnName() + " and "+ ident->getAliasOrColumnName() | 
|---|
| 206 | + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); | 
|---|
| 207 | } | 
|---|
| 208 | } | 
|---|
| 209 |  | 
|---|
| 210 | return table_number; | 
|---|
| 211 | } | 
|---|
| 212 |  | 
|---|
| 213 | [[noreturn]] void CollectJoinOnKeysMatcher::throwSyntaxException(const String & msg) | 
|---|
| 214 | { | 
|---|
| 215 | throw Exception( "Invalid expression for JOIN ON. "+ msg + | 
|---|
| 216 | " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) " | 
|---|
| 217 | "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]", | 
|---|
| 218 | ErrorCodes::INVALID_JOIN_ON_EXPRESSION); | 
|---|
| 219 | } | 
|---|
| 220 |  | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|