1 | #include <Parsers/queryToString.h> |
2 | |
3 | #include <Interpreters/CollectJoinOnKeysVisitor.h> |
4 | #include <Interpreters/IdentifierSemantic.h> |
5 | #include <Interpreters/AnalyzedJoin.h> |
6 | |
7 | namespace DB |
8 | { |
9 | |
10 | namespace ErrorCodes |
11 | { |
12 | extern const int INVALID_JOIN_ON_EXPRESSION; |
13 | extern const int AMBIGUOUS_COLUMN_NAME; |
14 | extern const int NOT_IMPLEMENTED; |
15 | extern const int LOGICAL_ERROR; |
16 | } |
17 | |
18 | void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, |
19 | const std::pair<size_t, size_t> & table_no) |
20 | { |
21 | ASTPtr left = left_ast->clone(); |
22 | ASTPtr right = right_ast->clone(); |
23 | |
24 | if (table_no.first == 1 || table_no.second == 2) |
25 | analyzed_join.addOnKeys(left, right); |
26 | else if (table_no.first == 2 || table_no.second == 1) |
27 | analyzed_join.addOnKeys(right, left); |
28 | else |
29 | throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous." , |
30 | ErrorCodes::AMBIGUOUS_COLUMN_NAME); |
31 | has_some = true; |
32 | } |
33 | |
34 | void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, |
35 | const std::pair<size_t, size_t> & table_no, const ASOF::Inequality & inequality) |
36 | { |
37 | if (table_no.first == 1 || table_no.second == 2) |
38 | { |
39 | asof_left_key = left_ast->clone(); |
40 | asof_right_key = right_ast->clone(); |
41 | analyzed_join.setAsofInequality(inequality); |
42 | } |
43 | else if (table_no.first == 2 || table_no.second == 1) |
44 | { |
45 | asof_left_key = right_ast->clone(); |
46 | asof_right_key = left_ast->clone(); |
47 | analyzed_join.setAsofInequality(ASOF::reverseInequality(inequality)); |
48 | } |
49 | } |
50 | |
51 | void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() |
52 | { |
53 | if (!asof_left_key || !asof_right_key) |
54 | throw Exception("No inequality in ASOF JOIN ON section." , ErrorCodes::INVALID_JOIN_ON_EXPRESSION); |
55 | addJoinKeys(asof_left_key, asof_right_key, {1, 2}); |
56 | } |
57 | |
58 | |
59 | void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data) |
60 | { |
61 | if (func.name == "and" ) |
62 | return; /// go into children |
63 | |
64 | if (func.name == "equals" ) |
65 | { |
66 | ASTPtr left = func.arguments->children.at(0); |
67 | ASTPtr right = func.arguments->children.at(1); |
68 | auto table_numbers = getTableNumbers(ast, left, right, data); |
69 | data.addJoinKeys(left, right, table_numbers); |
70 | return; |
71 | } |
72 | |
73 | ASOF::Inequality inequality = ASOF::getInequality(func.name); |
74 | |
75 | if (data.is_asof && (inequality != ASOF::Inequality::None)) |
76 | { |
77 | if (data.asof_left_key || data.asof_right_key) |
78 | throwSyntaxException("ASOF JOIN expects exactly one inequality in ON section, unexpected " + queryToString(ast) + "." ); |
79 | |
80 | ASTPtr left = func.arguments->children.at(0); |
81 | ASTPtr right = func.arguments->children.at(1); |
82 | auto table_numbers = getTableNumbers(ast, left, right, data); |
83 | |
84 | data.addAsofJoinKeys(left, right, table_numbers, inequality); |
85 | return; |
86 | } |
87 | |
88 | throwSyntaxException("Expected equals expression, got " + queryToString(ast) + "." ); |
89 | } |
90 | |
91 | void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out) |
92 | { |
93 | if (const auto * ident = ast->as<ASTIdentifier>()) |
94 | { |
95 | if (IdentifierSemantic::getColumnName(*ident)) |
96 | out.push_back(ident); |
97 | return; |
98 | } |
99 | |
100 | for (const auto & child : ast->children) |
101 | getIdentifiers(child, out); |
102 | } |
103 | |
104 | std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, |
105 | Data & data) |
106 | { |
107 | std::vector<const ASTIdentifier *> left_identifiers; |
108 | std::vector<const ASTIdentifier *> right_identifiers; |
109 | |
110 | getIdentifiers(left_ast, left_identifiers); |
111 | getIdentifiers(right_ast, right_identifiers); |
112 | |
113 | size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); |
114 | size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); |
115 | |
116 | if (left_idents_table && left_idents_table == right_idents_table) |
117 | { |
118 | auto left_name = queryToString(*left_identifiers[0]); |
119 | auto right_name = queryToString(*right_identifiers[0]); |
120 | |
121 | throwSyntaxException("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name |
122 | + " are from the same table but from different arguments of equal function." ); |
123 | } |
124 | |
125 | return std::make_pair(left_idents_table, right_idents_table); |
126 | } |
127 | |
128 | const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases) |
129 | { |
130 | if (identifier->compound()) |
131 | return identifier; |
132 | |
133 | UInt32 max_attempts = 100; |
134 | for (auto it = aliases.find(identifier->name); it != aliases.end();) |
135 | { |
136 | const ASTIdentifier * parent = identifier; |
137 | identifier = it->second->as<ASTIdentifier>(); |
138 | if (!identifier) |
139 | break; /// not a column alias |
140 | if (identifier == parent) |
141 | break; /// alias to itself with the same name: 'a as a' |
142 | if (identifier->compound()) |
143 | break; /// not an alias. Break to prevent cycle through short names: 'a as b, t1.b as a' |
144 | |
145 | it = aliases.find(identifier->name); |
146 | if (!max_attempts--) |
147 | throw Exception("Cannot unroll aliases for '" + identifier->name + "'" , ErrorCodes::LOGICAL_ERROR); |
148 | } |
149 | |
150 | return identifier; |
151 | } |
152 | |
153 | /// @returns 1 if identifiers belongs to left table, 2 for right table and 0 if unknown. Throws on table mix. |
154 | /// Place detected identifier into identifiers[0] if any. |
155 | size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data) |
156 | { |
157 | size_t table_number = 0; |
158 | |
159 | for (auto & ident : identifiers) |
160 | { |
161 | const ASTIdentifier * identifier = unrollAliases(ident, data.aliases); |
162 | if (!identifier) |
163 | continue; |
164 | |
165 | /// Column name could be cropped to a short form in TranslateQualifiedNamesVisitor. |
166 | /// In this case it saves membership in IdentifierSemantic. |
167 | auto opt = IdentifierSemantic::getMembership(*identifier); |
168 | size_t membership = opt ? (*opt + 1) : 0; |
169 | |
170 | if (!membership) |
171 | { |
172 | const String & name = identifier->name; |
173 | bool in_left_table = data.left_table.hasColumn(name); |
174 | bool in_right_table = data.right_table.hasColumn(name); |
175 | |
176 | if (in_left_table && in_right_table) |
177 | { |
178 | /// Relax ambiguous check for multiple JOINs |
179 | if (auto original_name = IdentifierSemantic::uncover(*identifier)) |
180 | { |
181 | auto match = IdentifierSemantic::canReferColumnToTable(*original_name, data.right_table.table); |
182 | if (match == IdentifierSemantic::ColumnMatch::NoMatch) |
183 | in_right_table = false; |
184 | in_left_table = !in_right_table; |
185 | } |
186 | else |
187 | throw Exception("Column '" + name + "' is ambiguous" , ErrorCodes::AMBIGUOUS_COLUMN_NAME); |
188 | } |
189 | |
190 | if (in_left_table) |
191 | membership = 1; |
192 | if (in_right_table) |
193 | membership = 2; |
194 | } |
195 | |
196 | if (membership && table_number == 0) |
197 | { |
198 | table_number = membership; |
199 | std::swap(ident, identifiers[0]); /// move first detected identifier to the first position |
200 | } |
201 | |
202 | if (membership && membership != table_number) |
203 | { |
204 | throw Exception("Invalid columns in JOIN ON section. Columns " |
205 | + identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName() |
206 | + " are from different tables." , ErrorCodes::INVALID_JOIN_ON_EXPRESSION); |
207 | } |
208 | } |
209 | |
210 | return table_number; |
211 | } |
212 | |
213 | [[noreturn]] void CollectJoinOnKeysMatcher::throwSyntaxException(const String & msg) |
214 | { |
215 | throw Exception("Invalid expression for JOIN ON. " + msg + |
216 | " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) " |
217 | "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]" , |
218 | ErrorCodes::INVALID_JOIN_ON_EXPRESSION); |
219 | } |
220 | |
221 | } |
222 | |