1#include <Parsers/queryToString.h>
2
3#include <Interpreters/CollectJoinOnKeysVisitor.h>
4#include <Interpreters/IdentifierSemantic.h>
5#include <Interpreters/AnalyzedJoin.h>
6
7namespace DB
8{
9
10namespace ErrorCodes
11{
12 extern const int INVALID_JOIN_ON_EXPRESSION;
13 extern const int AMBIGUOUS_COLUMN_NAME;
14 extern const int NOT_IMPLEMENTED;
15 extern const int LOGICAL_ERROR;
16}
17
18void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast,
19 const std::pair<size_t, size_t> & table_no)
20{
21 ASTPtr left = left_ast->clone();
22 ASTPtr right = right_ast->clone();
23
24 if (table_no.first == 1 || table_no.second == 2)
25 analyzed_join.addOnKeys(left, right);
26 else if (table_no.first == 2 || table_no.second == 1)
27 analyzed_join.addOnKeys(right, left);
28 else
29 throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.",
30 ErrorCodes::AMBIGUOUS_COLUMN_NAME);
31 has_some = true;
32}
33
34void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast,
35 const std::pair<size_t, size_t> & table_no, const ASOF::Inequality & inequality)
36{
37 if (table_no.first == 1 || table_no.second == 2)
38 {
39 asof_left_key = left_ast->clone();
40 asof_right_key = right_ast->clone();
41 analyzed_join.setAsofInequality(inequality);
42 }
43 else if (table_no.first == 2 || table_no.second == 1)
44 {
45 asof_left_key = right_ast->clone();
46 asof_right_key = left_ast->clone();
47 analyzed_join.setAsofInequality(ASOF::reverseInequality(inequality));
48 }
49}
50
51void CollectJoinOnKeysMatcher::Data::asofToJoinKeys()
52{
53 if (!asof_left_key || !asof_right_key)
54 throw Exception("No inequality in ASOF JOIN ON section.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
55 addJoinKeys(asof_left_key, asof_right_key, {1, 2});
56}
57
58
59void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data)
60{
61 if (func.name == "and")
62 return; /// go into children
63
64 if (func.name == "equals")
65 {
66 ASTPtr left = func.arguments->children.at(0);
67 ASTPtr right = func.arguments->children.at(1);
68 auto table_numbers = getTableNumbers(ast, left, right, data);
69 data.addJoinKeys(left, right, table_numbers);
70 return;
71 }
72
73 ASOF::Inequality inequality = ASOF::getInequality(func.name);
74
75 if (data.is_asof && (inequality != ASOF::Inequality::None))
76 {
77 if (data.asof_left_key || data.asof_right_key)
78 throwSyntaxException("ASOF JOIN expects exactly one inequality in ON section, unexpected " + queryToString(ast) + ".");
79
80 ASTPtr left = func.arguments->children.at(0);
81 ASTPtr right = func.arguments->children.at(1);
82 auto table_numbers = getTableNumbers(ast, left, right, data);
83
84 data.addAsofJoinKeys(left, right, table_numbers, inequality);
85 return;
86 }
87
88 throwSyntaxException("Expected equals expression, got " + queryToString(ast) + ".");
89}
90
91void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out)
92{
93 if (const auto * ident = ast->as<ASTIdentifier>())
94 {
95 if (IdentifierSemantic::getColumnName(*ident))
96 out.push_back(ident);
97 return;
98 }
99
100 for (const auto & child : ast->children)
101 getIdentifiers(child, out);
102}
103
104std::pair<size_t, size_t> CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast,
105 Data & data)
106{
107 std::vector<const ASTIdentifier *> left_identifiers;
108 std::vector<const ASTIdentifier *> right_identifiers;
109
110 getIdentifiers(left_ast, left_identifiers);
111 getIdentifiers(right_ast, right_identifiers);
112
113 size_t left_idents_table = getTableForIdentifiers(left_identifiers, data);
114 size_t right_idents_table = getTableForIdentifiers(right_identifiers, data);
115
116 if (left_idents_table && left_idents_table == right_idents_table)
117 {
118 auto left_name = queryToString(*left_identifiers[0]);
119 auto right_name = queryToString(*right_identifiers[0]);
120
121 throwSyntaxException("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name
122 + " are from the same table but from different arguments of equal function.");
123 }
124
125 return std::make_pair(left_idents_table, right_idents_table);
126}
127
128const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases)
129{
130 if (identifier->compound())
131 return identifier;
132
133 UInt32 max_attempts = 100;
134 for (auto it = aliases.find(identifier->name); it != aliases.end();)
135 {
136 const ASTIdentifier * parent = identifier;
137 identifier = it->second->as<ASTIdentifier>();
138 if (!identifier)
139 break; /// not a column alias
140 if (identifier == parent)
141 break; /// alias to itself with the same name: 'a as a'
142 if (identifier->compound())
143 break; /// not an alias. Break to prevent cycle through short names: 'a as b, t1.b as a'
144
145 it = aliases.find(identifier->name);
146 if (!max_attempts--)
147 throw Exception("Cannot unroll aliases for '" + identifier->name + "'", ErrorCodes::LOGICAL_ERROR);
148 }
149
150 return identifier;
151}
152
153/// @returns 1 if identifiers belongs to left table, 2 for right table and 0 if unknown. Throws on table mix.
154/// Place detected identifier into identifiers[0] if any.
155size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data)
156{
157 size_t table_number = 0;
158
159 for (auto & ident : identifiers)
160 {
161 const ASTIdentifier * identifier = unrollAliases(ident, data.aliases);
162 if (!identifier)
163 continue;
164
165 /// Column name could be cropped to a short form in TranslateQualifiedNamesVisitor.
166 /// In this case it saves membership in IdentifierSemantic.
167 auto opt = IdentifierSemantic::getMembership(*identifier);
168 size_t membership = opt ? (*opt + 1) : 0;
169
170 if (!membership)
171 {
172 const String & name = identifier->name;
173 bool in_left_table = data.left_table.hasColumn(name);
174 bool in_right_table = data.right_table.hasColumn(name);
175
176 if (in_left_table && in_right_table)
177 {
178 /// Relax ambiguous check for multiple JOINs
179 if (auto original_name = IdentifierSemantic::uncover(*identifier))
180 {
181 auto match = IdentifierSemantic::canReferColumnToTable(*original_name, data.right_table.table);
182 if (match == IdentifierSemantic::ColumnMatch::NoMatch)
183 in_right_table = false;
184 in_left_table = !in_right_table;
185 }
186 else
187 throw Exception("Column '" + name + "' is ambiguous", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
188 }
189
190 if (in_left_table)
191 membership = 1;
192 if (in_right_table)
193 membership = 2;
194 }
195
196 if (membership && table_number == 0)
197 {
198 table_number = membership;
199 std::swap(ident, identifiers[0]); /// move first detected identifier to the first position
200 }
201
202 if (membership && membership != table_number)
203 {
204 throw Exception("Invalid columns in JOIN ON section. Columns "
205 + identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName()
206 + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
207 }
208 }
209
210 return table_number;
211}
212
213[[noreturn]] void CollectJoinOnKeysMatcher::throwSyntaxException(const String & msg)
214{
215 throw Exception("Invalid expression for JOIN ON. " + msg +
216 " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
217 "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]",
218 ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
219}
220
221}
222