1 | #include <Interpreters/AnalyzedJoin.h> |
2 | #include <Interpreters/Join.h> |
3 | #include <Interpreters/MergeJoin.h> |
4 | |
5 | #include <Parsers/ASTExpressionList.h> |
6 | |
7 | #include <Core/Settings.h> |
8 | #include <Core/Block.h> |
9 | |
10 | #include <DataTypes/DataTypeNullable.h> |
11 | |
12 | |
13 | namespace DB |
14 | { |
15 | |
16 | namespace ErrorCodes |
17 | { |
18 | extern const int LOGICAL_ERROR; |
19 | extern const int PARAMETER_OUT_OF_BOUND; |
20 | } |
21 | |
22 | AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_) |
23 | : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode}) |
24 | , default_max_bytes(settings.default_max_bytes_in_join) |
25 | , join_use_nulls(settings.join_use_nulls) |
26 | , partial_merge_join(settings.partial_merge_join) |
27 | , partial_merge_join_optimizations(settings.partial_merge_join_optimizations) |
28 | , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks) |
29 | , tmp_path(tmp_path_) |
30 | {} |
31 | |
32 | void AnalyzedJoin::addUsingKey(const ASTPtr & ast) |
33 | { |
34 | key_names_left.push_back(ast->getColumnName()); |
35 | key_names_right.push_back(ast->getAliasOrColumnName()); |
36 | |
37 | key_asts_left.push_back(ast); |
38 | key_asts_right.push_back(ast); |
39 | |
40 | auto & right_key = key_names_right.back(); |
41 | if (renames.count(right_key)) |
42 | right_key = renames[right_key]; |
43 | } |
44 | |
45 | void AnalyzedJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) |
46 | { |
47 | key_names_left.push_back(left_table_ast->getColumnName()); |
48 | key_names_right.push_back(right_table_ast->getAliasOrColumnName()); |
49 | |
50 | key_asts_left.push_back(left_table_ast); |
51 | key_asts_right.push_back(right_table_ast); |
52 | } |
53 | |
54 | /// @return how many times right key appears in ON section. |
55 | size_t AnalyzedJoin::rightKeyInclusion(const String & name) const |
56 | { |
57 | if (hasUsing()) |
58 | return 0; |
59 | |
60 | size_t count = 0; |
61 | for (const auto & key_name : key_names_right) |
62 | if (name == key_name) |
63 | ++count; |
64 | return count; |
65 | } |
66 | |
67 | void AnalyzedJoin::deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix) |
68 | { |
69 | NameSet joined_columns; |
70 | NamesAndTypesList dedup_columns; |
71 | |
72 | for (auto & column : columns_from_joined_table) |
73 | { |
74 | if (joined_columns.count(column.name)) |
75 | continue; |
76 | |
77 | joined_columns.insert(column.name); |
78 | |
79 | dedup_columns.push_back(column); |
80 | auto & inserted = dedup_columns.back(); |
81 | |
82 | if (left_table_columns.count(column.name)) |
83 | inserted.name = right_table_prefix + column.name; |
84 | |
85 | original_names[inserted.name] = column.name; |
86 | if (inserted.name != column.name) |
87 | renames[column.name] = inserted.name; |
88 | } |
89 | |
90 | columns_from_joined_table.swap(dedup_columns); |
91 | } |
92 | |
93 | NameSet AnalyzedJoin::getQualifiedColumnsSet() const |
94 | { |
95 | NameSet out; |
96 | for (const auto & names : original_names) |
97 | out.insert(names.first); |
98 | return out; |
99 | } |
100 | |
101 | NamesWithAliases AnalyzedJoin::getNamesWithAliases(const NameSet & required_columns) const |
102 | { |
103 | NamesWithAliases out; |
104 | for (const auto & column : required_columns) |
105 | { |
106 | auto it = original_names.find(column); |
107 | if (it != original_names.end()) |
108 | out.emplace_back(it->second, it->first); /// {original_name, name} |
109 | } |
110 | return out; |
111 | } |
112 | |
113 | ASTPtr AnalyzedJoin::leftKeysList() const |
114 | { |
115 | ASTPtr keys_list = std::make_shared<ASTExpressionList>(); |
116 | keys_list->children = key_asts_left; |
117 | return keys_list; |
118 | } |
119 | |
120 | ASTPtr AnalyzedJoin::rightKeysList() const |
121 | { |
122 | ASTPtr keys_list = std::make_shared<ASTExpressionList>(); |
123 | if (hasOn()) |
124 | keys_list->children = key_asts_right; |
125 | return keys_list; |
126 | } |
127 | |
128 | Names AnalyzedJoin::requiredJoinedNames() const |
129 | { |
130 | NameSet required_columns_set(key_names_right.begin(), key_names_right.end()); |
131 | for (const auto & joined_column : columns_added_by_join) |
132 | required_columns_set.insert(joined_column.name); |
133 | |
134 | return Names(required_columns_set.begin(), required_columns_set.end()); |
135 | } |
136 | |
137 | NameSet AnalyzedJoin::requiredRightKeys() const |
138 | { |
139 | NameSet required; |
140 | for (const auto & name : key_names_right) |
141 | for (const auto & column : columns_added_by_join) |
142 | if (name == column.name) |
143 | required.insert(name); |
144 | return required; |
145 | } |
146 | |
147 | NamesWithAliases AnalyzedJoin::getRequiredColumns(const Block & sample, const Names & action_required_columns) const |
148 | { |
149 | NameSet required_columns(action_required_columns.begin(), action_required_columns.end()); |
150 | |
151 | for (auto & column : requiredJoinedNames()) |
152 | if (!sample.has(column)) |
153 | required_columns.insert(column); |
154 | |
155 | return getNamesWithAliases(required_columns); |
156 | } |
157 | |
158 | void AnalyzedJoin::addJoinedColumn(const NameAndTypePair & joined_column) |
159 | { |
160 | if (join_use_nulls && isLeftOrFull(table_join.kind)) |
161 | { |
162 | auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type; |
163 | columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type))); |
164 | } |
165 | else |
166 | columns_added_by_join.push_back(joined_column); |
167 | } |
168 | |
169 | void AnalyzedJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const |
170 | { |
171 | bool right_or_full_join = isRightOrFull(table_join.kind); |
172 | bool left_or_full_join = isLeftOrFull(table_join.kind); |
173 | |
174 | for (auto & col : sample_block) |
175 | { |
176 | /// Materialize column. |
177 | /// Column is not empty if it is constant, but after Join all constants will be materialized. |
178 | /// So, we need remove constants from header. |
179 | if (col.column) |
180 | col.column = nullptr; |
181 | |
182 | bool make_nullable = join_use_nulls && right_or_full_join; |
183 | |
184 | if (make_nullable && col.type->canBeInsideNullable()) |
185 | col.type = makeNullable(col.type); |
186 | } |
187 | |
188 | for (const auto & col : columns_added_by_join) |
189 | { |
190 | auto res_type = col.type; |
191 | |
192 | bool make_nullable = join_use_nulls && left_or_full_join; |
193 | |
194 | if (make_nullable && res_type->canBeInsideNullable()) |
195 | res_type = makeNullable(res_type); |
196 | |
197 | sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name)); |
198 | } |
199 | } |
200 | |
201 | bool AnalyzedJoin::sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y) |
202 | { |
203 | if (!x && !y) |
204 | return true; |
205 | if (!x || !y) |
206 | return false; |
207 | |
208 | return x->table_join.kind == y->table_join.kind |
209 | && x->table_join.strictness == y->table_join.strictness |
210 | && x->key_names_left == y->key_names_left |
211 | && x->key_names_right == y->key_names_right |
212 | && x->columns_added_by_join == y->columns_added_by_join; |
213 | } |
214 | |
215 | JoinPtr makeJoin(std::shared_ptr<AnalyzedJoin> table_join, const Block & right_sample_block) |
216 | { |
217 | bool is_left_or_inner = isLeft(table_join->kind()) || isInner(table_join->kind()); |
218 | bool is_asof = (table_join->strictness() == ASTTableJoin::Strictness::Asof); |
219 | |
220 | if (table_join->partial_merge_join && !is_asof && is_left_or_inner) |
221 | return std::make_shared<MergeJoin>(table_join, right_sample_block); |
222 | return std::make_shared<Join>(table_join, right_sample_block); |
223 | } |
224 | |
225 | bool isMergeJoin(const JoinPtr & join) |
226 | { |
227 | if (join) |
228 | return typeid_cast<const MergeJoin *>(join.get()); |
229 | return false; |
230 | } |
231 | |
232 | } |
233 | |