1#include <Interpreters/AnalyzedJoin.h>
2#include <Interpreters/Join.h>
3#include <Interpreters/MergeJoin.h>
4
5#include <Parsers/ASTExpressionList.h>
6
7#include <Core/Settings.h>
8#include <Core/Block.h>
9
10#include <DataTypes/DataTypeNullable.h>
11
12
13namespace DB
14{
15
/// Error code constants addressed as `ErrorCodes::<NAME>`.
/// They are only declared here (`extern`); their definitions are provided by another translation unit.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int PARAMETER_OUT_OF_BOUND;
}
21
/// Captures the join-related settings into members at construction time.
/// @param settings   Settings object the values below are read from.
/// @param tmp_path_  Stored unchanged in `tmp_path`.
AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_)
    /// Row limit, byte limit and overflow mode are bundled into a single SizeLimits value.
    : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode})
    , default_max_bytes(settings.default_max_bytes_in_join)
    , join_use_nulls(settings.join_use_nulls)
    /// `partial_merge_join` is consulted by makeJoin() when choosing the join implementation.
    , partial_merge_join(settings.partial_merge_join)
    , partial_merge_join_optimizations(settings.partial_merge_join_optimizations)
    , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks)
    , tmp_path(tmp_path_)
{}
31
32void AnalyzedJoin::addUsingKey(const ASTPtr & ast)
33{
34 key_names_left.push_back(ast->getColumnName());
35 key_names_right.push_back(ast->getAliasOrColumnName());
36
37 key_asts_left.push_back(ast);
38 key_asts_right.push_back(ast);
39
40 auto & right_key = key_names_right.back();
41 if (renames.count(right_key))
42 right_key = renames[right_key];
43}
44
45void AnalyzedJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast)
46{
47 key_names_left.push_back(left_table_ast->getColumnName());
48 key_names_right.push_back(right_table_ast->getAliasOrColumnName());
49
50 key_asts_left.push_back(left_table_ast);
51 key_asts_right.push_back(right_table_ast);
52}
53
54/// @return how many times right key appears in ON section.
55size_t AnalyzedJoin::rightKeyInclusion(const String & name) const
56{
57 if (hasUsing())
58 return 0;
59
60 size_t count = 0;
61 for (const auto & key_name : key_names_right)
62 if (name == key_name)
63 ++count;
64 return count;
65}
66
67void AnalyzedJoin::deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix)
68{
69 NameSet joined_columns;
70 NamesAndTypesList dedup_columns;
71
72 for (auto & column : columns_from_joined_table)
73 {
74 if (joined_columns.count(column.name))
75 continue;
76
77 joined_columns.insert(column.name);
78
79 dedup_columns.push_back(column);
80 auto & inserted = dedup_columns.back();
81
82 if (left_table_columns.count(column.name))
83 inserted.name = right_table_prefix + column.name;
84
85 original_names[inserted.name] = column.name;
86 if (inserted.name != column.name)
87 renames[column.name] = inserted.name;
88 }
89
90 columns_from_joined_table.swap(dedup_columns);
91}
92
93NameSet AnalyzedJoin::getQualifiedColumnsSet() const
94{
95 NameSet out;
96 for (const auto & names : original_names)
97 out.insert(names.first);
98 return out;
99}
100
101NamesWithAliases AnalyzedJoin::getNamesWithAliases(const NameSet & required_columns) const
102{
103 NamesWithAliases out;
104 for (const auto & column : required_columns)
105 {
106 auto it = original_names.find(column);
107 if (it != original_names.end())
108 out.emplace_back(it->second, it->first); /// {original_name, name}
109 }
110 return out;
111}
112
113ASTPtr AnalyzedJoin::leftKeysList() const
114{
115 ASTPtr keys_list = std::make_shared<ASTExpressionList>();
116 keys_list->children = key_asts_left;
117 return keys_list;
118}
119
120ASTPtr AnalyzedJoin::rightKeysList() const
121{
122 ASTPtr keys_list = std::make_shared<ASTExpressionList>();
123 if (hasOn())
124 keys_list->children = key_asts_right;
125 return keys_list;
126}
127
128Names AnalyzedJoin::requiredJoinedNames() const
129{
130 NameSet required_columns_set(key_names_right.begin(), key_names_right.end());
131 for (const auto & joined_column : columns_added_by_join)
132 required_columns_set.insert(joined_column.name);
133
134 return Names(required_columns_set.begin(), required_columns_set.end());
135}
136
137NameSet AnalyzedJoin::requiredRightKeys() const
138{
139 NameSet required;
140 for (const auto & name : key_names_right)
141 for (const auto & column : columns_added_by_join)
142 if (name == column.name)
143 required.insert(name);
144 return required;
145}
146
147NamesWithAliases AnalyzedJoin::getRequiredColumns(const Block & sample, const Names & action_required_columns) const
148{
149 NameSet required_columns(action_required_columns.begin(), action_required_columns.end());
150
151 for (auto & column : requiredJoinedNames())
152 if (!sample.has(column))
153 required_columns.insert(column);
154
155 return getNamesWithAliases(required_columns);
156}
157
158void AnalyzedJoin::addJoinedColumn(const NameAndTypePair & joined_column)
159{
160 if (join_use_nulls && isLeftOrFull(table_join.kind))
161 {
162 auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type;
163 columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type)));
164 }
165 else
166 columns_added_by_join.push_back(joined_column);
167}
168
169void AnalyzedJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const
170{
171 bool right_or_full_join = isRightOrFull(table_join.kind);
172 bool left_or_full_join = isLeftOrFull(table_join.kind);
173
174 for (auto & col : sample_block)
175 {
176 /// Materialize column.
177 /// Column is not empty if it is constant, but after Join all constants will be materialized.
178 /// So, we need remove constants from header.
179 if (col.column)
180 col.column = nullptr;
181
182 bool make_nullable = join_use_nulls && right_or_full_join;
183
184 if (make_nullable && col.type->canBeInsideNullable())
185 col.type = makeNullable(col.type);
186 }
187
188 for (const auto & col : columns_added_by_join)
189 {
190 auto res_type = col.type;
191
192 bool make_nullable = join_use_nulls && left_or_full_join;
193
194 if (make_nullable && res_type->canBeInsideNullable())
195 res_type = makeNullable(res_type);
196
197 sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name));
198 }
199}
200
201bool AnalyzedJoin::sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y)
202{
203 if (!x && !y)
204 return true;
205 if (!x || !y)
206 return false;
207
208 return x->table_join.kind == y->table_join.kind
209 && x->table_join.strictness == y->table_join.strictness
210 && x->key_names_left == y->key_names_left
211 && x->key_names_right == y->key_names_right
212 && x->columns_added_by_join == y->columns_added_by_join;
213}
214
215JoinPtr makeJoin(std::shared_ptr<AnalyzedJoin> table_join, const Block & right_sample_block)
216{
217 bool is_left_or_inner = isLeft(table_join->kind()) || isInner(table_join->kind());
218 bool is_asof = (table_join->strictness() == ASTTableJoin::Strictness::Asof);
219
220 if (table_join->partial_merge_join && !is_asof && is_left_or_inner)
221 return std::make_shared<MergeJoin>(table_join, right_sample_block);
222 return std::make_shared<Join>(table_join, right_sample_block);
223}
224
225bool isMergeJoin(const JoinPtr & join)
226{
227 if (join)
228 return typeid_cast<const MergeJoin *>(join.get());
229 return false;
230}
231
232}
233