| 1 | #include <Interpreters/AnalyzedJoin.h> |
| 2 | #include <Interpreters/Join.h> |
| 3 | #include <Interpreters/MergeJoin.h> |
| 4 | |
| 5 | #include <Parsers/ASTExpressionList.h> |
| 6 | |
| 7 | #include <Core/Settings.h> |
| 8 | #include <Core/Block.h> |
| 9 | |
| 10 | #include <DataTypes/DataTypeNullable.h> |
| 11 | |
| 12 | |
| 13 | namespace DB |
| 14 | { |
| 15 | |
/// Error code constants declared for this file; `extern` means they are defined in another translation unit.
/// NOTE(review): neither code is referenced in the visible part of this file - confirm both are still needed.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int PARAMETER_OUT_OF_BOUND;
}
| 21 | |
/// Builds the join description by copying the join-related values out of `settings`.
/// `tmp_path_` is stored unchanged in the `tmp_path` member.
AnalyzedJoin::AnalyzedJoin(const Settings & settings, const String & tmp_path_)
    /// The row limit, byte limit and overflow mode are bundled into one SizeLimits value.
    : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode})
    , default_max_bytes(settings.default_max_bytes_in_join)
    , join_use_nulls(settings.join_use_nulls)
    /// The partial_merge_join* members mirror the settings of the same name.
    , partial_merge_join(settings.partial_merge_join)
    , partial_merge_join_optimizations(settings.partial_merge_join_optimizations)
    , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks)
    , tmp_path(tmp_path_)
{}
| 31 | |
| 32 | void AnalyzedJoin::addUsingKey(const ASTPtr & ast) |
| 33 | { |
| 34 | key_names_left.push_back(ast->getColumnName()); |
| 35 | key_names_right.push_back(ast->getAliasOrColumnName()); |
| 36 | |
| 37 | key_asts_left.push_back(ast); |
| 38 | key_asts_right.push_back(ast); |
| 39 | |
| 40 | auto & right_key = key_names_right.back(); |
| 41 | if (renames.count(right_key)) |
| 42 | right_key = renames[right_key]; |
| 43 | } |
| 44 | |
| 45 | void AnalyzedJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) |
| 46 | { |
| 47 | key_names_left.push_back(left_table_ast->getColumnName()); |
| 48 | key_names_right.push_back(right_table_ast->getAliasOrColumnName()); |
| 49 | |
| 50 | key_asts_left.push_back(left_table_ast); |
| 51 | key_asts_right.push_back(right_table_ast); |
| 52 | } |
| 53 | |
| 54 | /// @return how many times right key appears in ON section. |
| 55 | size_t AnalyzedJoin::rightKeyInclusion(const String & name) const |
| 56 | { |
| 57 | if (hasUsing()) |
| 58 | return 0; |
| 59 | |
| 60 | size_t count = 0; |
| 61 | for (const auto & key_name : key_names_right) |
| 62 | if (name == key_name) |
| 63 | ++count; |
| 64 | return count; |
| 65 | } |
| 66 | |
| 67 | void AnalyzedJoin::deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix) |
| 68 | { |
| 69 | NameSet joined_columns; |
| 70 | NamesAndTypesList dedup_columns; |
| 71 | |
| 72 | for (auto & column : columns_from_joined_table) |
| 73 | { |
| 74 | if (joined_columns.count(column.name)) |
| 75 | continue; |
| 76 | |
| 77 | joined_columns.insert(column.name); |
| 78 | |
| 79 | dedup_columns.push_back(column); |
| 80 | auto & inserted = dedup_columns.back(); |
| 81 | |
| 82 | if (left_table_columns.count(column.name)) |
| 83 | inserted.name = right_table_prefix + column.name; |
| 84 | |
| 85 | original_names[inserted.name] = column.name; |
| 86 | if (inserted.name != column.name) |
| 87 | renames[column.name] = inserted.name; |
| 88 | } |
| 89 | |
| 90 | columns_from_joined_table.swap(dedup_columns); |
| 91 | } |
| 92 | |
| 93 | NameSet AnalyzedJoin::getQualifiedColumnsSet() const |
| 94 | { |
| 95 | NameSet out; |
| 96 | for (const auto & names : original_names) |
| 97 | out.insert(names.first); |
| 98 | return out; |
| 99 | } |
| 100 | |
| 101 | NamesWithAliases AnalyzedJoin::getNamesWithAliases(const NameSet & required_columns) const |
| 102 | { |
| 103 | NamesWithAliases out; |
| 104 | for (const auto & column : required_columns) |
| 105 | { |
| 106 | auto it = original_names.find(column); |
| 107 | if (it != original_names.end()) |
| 108 | out.emplace_back(it->second, it->first); /// {original_name, name} |
| 109 | } |
| 110 | return out; |
| 111 | } |
| 112 | |
| 113 | ASTPtr AnalyzedJoin::leftKeysList() const |
| 114 | { |
| 115 | ASTPtr keys_list = std::make_shared<ASTExpressionList>(); |
| 116 | keys_list->children = key_asts_left; |
| 117 | return keys_list; |
| 118 | } |
| 119 | |
| 120 | ASTPtr AnalyzedJoin::rightKeysList() const |
| 121 | { |
| 122 | ASTPtr keys_list = std::make_shared<ASTExpressionList>(); |
| 123 | if (hasOn()) |
| 124 | keys_list->children = key_asts_right; |
| 125 | return keys_list; |
| 126 | } |
| 127 | |
| 128 | Names AnalyzedJoin::requiredJoinedNames() const |
| 129 | { |
| 130 | NameSet required_columns_set(key_names_right.begin(), key_names_right.end()); |
| 131 | for (const auto & joined_column : columns_added_by_join) |
| 132 | required_columns_set.insert(joined_column.name); |
| 133 | |
| 134 | return Names(required_columns_set.begin(), required_columns_set.end()); |
| 135 | } |
| 136 | |
| 137 | NameSet AnalyzedJoin::requiredRightKeys() const |
| 138 | { |
| 139 | NameSet required; |
| 140 | for (const auto & name : key_names_right) |
| 141 | for (const auto & column : columns_added_by_join) |
| 142 | if (name == column.name) |
| 143 | required.insert(name); |
| 144 | return required; |
| 145 | } |
| 146 | |
| 147 | NamesWithAliases AnalyzedJoin::getRequiredColumns(const Block & sample, const Names & action_required_columns) const |
| 148 | { |
| 149 | NameSet required_columns(action_required_columns.begin(), action_required_columns.end()); |
| 150 | |
| 151 | for (auto & column : requiredJoinedNames()) |
| 152 | if (!sample.has(column)) |
| 153 | required_columns.insert(column); |
| 154 | |
| 155 | return getNamesWithAliases(required_columns); |
| 156 | } |
| 157 | |
| 158 | void AnalyzedJoin::addJoinedColumn(const NameAndTypePair & joined_column) |
| 159 | { |
| 160 | if (join_use_nulls && isLeftOrFull(table_join.kind)) |
| 161 | { |
| 162 | auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type; |
| 163 | columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type))); |
| 164 | } |
| 165 | else |
| 166 | columns_added_by_join.push_back(joined_column); |
| 167 | } |
| 168 | |
| 169 | void AnalyzedJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const |
| 170 | { |
| 171 | bool right_or_full_join = isRightOrFull(table_join.kind); |
| 172 | bool left_or_full_join = isLeftOrFull(table_join.kind); |
| 173 | |
| 174 | for (auto & col : sample_block) |
| 175 | { |
| 176 | /// Materialize column. |
| 177 | /// Column is not empty if it is constant, but after Join all constants will be materialized. |
| 178 | /// So, we need remove constants from header. |
| 179 | if (col.column) |
| 180 | col.column = nullptr; |
| 181 | |
| 182 | bool make_nullable = join_use_nulls && right_or_full_join; |
| 183 | |
| 184 | if (make_nullable && col.type->canBeInsideNullable()) |
| 185 | col.type = makeNullable(col.type); |
| 186 | } |
| 187 | |
| 188 | for (const auto & col : columns_added_by_join) |
| 189 | { |
| 190 | auto res_type = col.type; |
| 191 | |
| 192 | bool make_nullable = join_use_nulls && left_or_full_join; |
| 193 | |
| 194 | if (make_nullable && res_type->canBeInsideNullable()) |
| 195 | res_type = makeNullable(res_type); |
| 196 | |
| 197 | sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name)); |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | bool AnalyzedJoin::sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y) |
| 202 | { |
| 203 | if (!x && !y) |
| 204 | return true; |
| 205 | if (!x || !y) |
| 206 | return false; |
| 207 | |
| 208 | return x->table_join.kind == y->table_join.kind |
| 209 | && x->table_join.strictness == y->table_join.strictness |
| 210 | && x->key_names_left == y->key_names_left |
| 211 | && x->key_names_right == y->key_names_right |
| 212 | && x->columns_added_by_join == y->columns_added_by_join; |
| 213 | } |
| 214 | |
| 215 | JoinPtr makeJoin(std::shared_ptr<AnalyzedJoin> table_join, const Block & right_sample_block) |
| 216 | { |
| 217 | bool is_left_or_inner = isLeft(table_join->kind()) || isInner(table_join->kind()); |
| 218 | bool is_asof = (table_join->strictness() == ASTTableJoin::Strictness::Asof); |
| 219 | |
| 220 | if (table_join->partial_merge_join && !is_asof && is_left_or_inner) |
| 221 | return std::make_shared<MergeJoin>(table_join, right_sample_block); |
| 222 | return std::make_shared<Join>(table_join, right_sample_block); |
| 223 | } |
| 224 | |
| 225 | bool isMergeJoin(const JoinPtr & join) |
| 226 | { |
| 227 | if (join) |
| 228 | return typeid_cast<const MergeJoin *>(join.get()); |
| 229 | return false; |
| 230 | } |
| 231 | |
| 232 | } |
| 233 | |