| 1 | #include "duckdb/execution/operator/join/physical_comparison_join.hpp" |
| 2 | #include "duckdb/common/types/chunk_collection.hpp" |
| 3 | #include "duckdb/common/enum_util.hpp" |
| 4 | |
| 5 | namespace duckdb { |
| 6 | |
| 7 | PhysicalComparisonJoin::PhysicalComparisonJoin(LogicalOperator &op, PhysicalOperatorType type, |
| 8 | vector<JoinCondition> conditions_p, JoinType join_type, |
| 9 | idx_t estimated_cardinality) |
| 10 | : PhysicalJoin(op, type, join_type, estimated_cardinality) { |
| 11 | conditions.resize(new_size: conditions_p.size()); |
| 12 | // we reorder conditions so the ones with COMPARE_EQUAL occur first |
| 13 | idx_t equal_position = 0; |
| 14 | idx_t other_position = conditions_p.size() - 1; |
| 15 | for (idx_t i = 0; i < conditions_p.size(); i++) { |
| 16 | if (conditions_p[i].comparison == ExpressionType::COMPARE_EQUAL || |
| 17 | conditions_p[i].comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) { |
| 18 | // COMPARE_EQUAL and COMPARE_NOT_DISTINCT_FROM, move to the start |
| 19 | conditions[equal_position++] = std::move(conditions_p[i]); |
| 20 | } else { |
| 21 | // other expression, move to the end |
| 22 | conditions[other_position--] = std::move(conditions_p[i]); |
| 23 | } |
| 24 | } |
| 25 | } |
| 26 | |
| 27 | string PhysicalComparisonJoin::ParamsToString() const { |
| 28 | string = EnumUtil::ToString(value: join_type) + "\n" ; |
| 29 | for (auto &it : conditions) { |
| 30 | string op = ExpressionTypeToOperator(type: it.comparison); |
| 31 | extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n" ; |
| 32 | } |
| 33 | extra_info += "\n[INFOSEPARATOR]\n" ; |
| 34 | extra_info += StringUtil::Format(fmt_str: "EC: %llu\n" , params: estimated_props->GetCardinality<idx_t>()); |
| 35 | extra_info += StringUtil::Format(fmt_str: "Cost: %llu" , params: (idx_t)estimated_props->GetCost()); |
| 36 | return extra_info; |
| 37 | } |
| 38 | |
| 39 | void PhysicalComparisonJoin::ConstructEmptyJoinResult(JoinType join_type, bool has_null, DataChunk &input, |
| 40 | DataChunk &result) { |
| 41 | // empty hash table, special case |
| 42 | if (join_type == JoinType::ANTI) { |
| 43 | // anti join with empty hash table, NOP join |
| 44 | // return the input |
| 45 | D_ASSERT(input.ColumnCount() == result.ColumnCount()); |
| 46 | result.Reference(chunk&: input); |
| 47 | } else if (join_type == JoinType::MARK) { |
| 48 | // MARK join with empty hash table |
| 49 | D_ASSERT(join_type == JoinType::MARK); |
| 50 | D_ASSERT(result.ColumnCount() == input.ColumnCount() + 1); |
| 51 | auto &result_vector = result.data.back(); |
| 52 | D_ASSERT(result_vector.GetType() == LogicalType::BOOLEAN); |
| 53 | // for every data vector, we just reference the child chunk |
| 54 | result.SetCardinality(input); |
| 55 | for (idx_t i = 0; i < input.ColumnCount(); i++) { |
| 56 | result.data[i].Reference(other&: input.data[i]); |
| 57 | } |
| 58 | // for the MARK vector: |
| 59 | // if the HT has no NULL values (i.e. empty result set), return a vector that has false for every input |
| 60 | // entry if the HT has NULL values (i.e. result set had values, but all were NULL), return a vector that |
| 61 | // has NULL for every input entry |
| 62 | if (!has_null) { |
| 63 | auto bool_result = FlatVector::GetData<bool>(vector&: result_vector); |
| 64 | for (idx_t i = 0; i < result.size(); i++) { |
| 65 | bool_result[i] = false; |
| 66 | } |
| 67 | } else { |
| 68 | FlatVector::Validity(vector&: result_vector).SetAllInvalid(result.size()); |
| 69 | } |
| 70 | } else if (join_type == JoinType::LEFT || join_type == JoinType::OUTER || join_type == JoinType::SINGLE) { |
| 71 | // LEFT/FULL OUTER/SINGLE join and build side is empty |
| 72 | // for the LHS we reference the data |
| 73 | result.SetCardinality(input.size()); |
| 74 | for (idx_t i = 0; i < input.ColumnCount(); i++) { |
| 75 | result.data[i].Reference(other&: input.data[i]); |
| 76 | } |
| 77 | // for the RHS |
| 78 | for (idx_t k = input.ColumnCount(); k < result.ColumnCount(); k++) { |
| 79 | result.data[k].SetVectorType(VectorType::CONSTANT_VECTOR); |
| 80 | ConstantVector::SetNull(vector&: result.data[k], is_null: true); |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | } // namespace duckdb |
| 85 | |