1 | #include "duckdb/execution/operator/join/physical_comparison_join.hpp" |
2 | #include "duckdb/common/types/chunk_collection.hpp" |
3 | #include "duckdb/common/enum_util.hpp" |
4 | |
5 | namespace duckdb { |
6 | |
7 | PhysicalComparisonJoin::PhysicalComparisonJoin(LogicalOperator &op, PhysicalOperatorType type, |
8 | vector<JoinCondition> conditions_p, JoinType join_type, |
9 | idx_t estimated_cardinality) |
10 | : PhysicalJoin(op, type, join_type, estimated_cardinality) { |
11 | conditions.resize(new_size: conditions_p.size()); |
12 | // we reorder conditions so the ones with COMPARE_EQUAL occur first |
13 | idx_t equal_position = 0; |
14 | idx_t other_position = conditions_p.size() - 1; |
15 | for (idx_t i = 0; i < conditions_p.size(); i++) { |
16 | if (conditions_p[i].comparison == ExpressionType::COMPARE_EQUAL || |
17 | conditions_p[i].comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) { |
18 | // COMPARE_EQUAL and COMPARE_NOT_DISTINCT_FROM, move to the start |
19 | conditions[equal_position++] = std::move(conditions_p[i]); |
20 | } else { |
21 | // other expression, move to the end |
22 | conditions[other_position--] = std::move(conditions_p[i]); |
23 | } |
24 | } |
25 | } |
26 | |
27 | string PhysicalComparisonJoin::ParamsToString() const { |
28 | string = EnumUtil::ToString(value: join_type) + "\n" ; |
29 | for (auto &it : conditions) { |
30 | string op = ExpressionTypeToOperator(type: it.comparison); |
31 | extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n" ; |
32 | } |
33 | extra_info += "\n[INFOSEPARATOR]\n" ; |
34 | extra_info += StringUtil::Format(fmt_str: "EC: %llu\n" , params: estimated_props->GetCardinality<idx_t>()); |
35 | extra_info += StringUtil::Format(fmt_str: "Cost: %llu" , params: (idx_t)estimated_props->GetCost()); |
36 | return extra_info; |
37 | } |
38 | |
39 | void PhysicalComparisonJoin::ConstructEmptyJoinResult(JoinType join_type, bool has_null, DataChunk &input, |
40 | DataChunk &result) { |
41 | // empty hash table, special case |
42 | if (join_type == JoinType::ANTI) { |
43 | // anti join with empty hash table, NOP join |
44 | // return the input |
45 | D_ASSERT(input.ColumnCount() == result.ColumnCount()); |
46 | result.Reference(chunk&: input); |
47 | } else if (join_type == JoinType::MARK) { |
48 | // MARK join with empty hash table |
49 | D_ASSERT(join_type == JoinType::MARK); |
50 | D_ASSERT(result.ColumnCount() == input.ColumnCount() + 1); |
51 | auto &result_vector = result.data.back(); |
52 | D_ASSERT(result_vector.GetType() == LogicalType::BOOLEAN); |
53 | // for every data vector, we just reference the child chunk |
54 | result.SetCardinality(input); |
55 | for (idx_t i = 0; i < input.ColumnCount(); i++) { |
56 | result.data[i].Reference(other&: input.data[i]); |
57 | } |
58 | // for the MARK vector: |
59 | // if the HT has no NULL values (i.e. empty result set), return a vector that has false for every input |
60 | // entry if the HT has NULL values (i.e. result set had values, but all were NULL), return a vector that |
61 | // has NULL for every input entry |
62 | if (!has_null) { |
63 | auto bool_result = FlatVector::GetData<bool>(vector&: result_vector); |
64 | for (idx_t i = 0; i < result.size(); i++) { |
65 | bool_result[i] = false; |
66 | } |
67 | } else { |
68 | FlatVector::Validity(vector&: result_vector).SetAllInvalid(result.size()); |
69 | } |
70 | } else if (join_type == JoinType::LEFT || join_type == JoinType::OUTER || join_type == JoinType::SINGLE) { |
71 | // LEFT/FULL OUTER/SINGLE join and build side is empty |
72 | // for the LHS we reference the data |
73 | result.SetCardinality(input.size()); |
74 | for (idx_t i = 0; i < input.ColumnCount(); i++) { |
75 | result.data[i].Reference(other&: input.data[i]); |
76 | } |
77 | // for the RHS |
78 | for (idx_t k = input.ColumnCount(); k < result.ColumnCount(); k++) { |
79 | result.data[k].SetVectorType(VectorType::CONSTANT_VECTOR); |
80 | ConstantVector::SetNull(vector&: result.data[k], is_null: true); |
81 | } |
82 | } |
83 | } |
84 | } // namespace duckdb |
85 | |