1#include "duckdb/execution/operator/join/physical_comparison_join.hpp"
2#include "duckdb/common/types/chunk_collection.hpp"
3#include "duckdb/common/enum_util.hpp"
4
5namespace duckdb {
6
7PhysicalComparisonJoin::PhysicalComparisonJoin(LogicalOperator &op, PhysicalOperatorType type,
8 vector<JoinCondition> conditions_p, JoinType join_type,
9 idx_t estimated_cardinality)
10 : PhysicalJoin(op, type, join_type, estimated_cardinality) {
11 conditions.resize(new_size: conditions_p.size());
12 // we reorder conditions so the ones with COMPARE_EQUAL occur first
13 idx_t equal_position = 0;
14 idx_t other_position = conditions_p.size() - 1;
15 for (idx_t i = 0; i < conditions_p.size(); i++) {
16 if (conditions_p[i].comparison == ExpressionType::COMPARE_EQUAL ||
17 conditions_p[i].comparison == ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
18 // COMPARE_EQUAL and COMPARE_NOT_DISTINCT_FROM, move to the start
19 conditions[equal_position++] = std::move(conditions_p[i]);
20 } else {
21 // other expression, move to the end
22 conditions[other_position--] = std::move(conditions_p[i]);
23 }
24 }
25}
26
27string PhysicalComparisonJoin::ParamsToString() const {
28 string extra_info = EnumUtil::ToString(value: join_type) + "\n";
29 for (auto &it : conditions) {
30 string op = ExpressionTypeToOperator(type: it.comparison);
31 extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n";
32 }
33 extra_info += "\n[INFOSEPARATOR]\n";
34 extra_info += StringUtil::Format(fmt_str: "EC: %llu\n", params: estimated_props->GetCardinality<idx_t>());
35 extra_info += StringUtil::Format(fmt_str: "Cost: %llu", params: (idx_t)estimated_props->GetCost());
36 return extra_info;
37}
38
39void PhysicalComparisonJoin::ConstructEmptyJoinResult(JoinType join_type, bool has_null, DataChunk &input,
40 DataChunk &result) {
41 // empty hash table, special case
42 if (join_type == JoinType::ANTI) {
43 // anti join with empty hash table, NOP join
44 // return the input
45 D_ASSERT(input.ColumnCount() == result.ColumnCount());
46 result.Reference(chunk&: input);
47 } else if (join_type == JoinType::MARK) {
48 // MARK join with empty hash table
49 D_ASSERT(join_type == JoinType::MARK);
50 D_ASSERT(result.ColumnCount() == input.ColumnCount() + 1);
51 auto &result_vector = result.data.back();
52 D_ASSERT(result_vector.GetType() == LogicalType::BOOLEAN);
53 // for every data vector, we just reference the child chunk
54 result.SetCardinality(input);
55 for (idx_t i = 0; i < input.ColumnCount(); i++) {
56 result.data[i].Reference(other&: input.data[i]);
57 }
58 // for the MARK vector:
59 // if the HT has no NULL values (i.e. empty result set), return a vector that has false for every input
60 // entry if the HT has NULL values (i.e. result set had values, but all were NULL), return a vector that
61 // has NULL for every input entry
62 if (!has_null) {
63 auto bool_result = FlatVector::GetData<bool>(vector&: result_vector);
64 for (idx_t i = 0; i < result.size(); i++) {
65 bool_result[i] = false;
66 }
67 } else {
68 FlatVector::Validity(vector&: result_vector).SetAllInvalid(result.size());
69 }
70 } else if (join_type == JoinType::LEFT || join_type == JoinType::OUTER || join_type == JoinType::SINGLE) {
71 // LEFT/FULL OUTER/SINGLE join and build side is empty
72 // for the LHS we reference the data
73 result.SetCardinality(input.size());
74 for (idx_t i = 0; i < input.ColumnCount(); i++) {
75 result.data[i].Reference(other&: input.data[i]);
76 }
77 // for the RHS
78 for (idx_t k = input.ColumnCount(); k < result.ColumnCount(); k++) {
79 result.data[k].SetVectorType(VectorType::CONSTANT_VECTOR);
80 ConstantVector::SetNull(vector&: result.data[k], is_null: true);
81 }
82 }
83}
84} // namespace duckdb
85