1#include "duckdb/common/operator/comparison_operators.hpp"
2#include "duckdb/execution/nested_loop_join.hpp"
3
4using namespace duckdb;
5using namespace std;
6
7struct InitialNestedLoopJoin {
8 template <class T, class OP>
9 static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
10 SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
11 // initialize phase of nested loop join
12 // fill lvector and rvector with matches from the base vectors
13 VectorData left_data, right_data;
14 left.Orrify(left_size, left_data);
15 right.Orrify(right_size, right_data);
16
17 auto ldata = (T *)left_data.data;
18 auto rdata = (T *)right_data.data;
19 idx_t result_count = 0;
20 for (; rpos < right_size; rpos++) {
21 idx_t right_position = right_data.sel->get_index(rpos);
22 if ((*right_data.nullmask)[right_position]) {
23 continue;
24 }
25 for (; lpos < left_size; lpos++) {
26 if (result_count == STANDARD_VECTOR_SIZE) {
27 // out of space!
28 return result_count;
29 }
30 idx_t left_position = left_data.sel->get_index(lpos);
31 if ((*left_data.nullmask)[left_position]) {
32 continue;
33 }
34 if (OP::Operation(ldata[left_position], rdata[right_position])) {
35 // emit tuple
36 lvector.set_index(result_count, lpos);
37 rvector.set_index(result_count, rpos);
38 result_count++;
39 }
40 }
41 lpos = 0;
42 }
43 return result_count;
44 }
45};
46
47struct RefineNestedLoopJoin {
48 template <class T, class OP>
49 static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
50 SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
51 VectorData left_data, right_data;
52 left.Orrify(left_size, left_data);
53 right.Orrify(right_size, right_data);
54
55 // refine phase of the nested loop join
56 // refine lvector and rvector based on matches of subsequent conditions (in case there are multiple conditions
57 // in the join)
58 assert(current_match_count > 0);
59 auto ldata = (T *)left_data.data;
60 auto rdata = (T *)right_data.data;
61 idx_t result_count = 0;
62 for (idx_t i = 0; i < current_match_count; i++) {
63 auto lidx = lvector.get_index(i);
64 auto ridx = rvector.get_index(i);
65 auto left_idx = left_data.sel->get_index(lidx);
66 auto right_idx = right_data.sel->get_index(ridx);
67 // null values should be filtered out before
68 if ((*left_data.nullmask)[left_idx] || (*right_data.nullmask)[right_idx]) {
69 continue;
70 }
71 if (OP::Operation(ldata[left_idx], rdata[right_idx])) {
72 lvector.set_index(result_count, lidx);
73 rvector.set_index(result_count, ridx);
74 result_count++;
75 }
76 }
77 return result_count;
78 }
79};
80
81template <class NLTYPE, class OP>
82static idx_t nested_loop_join_inner_operator(Vector &left, Vector &right, idx_t left_size, idx_t right_size,
83 idx_t &lpos, idx_t &rpos, SelectionVector &lvector,
84 SelectionVector &rvector, idx_t current_match_count) {
85 switch (left.type) {
86 case TypeId::BOOL:
87 case TypeId::INT8:
88 return NLTYPE::template Operation<int8_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector,
89 current_match_count);
90 case TypeId::INT16:
91 return NLTYPE::template Operation<int16_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector,
92 current_match_count);
93 case TypeId::INT32:
94 return NLTYPE::template Operation<int32_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector,
95 current_match_count);
96 case TypeId::INT64:
97 return NLTYPE::template Operation<int64_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector,
98 current_match_count);
99 case TypeId::FLOAT:
100 return NLTYPE::template Operation<float, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector,
101 current_match_count);
102 case TypeId::DOUBLE:
103 return NLTYPE::template Operation<double, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector,
104 current_match_count);
105 case TypeId::VARCHAR:
106 return NLTYPE::template Operation<string_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector,
107 rvector, current_match_count);
108 default:
109 throw NotImplementedException("Unimplemented type for join!");
110 }
111}
112
113template <class NLTYPE>
114idx_t nested_loop_join_inner(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
115 SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count,
116 ExpressionType comparison_type) {
117 assert(left.type == right.type);
118 switch (comparison_type) {
119 case ExpressionType::COMPARE_EQUAL:
120 return nested_loop_join_inner_operator<NLTYPE, duckdb::Equals>(left, right, left_size, right_size, lpos, rpos,
121 lvector, rvector, current_match_count);
122 case ExpressionType::COMPARE_NOTEQUAL:
123 return nested_loop_join_inner_operator<NLTYPE, duckdb::NotEquals>(left, right, left_size, right_size, lpos,
124 rpos, lvector, rvector, current_match_count);
125 case ExpressionType::COMPARE_LESSTHAN:
126 return nested_loop_join_inner_operator<NLTYPE, duckdb::LessThan>(left, right, left_size, right_size, lpos, rpos,
127 lvector, rvector, current_match_count);
128 case ExpressionType::COMPARE_GREATERTHAN:
129 return nested_loop_join_inner_operator<NLTYPE, duckdb::GreaterThan>(
130 left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
131 case ExpressionType::COMPARE_LESSTHANOREQUALTO:
132 return nested_loop_join_inner_operator<NLTYPE, duckdb::LessThanEquals>(
133 left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
134 case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
135 return nested_loop_join_inner_operator<NLTYPE, duckdb::GreaterThanEquals>(
136 left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count);
137 default:
138 throw NotImplementedException("Unimplemented comparison type for join!");
139 }
140}
141
142idx_t NestedLoopJoinInner::Perform(idx_t &lpos, idx_t &rpos, DataChunk &left_conditions, DataChunk &right_conditions,
143 SelectionVector &lvector, SelectionVector &rvector,
144 vector<JoinCondition> &conditions) {
145 assert(left_conditions.column_count() == right_conditions.column_count());
146 if (lpos >= left_conditions.size() || rpos >= right_conditions.size()) {
147 return 0;
148 }
149 // for the first condition, lvector and rvector are not set yet
150 // we initialize them using the InitialNestedLoopJoin
151 idx_t match_count = nested_loop_join_inner<InitialNestedLoopJoin>(
152 left_conditions.data[0], right_conditions.data[0], left_conditions.size(), right_conditions.size(), lpos, rpos,
153 lvector, rvector, 0, conditions[0].comparison);
154 // now resolve the rest of the conditions
155 for (idx_t i = 1; i < conditions.size(); i++) {
156 // check if we have run out of tuples to compare
157 if (match_count == 0) {
158 return 0;
159 }
160 // if not, get the vectors to compare
161 Vector &l = left_conditions.data[i];
162 Vector &r = right_conditions.data[i];
163 // then we refine the currently obtained results using the RefineNestedLoopJoin
164 match_count =
165 nested_loop_join_inner<RefineNestedLoopJoin>(l, r, left_conditions.size(), right_conditions.size(), lpos,
166 rpos, lvector, rvector, match_count, conditions[i].comparison);
167 }
168 return match_count;
169}
170