1 | #include "duckdb/common/operator/comparison_operators.hpp" |
2 | #include "duckdb/execution/nested_loop_join.hpp" |
3 | |
4 | using namespace duckdb; |
5 | using namespace std; |
6 | |
7 | struct InitialNestedLoopJoin { |
8 | template <class T, class OP> |
9 | static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, |
10 | SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { |
11 | // initialize phase of nested loop join |
12 | // fill lvector and rvector with matches from the base vectors |
13 | VectorData left_data, right_data; |
14 | left.Orrify(left_size, left_data); |
15 | right.Orrify(right_size, right_data); |
16 | |
17 | auto ldata = (T *)left_data.data; |
18 | auto rdata = (T *)right_data.data; |
19 | idx_t result_count = 0; |
20 | for (; rpos < right_size; rpos++) { |
21 | idx_t right_position = right_data.sel->get_index(rpos); |
22 | if ((*right_data.nullmask)[right_position]) { |
23 | continue; |
24 | } |
25 | for (; lpos < left_size; lpos++) { |
26 | if (result_count == STANDARD_VECTOR_SIZE) { |
27 | // out of space! |
28 | return result_count; |
29 | } |
30 | idx_t left_position = left_data.sel->get_index(lpos); |
31 | if ((*left_data.nullmask)[left_position]) { |
32 | continue; |
33 | } |
34 | if (OP::Operation(ldata[left_position], rdata[right_position])) { |
35 | // emit tuple |
36 | lvector.set_index(result_count, lpos); |
37 | rvector.set_index(result_count, rpos); |
38 | result_count++; |
39 | } |
40 | } |
41 | lpos = 0; |
42 | } |
43 | return result_count; |
44 | } |
45 | }; |
46 | |
47 | struct RefineNestedLoopJoin { |
48 | template <class T, class OP> |
49 | static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, |
50 | SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { |
51 | VectorData left_data, right_data; |
52 | left.Orrify(left_size, left_data); |
53 | right.Orrify(right_size, right_data); |
54 | |
55 | // refine phase of the nested loop join |
56 | // refine lvector and rvector based on matches of subsequent conditions (in case there are multiple conditions |
57 | // in the join) |
58 | assert(current_match_count > 0); |
59 | auto ldata = (T *)left_data.data; |
60 | auto rdata = (T *)right_data.data; |
61 | idx_t result_count = 0; |
62 | for (idx_t i = 0; i < current_match_count; i++) { |
63 | auto lidx = lvector.get_index(i); |
64 | auto ridx = rvector.get_index(i); |
65 | auto left_idx = left_data.sel->get_index(lidx); |
66 | auto right_idx = right_data.sel->get_index(ridx); |
67 | // null values should be filtered out before |
68 | if ((*left_data.nullmask)[left_idx] || (*right_data.nullmask)[right_idx]) { |
69 | continue; |
70 | } |
71 | if (OP::Operation(ldata[left_idx], rdata[right_idx])) { |
72 | lvector.set_index(result_count, lidx); |
73 | rvector.set_index(result_count, ridx); |
74 | result_count++; |
75 | } |
76 | } |
77 | return result_count; |
78 | } |
79 | }; |
80 | |
81 | template <class NLTYPE, class OP> |
82 | static idx_t nested_loop_join_inner_operator(Vector &left, Vector &right, idx_t left_size, idx_t right_size, |
83 | idx_t &lpos, idx_t &rpos, SelectionVector &lvector, |
84 | SelectionVector &rvector, idx_t current_match_count) { |
85 | switch (left.type) { |
86 | case TypeId::BOOL: |
87 | case TypeId::INT8: |
88 | return NLTYPE::template Operation<int8_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
89 | current_match_count); |
90 | case TypeId::INT16: |
91 | return NLTYPE::template Operation<int16_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
92 | current_match_count); |
93 | case TypeId::INT32: |
94 | return NLTYPE::template Operation<int32_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
95 | current_match_count); |
96 | case TypeId::INT64: |
97 | return NLTYPE::template Operation<int64_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
98 | current_match_count); |
99 | case TypeId::FLOAT: |
100 | return NLTYPE::template Operation<float, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
101 | current_match_count); |
102 | case TypeId::DOUBLE: |
103 | return NLTYPE::template Operation<double, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
104 | current_match_count); |
105 | case TypeId::VARCHAR: |
106 | return NLTYPE::template Operation<string_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, |
107 | rvector, current_match_count); |
108 | default: |
109 | throw NotImplementedException("Unimplemented type for join!" ); |
110 | } |
111 | } |
112 | |
113 | template <class NLTYPE> |
114 | idx_t nested_loop_join_inner(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, |
115 | SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count, |
116 | ExpressionType comparison_type) { |
117 | assert(left.type == right.type); |
118 | switch (comparison_type) { |
119 | case ExpressionType::COMPARE_EQUAL: |
120 | return nested_loop_join_inner_operator<NLTYPE, duckdb::Equals>(left, right, left_size, right_size, lpos, rpos, |
121 | lvector, rvector, current_match_count); |
122 | case ExpressionType::COMPARE_NOTEQUAL: |
123 | return nested_loop_join_inner_operator<NLTYPE, duckdb::NotEquals>(left, right, left_size, right_size, lpos, |
124 | rpos, lvector, rvector, current_match_count); |
125 | case ExpressionType::COMPARE_LESSTHAN: |
126 | return nested_loop_join_inner_operator<NLTYPE, duckdb::LessThan>(left, right, left_size, right_size, lpos, rpos, |
127 | lvector, rvector, current_match_count); |
128 | case ExpressionType::COMPARE_GREATERTHAN: |
129 | return nested_loop_join_inner_operator<NLTYPE, duckdb::GreaterThan>( |
130 | left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); |
131 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: |
132 | return nested_loop_join_inner_operator<NLTYPE, duckdb::LessThanEquals>( |
133 | left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); |
134 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
135 | return nested_loop_join_inner_operator<NLTYPE, duckdb::GreaterThanEquals>( |
136 | left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); |
137 | default: |
138 | throw NotImplementedException("Unimplemented comparison type for join!" ); |
139 | } |
140 | } |
141 | |
142 | idx_t NestedLoopJoinInner::Perform(idx_t &lpos, idx_t &rpos, DataChunk &left_conditions, DataChunk &right_conditions, |
143 | SelectionVector &lvector, SelectionVector &rvector, |
144 | vector<JoinCondition> &conditions) { |
145 | assert(left_conditions.column_count() == right_conditions.column_count()); |
146 | if (lpos >= left_conditions.size() || rpos >= right_conditions.size()) { |
147 | return 0; |
148 | } |
149 | // for the first condition, lvector and rvector are not set yet |
150 | // we initialize them using the InitialNestedLoopJoin |
151 | idx_t match_count = nested_loop_join_inner<InitialNestedLoopJoin>( |
152 | left_conditions.data[0], right_conditions.data[0], left_conditions.size(), right_conditions.size(), lpos, rpos, |
153 | lvector, rvector, 0, conditions[0].comparison); |
154 | // now resolve the rest of the conditions |
155 | for (idx_t i = 1; i < conditions.size(); i++) { |
156 | // check if we have run out of tuples to compare |
157 | if (match_count == 0) { |
158 | return 0; |
159 | } |
160 | // if not, get the vectors to compare |
161 | Vector &l = left_conditions.data[i]; |
162 | Vector &r = right_conditions.data[i]; |
163 | // then we refine the currently obtained results using the RefineNestedLoopJoin |
164 | match_count = |
165 | nested_loop_join_inner<RefineNestedLoopJoin>(l, r, left_conditions.size(), right_conditions.size(), lpos, |
166 | rpos, lvector, rvector, match_count, conditions[i].comparison); |
167 | } |
168 | return match_count; |
169 | } |
170 | |