| 1 | #include "duckdb/common/operator/comparison_operators.hpp" |
| 2 | #include "duckdb/execution/nested_loop_join.hpp" |
| 3 | |
| 4 | using namespace duckdb; |
| 5 | using namespace std; |
| 6 | |
| 7 | struct InitialNestedLoopJoin { |
| 8 | template <class T, class OP> |
| 9 | static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, |
| 10 | SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { |
| 11 | // initialize phase of nested loop join |
| 12 | // fill lvector and rvector with matches from the base vectors |
| 13 | VectorData left_data, right_data; |
| 14 | left.Orrify(left_size, left_data); |
| 15 | right.Orrify(right_size, right_data); |
| 16 | |
| 17 | auto ldata = (T *)left_data.data; |
| 18 | auto rdata = (T *)right_data.data; |
| 19 | idx_t result_count = 0; |
| 20 | for (; rpos < right_size; rpos++) { |
| 21 | idx_t right_position = right_data.sel->get_index(rpos); |
| 22 | if ((*right_data.nullmask)[right_position]) { |
| 23 | continue; |
| 24 | } |
| 25 | for (; lpos < left_size; lpos++) { |
| 26 | if (result_count == STANDARD_VECTOR_SIZE) { |
| 27 | // out of space! |
| 28 | return result_count; |
| 29 | } |
| 30 | idx_t left_position = left_data.sel->get_index(lpos); |
| 31 | if ((*left_data.nullmask)[left_position]) { |
| 32 | continue; |
| 33 | } |
| 34 | if (OP::Operation(ldata[left_position], rdata[right_position])) { |
| 35 | // emit tuple |
| 36 | lvector.set_index(result_count, lpos); |
| 37 | rvector.set_index(result_count, rpos); |
| 38 | result_count++; |
| 39 | } |
| 40 | } |
| 41 | lpos = 0; |
| 42 | } |
| 43 | return result_count; |
| 44 | } |
| 45 | }; |
| 46 | |
| 47 | struct RefineNestedLoopJoin { |
| 48 | template <class T, class OP> |
| 49 | static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, |
| 50 | SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) { |
| 51 | VectorData left_data, right_data; |
| 52 | left.Orrify(left_size, left_data); |
| 53 | right.Orrify(right_size, right_data); |
| 54 | |
| 55 | // refine phase of the nested loop join |
| 56 | // refine lvector and rvector based on matches of subsequent conditions (in case there are multiple conditions |
| 57 | // in the join) |
| 58 | assert(current_match_count > 0); |
| 59 | auto ldata = (T *)left_data.data; |
| 60 | auto rdata = (T *)right_data.data; |
| 61 | idx_t result_count = 0; |
| 62 | for (idx_t i = 0; i < current_match_count; i++) { |
| 63 | auto lidx = lvector.get_index(i); |
| 64 | auto ridx = rvector.get_index(i); |
| 65 | auto left_idx = left_data.sel->get_index(lidx); |
| 66 | auto right_idx = right_data.sel->get_index(ridx); |
| 67 | // null values should be filtered out before |
| 68 | if ((*left_data.nullmask)[left_idx] || (*right_data.nullmask)[right_idx]) { |
| 69 | continue; |
| 70 | } |
| 71 | if (OP::Operation(ldata[left_idx], rdata[right_idx])) { |
| 72 | lvector.set_index(result_count, lidx); |
| 73 | rvector.set_index(result_count, ridx); |
| 74 | result_count++; |
| 75 | } |
| 76 | } |
| 77 | return result_count; |
| 78 | } |
| 79 | }; |
| 80 | |
| 81 | template <class NLTYPE, class OP> |
| 82 | static idx_t nested_loop_join_inner_operator(Vector &left, Vector &right, idx_t left_size, idx_t right_size, |
| 83 | idx_t &lpos, idx_t &rpos, SelectionVector &lvector, |
| 84 | SelectionVector &rvector, idx_t current_match_count) { |
| 85 | switch (left.type) { |
| 86 | case TypeId::BOOL: |
| 87 | case TypeId::INT8: |
| 88 | return NLTYPE::template Operation<int8_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
| 89 | current_match_count); |
| 90 | case TypeId::INT16: |
| 91 | return NLTYPE::template Operation<int16_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
| 92 | current_match_count); |
| 93 | case TypeId::INT32: |
| 94 | return NLTYPE::template Operation<int32_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
| 95 | current_match_count); |
| 96 | case TypeId::INT64: |
| 97 | return NLTYPE::template Operation<int64_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
| 98 | current_match_count); |
| 99 | case TypeId::FLOAT: |
| 100 | return NLTYPE::template Operation<float, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
| 101 | current_match_count); |
| 102 | case TypeId::DOUBLE: |
| 103 | return NLTYPE::template Operation<double, OP>(left, right, left_size, right_size, lpos, rpos, lvector, rvector, |
| 104 | current_match_count); |
| 105 | case TypeId::VARCHAR: |
| 106 | return NLTYPE::template Operation<string_t, OP>(left, right, left_size, right_size, lpos, rpos, lvector, |
| 107 | rvector, current_match_count); |
| 108 | default: |
| 109 | throw NotImplementedException("Unimplemented type for join!" ); |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | template <class NLTYPE> |
| 114 | idx_t nested_loop_join_inner(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos, |
| 115 | SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count, |
| 116 | ExpressionType comparison_type) { |
| 117 | assert(left.type == right.type); |
| 118 | switch (comparison_type) { |
| 119 | case ExpressionType::COMPARE_EQUAL: |
| 120 | return nested_loop_join_inner_operator<NLTYPE, duckdb::Equals>(left, right, left_size, right_size, lpos, rpos, |
| 121 | lvector, rvector, current_match_count); |
| 122 | case ExpressionType::COMPARE_NOTEQUAL: |
| 123 | return nested_loop_join_inner_operator<NLTYPE, duckdb::NotEquals>(left, right, left_size, right_size, lpos, |
| 124 | rpos, lvector, rvector, current_match_count); |
| 125 | case ExpressionType::COMPARE_LESSTHAN: |
| 126 | return nested_loop_join_inner_operator<NLTYPE, duckdb::LessThan>(left, right, left_size, right_size, lpos, rpos, |
| 127 | lvector, rvector, current_match_count); |
| 128 | case ExpressionType::COMPARE_GREATERTHAN: |
| 129 | return nested_loop_join_inner_operator<NLTYPE, duckdb::GreaterThan>( |
| 130 | left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); |
| 131 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: |
| 132 | return nested_loop_join_inner_operator<NLTYPE, duckdb::LessThanEquals>( |
| 133 | left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); |
| 134 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
| 135 | return nested_loop_join_inner_operator<NLTYPE, duckdb::GreaterThanEquals>( |
| 136 | left, right, left_size, right_size, lpos, rpos, lvector, rvector, current_match_count); |
| 137 | default: |
| 138 | throw NotImplementedException("Unimplemented comparison type for join!" ); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | idx_t NestedLoopJoinInner::Perform(idx_t &lpos, idx_t &rpos, DataChunk &left_conditions, DataChunk &right_conditions, |
| 143 | SelectionVector &lvector, SelectionVector &rvector, |
| 144 | vector<JoinCondition> &conditions) { |
| 145 | assert(left_conditions.column_count() == right_conditions.column_count()); |
| 146 | if (lpos >= left_conditions.size() || rpos >= right_conditions.size()) { |
| 147 | return 0; |
| 148 | } |
| 149 | // for the first condition, lvector and rvector are not set yet |
| 150 | // we initialize them using the InitialNestedLoopJoin |
| 151 | idx_t match_count = nested_loop_join_inner<InitialNestedLoopJoin>( |
| 152 | left_conditions.data[0], right_conditions.data[0], left_conditions.size(), right_conditions.size(), lpos, rpos, |
| 153 | lvector, rvector, 0, conditions[0].comparison); |
| 154 | // now resolve the rest of the conditions |
| 155 | for (idx_t i = 1; i < conditions.size(); i++) { |
| 156 | // check if we have run out of tuples to compare |
| 157 | if (match_count == 0) { |
| 158 | return 0; |
| 159 | } |
| 160 | // if not, get the vectors to compare |
| 161 | Vector &l = left_conditions.data[i]; |
| 162 | Vector &r = right_conditions.data[i]; |
| 163 | // then we refine the currently obtained results using the RefineNestedLoopJoin |
| 164 | match_count = |
| 165 | nested_loop_join_inner<RefineNestedLoopJoin>(l, r, left_conditions.size(), right_conditions.size(), lpos, |
| 166 | rpos, lvector, rvector, match_count, conditions[i].comparison); |
| 167 | } |
| 168 | return match_count; |
| 169 | } |
| 170 | |