| 1 | #include "duckdb/common/operator/comparison_operators.hpp" | 
|---|
| 2 | #include "duckdb/common/vector_operations/vector_operations.hpp" | 
|---|
| 3 | #include "duckdb/execution/merge_join.hpp" | 
|---|
| 4 | #include "duckdb/parser/expression/comparison_expression.hpp" | 
|---|
| 5 |  | 
|---|
| 6 | using namespace duckdb; | 
|---|
| 7 | using namespace std; | 
|---|
| 8 |  | 
|---|
| 9 | template <class T> idx_t MergeJoinMark::Equality::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { | 
|---|
| 10 | throw NotImplementedException( "Merge Join with Equality not implemented"); | 
|---|
| 11 | } | 
|---|
| 12 |  | 
|---|
| 13 | template <class T, class OP> static idx_t merge_join_mark_gt(ScalarMergeInfo &l, ChunkMergeInfo &r) { | 
|---|
| 14 | auto ldata = (T *)l.order.vdata.data; | 
|---|
| 15 | auto &lorder = l.order.order; | 
|---|
| 16 | l.pos = l.order.count; | 
|---|
| 17 | for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { | 
|---|
| 18 | // we only care about the SMALLEST value in each of the RHS | 
|---|
| 19 | // because we want to figure out if they are greater than [or equal] to ANY value | 
|---|
| 20 | // get the smallest value from the RHS | 
|---|
| 21 | auto &rorder = r.order_info[chunk_idx]; | 
|---|
| 22 | auto rdata = (T *)rorder.vdata.data; | 
|---|
| 23 | auto min_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(0))]; | 
|---|
| 24 | // now we start from the current lpos value and check if we found a new value that is [>= OR >] the min RHS | 
|---|
| 25 | // value | 
|---|
| 26 | while (true) { | 
|---|
| 27 | auto lidx = lorder.get_index(l.pos - 1); | 
|---|
| 28 | auto dlidx = l.order.vdata.sel->get_index(lidx); | 
|---|
| 29 | if (OP::Operation(ldata[dlidx], min_r_value)) { | 
|---|
| 30 | // found a match for lpos, set it in the found_match vector | 
|---|
| 31 | r.found_match[lidx] = true; | 
|---|
| 32 | l.pos--; | 
|---|
| 33 | if (l.pos == 0) { | 
|---|
| 34 | // early out: we exhausted the entire LHS and they all match | 
|---|
| 35 | return 0; | 
|---|
| 36 | } | 
|---|
| 37 | } else { | 
|---|
| 38 | // we found no match: any subsequent value from the LHS we scan now will be smaller and thus also not | 
|---|
| 39 | // match move to the next RHS chunk | 
|---|
| 40 | break; | 
|---|
| 41 | } | 
|---|
| 42 | } | 
|---|
| 43 | } | 
|---|
| 44 | return 0; | 
|---|
| 45 | } | 
|---|
| 46 | template <class T> idx_t MergeJoinMark::GreaterThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { | 
|---|
| 47 | return merge_join_mark_gt<T, duckdb::GreaterThan>(l, r); | 
|---|
| 48 | } | 
|---|
| 49 |  | 
|---|
| 50 | template <class T> idx_t MergeJoinMark::GreaterThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { | 
|---|
| 51 | return merge_join_mark_gt<T, duckdb::GreaterThanEquals>(l, r); | 
|---|
| 52 | } | 
|---|
| 53 |  | 
|---|
| 54 | template <class T, class OP> static idx_t merge_join_mark_lt(ScalarMergeInfo &l, ChunkMergeInfo &r) { | 
|---|
| 55 | auto ldata = (T *)l.order.vdata.data; | 
|---|
| 56 | auto &lorder = l.order.order; | 
|---|
| 57 | l.pos = 0; | 
|---|
| 58 | for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { | 
|---|
| 59 | // we only care about the BIGGEST value in each of the RHS | 
|---|
| 60 | // because we want to figure out if they are less than [or equal] to ANY value | 
|---|
| 61 | // get the biggest value from the RHS | 
|---|
| 62 | auto &rorder = r.order_info[chunk_idx]; | 
|---|
| 63 | auto rdata = (T *)rorder.vdata.data; | 
|---|
| 64 | auto max_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(rorder.count - 1))]; | 
|---|
| 65 | // now we start from the current lpos value and check if we found a new value that is [<= OR <] the max RHS | 
|---|
| 66 | // value | 
|---|
| 67 | while (true) { | 
|---|
| 68 | auto lidx = lorder.get_index(l.pos); | 
|---|
| 69 | auto dlidx = l.order.vdata.sel->get_index(lidx); | 
|---|
| 70 | if (OP::Operation(ldata[dlidx], max_r_value)) { | 
|---|
| 71 | // found a match for lpos, set it in the found_match vector | 
|---|
| 72 | r.found_match[lidx] = true; | 
|---|
| 73 | l.pos++; | 
|---|
| 74 | if (l.pos == l.order.count) { | 
|---|
| 75 | // early out: we exhausted the entire LHS and they all match | 
|---|
| 76 | return 0; | 
|---|
| 77 | } | 
|---|
| 78 | } else { | 
|---|
| 79 | // we found no match: any subsequent value from the LHS we scan now will be bigger and thus also not | 
|---|
| 80 | // match move to the next RHS chunk | 
|---|
| 81 | break; | 
|---|
| 82 | } | 
|---|
| 83 | } | 
|---|
| 84 | } | 
|---|
| 85 | return 0; | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | template <class T> idx_t MergeJoinMark::LessThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { | 
|---|
| 89 | return merge_join_mark_lt<T, duckdb::LessThan>(l, r); | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | template <class T> idx_t MergeJoinMark::LessThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { | 
|---|
| 93 | return merge_join_mark_lt<T, duckdb::LessThanEquals>(l, r); | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
// One macro invocation per MergeJoinMark comparison defined in this file, each for the
// (ScalarMergeInfo, ChunkMergeInfo) argument pair. Equality is listed as well, although its Operation always throws.
// NOTE(review): INSTANTIATE_MERGEJOIN_TEMPLATES is not defined in this file (presumably it comes from
// duckdb/execution/merge_join.hpp) and presumably emits the explicit instantiations of Operation<T> for every
// supported T, which is what allows the template definitions to live in this source file -- confirm.
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, Equality, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, LessThan, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, LessThanEquals, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, GreaterThan, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, GreaterThanEquals, ScalarMergeInfo, ChunkMergeInfo);
|---|
| 101 |  | 
|---|