1 | #include "duckdb/common/operator/comparison_operators.hpp" |
2 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
3 | #include "duckdb/execution/merge_join.hpp" |
4 | #include "duckdb/parser/expression/comparison_expression.hpp" |
5 | |
6 | using namespace duckdb; |
7 | using namespace std; |
8 | |
9 | template <class T> idx_t MergeJoinMark::Equality::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { |
10 | throw NotImplementedException("Merge Join with Equality not implemented" ); |
11 | } |
12 | |
13 | template <class T, class OP> static idx_t merge_join_mark_gt(ScalarMergeInfo &l, ChunkMergeInfo &r) { |
14 | auto ldata = (T *)l.order.vdata.data; |
15 | auto &lorder = l.order.order; |
16 | l.pos = l.order.count; |
17 | for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { |
18 | // we only care about the SMALLEST value in each of the RHS |
19 | // because we want to figure out if they are greater than [or equal] to ANY value |
20 | // get the smallest value from the RHS |
21 | auto &rorder = r.order_info[chunk_idx]; |
22 | auto rdata = (T *)rorder.vdata.data; |
23 | auto min_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(0))]; |
24 | // now we start from the current lpos value and check if we found a new value that is [>= OR >] the min RHS |
25 | // value |
26 | while (true) { |
27 | auto lidx = lorder.get_index(l.pos - 1); |
28 | auto dlidx = l.order.vdata.sel->get_index(lidx); |
29 | if (OP::Operation(ldata[dlidx], min_r_value)) { |
30 | // found a match for lpos, set it in the found_match vector |
31 | r.found_match[lidx] = true; |
32 | l.pos--; |
33 | if (l.pos == 0) { |
34 | // early out: we exhausted the entire LHS and they all match |
35 | return 0; |
36 | } |
37 | } else { |
38 | // we found no match: any subsequent value from the LHS we scan now will be smaller and thus also not |
39 | // match move to the next RHS chunk |
40 | break; |
41 | } |
42 | } |
43 | } |
44 | return 0; |
45 | } |
46 | template <class T> idx_t MergeJoinMark::GreaterThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { |
47 | return merge_join_mark_gt<T, duckdb::GreaterThan>(l, r); |
48 | } |
49 | |
50 | template <class T> idx_t MergeJoinMark::GreaterThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { |
51 | return merge_join_mark_gt<T, duckdb::GreaterThanEquals>(l, r); |
52 | } |
53 | |
54 | template <class T, class OP> static idx_t merge_join_mark_lt(ScalarMergeInfo &l, ChunkMergeInfo &r) { |
55 | auto ldata = (T *)l.order.vdata.data; |
56 | auto &lorder = l.order.order; |
57 | l.pos = 0; |
58 | for (idx_t chunk_idx = 0; chunk_idx < r.order_info.size(); chunk_idx++) { |
59 | // we only care about the BIGGEST value in each of the RHS |
60 | // because we want to figure out if they are less than [or equal] to ANY value |
61 | // get the biggest value from the RHS |
62 | auto &rorder = r.order_info[chunk_idx]; |
63 | auto rdata = (T *)rorder.vdata.data; |
64 | auto max_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(rorder.count - 1))]; |
65 | // now we start from the current lpos value and check if we found a new value that is [<= OR <] the max RHS |
66 | // value |
67 | while (true) { |
68 | auto lidx = lorder.get_index(l.pos); |
69 | auto dlidx = l.order.vdata.sel->get_index(lidx); |
70 | if (OP::Operation(ldata[dlidx], max_r_value)) { |
71 | // found a match for lpos, set it in the found_match vector |
72 | r.found_match[lidx] = true; |
73 | l.pos++; |
74 | if (l.pos == l.order.count) { |
75 | // early out: we exhausted the entire LHS and they all match |
76 | return 0; |
77 | } |
78 | } else { |
79 | // we found no match: any subsequent value from the LHS we scan now will be bigger and thus also not |
80 | // match move to the next RHS chunk |
81 | break; |
82 | } |
83 | } |
84 | } |
85 | return 0; |
86 | } |
87 | |
88 | template <class T> idx_t MergeJoinMark::LessThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { |
89 | return merge_join_mark_lt<T, duckdb::LessThan>(l, r); |
90 | } |
91 | |
92 | template <class T> idx_t MergeJoinMark::LessThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) { |
93 | return merge_join_mark_lt<T, duckdb::LessThanEquals>(l, r); |
94 | } |
95 | |
// One macro invocation per comparison operator defined in this file, each for the
// (ScalarMergeInfo, ChunkMergeInfo) argument pair used by the mark join.
// NOTE(review): the macro is not defined in this file; presumably it emits explicit template instantiations of
// MergeJoinMark::<Op>::Operation<T> for the supported key types, so the definitions above can stay in this
// translation unit -- confirm against the macro definition.
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, Equality, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, LessThan, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, LessThanEquals, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, GreaterThan, ScalarMergeInfo, ChunkMergeInfo);
INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, GreaterThanEquals, ScalarMergeInfo, ChunkMergeInfo);
101 | |