merge_join_mark.cpp source code [DuckDB/src/execution/merge_join/merge_join_mark.cpp]

1	#include "duckdb/common/operator/comparison_operators.hpp"
2	#include "duckdb/common/vector_operations/vector_operations.hpp"
3	#include "duckdb/execution/merge_join.hpp"
4	#include "duckdb/parser/expression/comparison_expression.hpp"
5
6	using namespace duckdb;
7	using namespace std;
8
9	template <class T> idx_t MergeJoinMark::Equality::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) {
10	throw NotImplementedException ("Merge Join with Equality not implemented");
11	}
12
13	template <class T, class OP> static idx_t merge_join_mark_gt(ScalarMergeInfo &l, ChunkMergeInfo &r) {
14	auto ldata = (T *)l.order.vdata.data;
15	auto &lorder = l.order.order;
16	l.pos = l.order.count;
17	for (idx_t chunk_idx = `0`; chunk_idx < r.order_info.size(); chunk_idx++) {
18	// we only care about the SMALLEST value in each of the RHS
19	// because we want to figure out if they are greater than [or equal] to ANY value
20	// get the smallest value from the RHS
21	auto &rorder = r.order_info [chunk_idx];
22	auto rdata = (T *)rorder.vdata.data;
23	auto min_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(`0`))];
24	// now we start from the current lpos value and check if we found a new value that is [>= OR >] the min RHS
25	// value
26	while (true) {
27	auto lidx = lorder.get_index(l.pos - `1`);
28	auto dlidx = l.order.vdata.sel->get_index(lidx);
29	if (OP::Operation(ldata[dlidx], min_r_value)) {
30	// found a match for lpos, set it in the found_match vector
31	r.found_match[lidx] = true;
32	l.pos--;
33	if (l.pos == `0`) {
34	// early out: we exhausted the entire LHS and they all match
35	return `0`;
36	}
37	} else {
38	// we found no match: any subsequent value from the LHS we scan now will be smaller and thus also not
39	// match move to the next RHS chunk
40	break;
41	}
42	}
43	}
44	return `0`;
45	}
46	template <class T> idx_t MergeJoinMark::GreaterThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) {
47	return merge_join_mark_gt<T, duckdb::GreaterThan>(l, r);
48	}
49
50	template <class T> idx_t MergeJoinMark::GreaterThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) {
51	return merge_join_mark_gt<T, duckdb::GreaterThanEquals>(l, r);
52	}
53
54	template <class T, class OP> static idx_t merge_join_mark_lt(ScalarMergeInfo &l, ChunkMergeInfo &r) {
55	auto ldata = (T *)l.order.vdata.data;
56	auto &lorder = l.order.order;
57	l.pos = `0`;
58	for (idx_t chunk_idx = `0`; chunk_idx < r.order_info.size(); chunk_idx++) {
59	// we only care about the BIGGEST value in each of the RHS
60	// because we want to figure out if they are less than [or equal] to ANY value
61	// get the biggest value from the RHS
62	auto &rorder = r.order_info [chunk_idx];
63	auto rdata = (T *)rorder.vdata.data;
64	auto max_r_value = rdata[rorder.vdata.sel->get_index(rorder.order.get_index(rorder.count - `1`))];
65	// now we start from the current lpos value and check if we found a new value that is [<= OR <] the max RHS
66	// value
67	while (true) {
68	auto lidx = lorder.get_index(l.pos);
69	auto dlidx = l.order.vdata.sel->get_index(lidx);
70	if (OP::Operation(ldata[dlidx], max_r_value)) {
71	// found a match for lpos, set it in the found_match vector
72	r.found_match[lidx] = true;
73	l.pos++;
74	if (l.pos == l.order.count) {
75	// early out: we exhausted the entire LHS and they all match
76	return `0`;
77	}
78	} else {
79	// we found no match: any subsequent value from the LHS we scan now will be bigger and thus also not
80	// match move to the next RHS chunk
81	break;
82	}
83	}
84	}
85	return `0`;
86	}
87
88	template <class T> idx_t MergeJoinMark::LessThan::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) {
89	return merge_join_mark_lt<T, duckdb::LessThan>(l, r);
90	}
91
92	template <class T> idx_t MergeJoinMark::LessThanEquals::Operation(ScalarMergeInfo &l, ChunkMergeInfo &r) {
93	return merge_join_mark_lt<T, duckdb::LessThanEquals>(l, r);
94	}
95
// One macro invocation per comparison struct whose Operation template is defined in this file
// (Equality, LessThan, LessThanEquals, GreaterThan, GreaterThanEquals), each paired with the
// ScalarMergeInfo / ChunkMergeInfo argument types used by those definitions.
// NOTE(review): INSTANTIATE_MERGEJOIN_TEMPLATES is not defined in this file; presumably it comes from
// merge_join.hpp and emits explicit instantiations of Operation<T> for the supported types - confirm.
96	INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, Equality, ScalarMergeInfo, ChunkMergeInfo);
97	INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, LessThan, ScalarMergeInfo, ChunkMergeInfo);
98	INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, LessThanEquals, ScalarMergeInfo, ChunkMergeInfo);
99	INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, GreaterThan, ScalarMergeInfo, ChunkMergeInfo);
100	INSTANTIATE_MERGEJOIN_TEMPLATES(MergeJoinMark, GreaterThanEquals, ScalarMergeInfo, ChunkMergeInfo);
101

Browse the source code of DuckDB/src/execution/merge_join/merge_join_mark.cpp