1//===--------------------------------------------------------------------===//
2// row_match.cpp
3// Description: This file contains the implementation of the match operators
4//===--------------------------------------------------------------------===//
5
6#include "duckdb/common/exception.hpp"
7#include "duckdb/common/operator/comparison_operators.hpp"
8#include "duckdb/common/operator/constant_operators.hpp"
9#include "duckdb/common/row_operations/row_operations.hpp"
10#include "duckdb/common/types/row/tuple_data_collection.hpp"
11
12namespace duckdb {
13
14using ValidityBytes = RowLayout::ValidityBytes;
15using Predicates = RowOperations::Predicates;
16
17template <typename OP>
18static idx_t SelectComparison(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
19 SelectionVector *true_sel, SelectionVector *false_sel) {
20 throw NotImplementedException("Unsupported nested comparison operand for RowOperations::Match");
21}
22
23template <>
24idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
25 SelectionVector *true_sel, SelectionVector *false_sel) {
26 return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel);
27}
28
29template <>
30idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
31 SelectionVector *true_sel, SelectionVector *false_sel) {
32 return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel);
33}
34
35template <>
36idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
37 SelectionVector *true_sel, SelectionVector *false_sel) {
38 return VectorOperations::DistinctGreaterThan(left, right, sel: &sel, count, true_sel, false_sel);
39}
40
41template <>
42idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
43 SelectionVector *true_sel, SelectionVector *false_sel) {
44 return VectorOperations::DistinctGreaterThanEquals(left, right, sel: &sel, count, true_sel, false_sel);
45}
46
47template <>
48idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
49 SelectionVector *true_sel, SelectionVector *false_sel) {
50 return VectorOperations::DistinctLessThan(left, right, sel: &sel, count, true_sel, false_sel);
51}
52
53template <>
54idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
55 SelectionVector *true_sel, SelectionVector *false_sel) {
56 return VectorOperations::DistinctLessThanEquals(left, right, sel: &sel, count, true_sel, false_sel);
57}
58
59template <class T, class OP, bool NO_MATCH_SEL>
60static void TemplatedMatchType(UnifiedVectorFormat &col, Vector &rows, SelectionVector &sel, idx_t &count,
61 idx_t col_offset, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
62 // Precompute row_mask indexes
63 idx_t entry_idx;
64 idx_t idx_in_entry;
65 ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry);
66
67 auto data = UnifiedVectorFormat::GetData<T>(col);
68 auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows);
69 idx_t match_count = 0;
70 if (!col.validity.AllValid()) {
71 for (idx_t i = 0; i < count; i++) {
72 auto idx = sel.get_index(idx: i);
73
74 auto row = ptrs[idx];
75 ValidityBytes row_mask(row);
76 auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry);
77
78 auto col_idx = col.sel->get_index(idx);
79 if (!col.validity.RowIsValid(row_idx: col_idx)) {
80 if (isnull) {
81 // match: move to next value to compare
82 sel.set_index(idx: match_count++, loc: idx);
83 } else {
84 if (NO_MATCH_SEL) {
85 no_match->set_index(idx: no_match_count++, loc: idx);
86 }
87 }
88 } else {
89 auto value = Load<T>(row + col_offset);
90 if (!isnull && OP::template Operation<T>(data[col_idx], value)) {
91 sel.set_index(idx: match_count++, loc: idx);
92 } else {
93 if (NO_MATCH_SEL) {
94 no_match->set_index(idx: no_match_count++, loc: idx);
95 }
96 }
97 }
98 }
99 } else {
100 for (idx_t i = 0; i < count; i++) {
101 auto idx = sel.get_index(idx: i);
102
103 auto row = ptrs[idx];
104 ValidityBytes row_mask(row);
105 auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry);
106
107 auto col_idx = col.sel->get_index(idx);
108 auto value = Load<T>(row + col_offset);
109 if (!isnull && OP::template Operation<T>(data[col_idx], value)) {
110 sel.set_index(idx: match_count++, loc: idx);
111 } else {
112 if (NO_MATCH_SEL) {
113 no_match->set_index(idx: no_match_count++, loc: idx);
114 }
115 }
116 }
117 }
118 count = match_count;
119}
120
121//! Forward declaration for recursion
122template <class OP, bool NO_MATCH_SEL>
123static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
124 SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
125 idx_t &no_match_count, const idx_t original_count);
126
127template <class OP, bool NO_MATCH_SEL>
128static void TemplatedMatchStruct(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
129 SelectionVector &sel, idx_t &count, const idx_t col_no, SelectionVector *no_match,
130 idx_t &no_match_count, const idx_t original_count) {
131 // Precompute row_mask indexes
132 idx_t entry_idx;
133 idx_t idx_in_entry;
134 ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry);
135
136 // Work our way through the validity of the whole struct
137 auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows);
138 idx_t match_count = 0;
139 if (!col.validity.AllValid()) {
140 for (idx_t i = 0; i < count; i++) {
141 auto idx = sel.get_index(idx: i);
142
143 auto row = ptrs[idx];
144 ValidityBytes row_mask(row);
145 auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry);
146
147 auto col_idx = col.sel->get_index(idx);
148 if (!col.validity.RowIsValid(row_idx: col_idx)) {
149 if (isnull) {
150 // match: move to next value to compare
151 sel.set_index(idx: match_count++, loc: idx);
152 } else {
153 if (NO_MATCH_SEL) {
154 no_match->set_index(idx: no_match_count++, loc: idx);
155 }
156 }
157 } else {
158 if (!isnull) {
159 sel.set_index(idx: match_count++, loc: idx);
160 } else {
161 if (NO_MATCH_SEL) {
162 no_match->set_index(idx: no_match_count++, loc: idx);
163 }
164 }
165 }
166 }
167 } else {
168 for (idx_t i = 0; i < count; i++) {
169 auto idx = sel.get_index(idx: i);
170
171 auto row = ptrs[idx];
172 ValidityBytes row_mask(row);
173 auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry);
174
175 if (!isnull) {
176 sel.set_index(idx: match_count++, loc: idx);
177 } else {
178 if (NO_MATCH_SEL) {
179 no_match->set_index(idx: no_match_count++, loc: idx);
180 }
181 }
182 }
183 }
184 count = match_count;
185
186 // Now we construct row pointers to the structs
187 Vector struct_rows(LogicalTypeId::POINTER);
188 auto struct_ptrs = FlatVector::GetData<data_ptr_t>(vector&: struct_rows);
189
190 const auto col_offset = layout.GetOffsets()[col_no];
191 for (idx_t i = 0; i < count; i++) {
192 auto idx = sel.get_index(idx: i);
193 auto row = ptrs[idx];
194 struct_ptrs[idx] = row + col_offset;
195 }
196
197 // Get the struct layout, child columns, then recurse
198 const auto &struct_layout = layout.GetStructLayout(col_idx: col_no);
199 auto &struct_entries = StructVector::GetEntries(vector&: vec);
200 D_ASSERT(struct_layout.ColumnCount() == struct_entries.size());
201 for (idx_t struct_col_no = 0; struct_col_no < struct_layout.ColumnCount(); struct_col_no++) {
202 auto &struct_vec = *struct_entries[struct_col_no];
203 UnifiedVectorFormat struct_col;
204 struct_vec.ToUnifiedFormat(count: original_count, data&: struct_col);
205 TemplatedMatchOp<OP, NO_MATCH_SEL>(struct_vec, struct_col, struct_layout, struct_rows, sel, count,
206 struct_col_no, no_match, no_match_count, original_count);
207 }
208}
209
210template <class OP, bool NO_MATCH_SEL>
211static void TemplatedMatchList(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count,
212 const TupleDataLayout &layout, const idx_t col_no, SelectionVector *no_match,
213 idx_t &no_match_count) {
214 // Gather a dense Vector containing the column values being matched
215 Vector key(col.GetType());
216 const auto gather_function = TupleDataCollection::GetGatherFunction(type: col.GetType());
217 gather_function.function(layout, rows, col_no, sel, count, key, *FlatVector::IncrementalSelectionVector(), key,
218 gather_function.child_functions);
219
220 // Densify the input column
221 Vector sliced(col, sel, count);
222
223 if (NO_MATCH_SEL) {
224 SelectionVector no_match_sel_offset(no_match->data() + no_match_count);
225 auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset);
226 no_match_count += count - match_count;
227 count = match_count;
228 } else {
229 count = SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr);
230 }
231}
232
233template <class OP, bool NO_MATCH_SEL>
234static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows,
235 SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
236 idx_t &no_match_count, const idx_t original_count) {
237 if (count == 0) {
238 return;
239 }
240 auto col_offset = layout.GetOffsets()[col_no];
241 switch (layout.GetTypes()[col_no].InternalType()) {
242 case PhysicalType::BOOL:
243 case PhysicalType::INT8:
244 TemplatedMatchType<int8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
245 no_match_count);
246 break;
247 case PhysicalType::INT16:
248 TemplatedMatchType<int16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
249 no_match_count);
250 break;
251 case PhysicalType::INT32:
252 TemplatedMatchType<int32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
253 no_match_count);
254 break;
255 case PhysicalType::INT64:
256 TemplatedMatchType<int64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
257 no_match_count);
258 break;
259 case PhysicalType::UINT8:
260 TemplatedMatchType<uint8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
261 no_match_count);
262 break;
263 case PhysicalType::UINT16:
264 TemplatedMatchType<uint16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
265 no_match_count);
266 break;
267 case PhysicalType::UINT32:
268 TemplatedMatchType<uint32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
269 no_match_count);
270 break;
271 case PhysicalType::UINT64:
272 TemplatedMatchType<uint64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
273 no_match_count);
274 break;
275 case PhysicalType::INT128:
276 TemplatedMatchType<hugeint_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
277 no_match_count);
278 break;
279 case PhysicalType::FLOAT:
280 TemplatedMatchType<float, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
281 no_match_count);
282 break;
283 case PhysicalType::DOUBLE:
284 TemplatedMatchType<double, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
285 no_match_count);
286 break;
287 case PhysicalType::INTERVAL:
288 TemplatedMatchType<interval_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
289 no_match_count);
290 break;
291 case PhysicalType::VARCHAR:
292 TemplatedMatchType<string_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
293 no_match_count);
294 break;
295 case PhysicalType::STRUCT:
296 TemplatedMatchStruct<OP, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
297 original_count);
298 break;
299 case PhysicalType::LIST:
300 TemplatedMatchList<OP, NO_MATCH_SEL>(vec, rows, sel, count, layout, col_no, no_match, no_match_count);
301 break;
302 default:
303 throw InternalException("Unsupported column type for RowOperations::Match");
304 }
305}
306
307template <bool NO_MATCH_SEL>
308static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
309 Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t &count,
310 SelectionVector *no_match, idx_t &no_match_count) {
311 for (idx_t col_no = 0; col_no < predicates.size(); ++col_no) {
312 auto &vec = columns.data[col_no];
313 auto &col = col_data[col_no];
314 switch (predicates[col_no]) {
315 case ExpressionType::COMPARE_EQUAL:
316 case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
317 case ExpressionType::COMPARE_DISTINCT_FROM:
318 TemplatedMatchOp<Equals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count,
319 count);
320 break;
321 case ExpressionType::COMPARE_NOTEQUAL:
322 TemplatedMatchOp<NotEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
323 no_match_count, count);
324 break;
325 case ExpressionType::COMPARE_GREATERTHAN:
326 TemplatedMatchOp<GreaterThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
327 no_match_count, count);
328 break;
329 case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
330 TemplatedMatchOp<GreaterThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
331 no_match_count, count);
332 break;
333 case ExpressionType::COMPARE_LESSTHAN:
334 TemplatedMatchOp<LessThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
335 no_match_count, count);
336 break;
337 case ExpressionType::COMPARE_LESSTHANOREQUALTO:
338 TemplatedMatchOp<LessThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match,
339 no_match_count, count);
340 break;
341 default:
342 throw InternalException("Unsupported comparison type for RowOperations::Match");
343 }
344 }
345}
346
347idx_t RowOperations::Match(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout,
348 Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t count,
349 SelectionVector *no_match, idx_t &no_match_count) {
350 if (no_match) {
351 TemplatedMatch<true>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count);
352 } else {
353 TemplatedMatch<false>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count);
354 }
355
356 return count;
357}
358
359} // namespace duckdb
360