1 | //===--------------------------------------------------------------------===// |
2 | // row_match.cpp |
3 | // Description: This file contains the implementation of the match operators |
4 | //===--------------------------------------------------------------------===// |
5 | |
6 | #include "duckdb/common/exception.hpp" |
7 | #include "duckdb/common/operator/comparison_operators.hpp" |
8 | #include "duckdb/common/operator/constant_operators.hpp" |
9 | #include "duckdb/common/row_operations/row_operations.hpp" |
10 | #include "duckdb/common/types/row/tuple_data_collection.hpp" |
11 | |
12 | namespace duckdb { |
13 | |
14 | using ValidityBytes = RowLayout::ValidityBytes; |
15 | using Predicates = RowOperations::Predicates; |
16 | |
17 | template <typename OP> |
18 | static idx_t SelectComparison(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
19 | SelectionVector *true_sel, SelectionVector *false_sel) { |
20 | throw NotImplementedException("Unsupported nested comparison operand for RowOperations::Match" ); |
21 | } |
22 | |
23 | template <> |
24 | idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
25 | SelectionVector *true_sel, SelectionVector *false_sel) { |
26 | return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel); |
27 | } |
28 | |
29 | template <> |
30 | idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
31 | SelectionVector *true_sel, SelectionVector *false_sel) { |
32 | return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel); |
33 | } |
34 | |
35 | template <> |
36 | idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
37 | SelectionVector *true_sel, SelectionVector *false_sel) { |
38 | return VectorOperations::DistinctGreaterThan(left, right, sel: &sel, count, true_sel, false_sel); |
39 | } |
40 | |
41 | template <> |
42 | idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
43 | SelectionVector *true_sel, SelectionVector *false_sel) { |
44 | return VectorOperations::DistinctGreaterThanEquals(left, right, sel: &sel, count, true_sel, false_sel); |
45 | } |
46 | |
47 | template <> |
48 | idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
49 | SelectionVector *true_sel, SelectionVector *false_sel) { |
50 | return VectorOperations::DistinctLessThan(left, right, sel: &sel, count, true_sel, false_sel); |
51 | } |
52 | |
53 | template <> |
54 | idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
55 | SelectionVector *true_sel, SelectionVector *false_sel) { |
56 | return VectorOperations::DistinctLessThanEquals(left, right, sel: &sel, count, true_sel, false_sel); |
57 | } |
58 | |
59 | template <class T, class OP, bool NO_MATCH_SEL> |
60 | static void TemplatedMatchType(UnifiedVectorFormat &col, Vector &rows, SelectionVector &sel, idx_t &count, |
61 | idx_t col_offset, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) { |
62 | // Precompute row_mask indexes |
63 | idx_t entry_idx; |
64 | idx_t idx_in_entry; |
65 | ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry); |
66 | |
67 | auto data = UnifiedVectorFormat::GetData<T>(col); |
68 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
69 | idx_t match_count = 0; |
70 | if (!col.validity.AllValid()) { |
71 | for (idx_t i = 0; i < count; i++) { |
72 | auto idx = sel.get_index(idx: i); |
73 | |
74 | auto row = ptrs[idx]; |
75 | ValidityBytes row_mask(row); |
76 | auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry); |
77 | |
78 | auto col_idx = col.sel->get_index(idx); |
79 | if (!col.validity.RowIsValid(row_idx: col_idx)) { |
80 | if (isnull) { |
81 | // match: move to next value to compare |
82 | sel.set_index(idx: match_count++, loc: idx); |
83 | } else { |
84 | if (NO_MATCH_SEL) { |
85 | no_match->set_index(idx: no_match_count++, loc: idx); |
86 | } |
87 | } |
88 | } else { |
89 | auto value = Load<T>(row + col_offset); |
90 | if (!isnull && OP::template Operation<T>(data[col_idx], value)) { |
91 | sel.set_index(idx: match_count++, loc: idx); |
92 | } else { |
93 | if (NO_MATCH_SEL) { |
94 | no_match->set_index(idx: no_match_count++, loc: idx); |
95 | } |
96 | } |
97 | } |
98 | } |
99 | } else { |
100 | for (idx_t i = 0; i < count; i++) { |
101 | auto idx = sel.get_index(idx: i); |
102 | |
103 | auto row = ptrs[idx]; |
104 | ValidityBytes row_mask(row); |
105 | auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry); |
106 | |
107 | auto col_idx = col.sel->get_index(idx); |
108 | auto value = Load<T>(row + col_offset); |
109 | if (!isnull && OP::template Operation<T>(data[col_idx], value)) { |
110 | sel.set_index(idx: match_count++, loc: idx); |
111 | } else { |
112 | if (NO_MATCH_SEL) { |
113 | no_match->set_index(idx: no_match_count++, loc: idx); |
114 | } |
115 | } |
116 | } |
117 | } |
118 | count = match_count; |
119 | } |
120 | |
121 | //! Forward declaration for recursion |
122 | template <class OP, bool NO_MATCH_SEL> |
123 | static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows, |
124 | SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match, |
125 | idx_t &no_match_count, const idx_t original_count); |
126 | |
127 | template <class OP, bool NO_MATCH_SEL> |
128 | static void TemplatedMatchStruct(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows, |
129 | SelectionVector &sel, idx_t &count, const idx_t col_no, SelectionVector *no_match, |
130 | idx_t &no_match_count, const idx_t original_count) { |
131 | // Precompute row_mask indexes |
132 | idx_t entry_idx; |
133 | idx_t idx_in_entry; |
134 | ValidityBytes::GetEntryIndex(row_idx: col_no, entry_idx, idx_in_entry); |
135 | |
136 | // Work our way through the validity of the whole struct |
137 | auto ptrs = FlatVector::GetData<data_ptr_t>(vector&: rows); |
138 | idx_t match_count = 0; |
139 | if (!col.validity.AllValid()) { |
140 | for (idx_t i = 0; i < count; i++) { |
141 | auto idx = sel.get_index(idx: i); |
142 | |
143 | auto row = ptrs[idx]; |
144 | ValidityBytes row_mask(row); |
145 | auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry); |
146 | |
147 | auto col_idx = col.sel->get_index(idx); |
148 | if (!col.validity.RowIsValid(row_idx: col_idx)) { |
149 | if (isnull) { |
150 | // match: move to next value to compare |
151 | sel.set_index(idx: match_count++, loc: idx); |
152 | } else { |
153 | if (NO_MATCH_SEL) { |
154 | no_match->set_index(idx: no_match_count++, loc: idx); |
155 | } |
156 | } |
157 | } else { |
158 | if (!isnull) { |
159 | sel.set_index(idx: match_count++, loc: idx); |
160 | } else { |
161 | if (NO_MATCH_SEL) { |
162 | no_match->set_index(idx: no_match_count++, loc: idx); |
163 | } |
164 | } |
165 | } |
166 | } |
167 | } else { |
168 | for (idx_t i = 0; i < count; i++) { |
169 | auto idx = sel.get_index(idx: i); |
170 | |
171 | auto row = ptrs[idx]; |
172 | ValidityBytes row_mask(row); |
173 | auto isnull = !row_mask.RowIsValid(entry: row_mask.GetValidityEntry(entry_idx), idx_in_entry); |
174 | |
175 | if (!isnull) { |
176 | sel.set_index(idx: match_count++, loc: idx); |
177 | } else { |
178 | if (NO_MATCH_SEL) { |
179 | no_match->set_index(idx: no_match_count++, loc: idx); |
180 | } |
181 | } |
182 | } |
183 | } |
184 | count = match_count; |
185 | |
186 | // Now we construct row pointers to the structs |
187 | Vector struct_rows(LogicalTypeId::POINTER); |
188 | auto struct_ptrs = FlatVector::GetData<data_ptr_t>(vector&: struct_rows); |
189 | |
190 | const auto col_offset = layout.GetOffsets()[col_no]; |
191 | for (idx_t i = 0; i < count; i++) { |
192 | auto idx = sel.get_index(idx: i); |
193 | auto row = ptrs[idx]; |
194 | struct_ptrs[idx] = row + col_offset; |
195 | } |
196 | |
197 | // Get the struct layout, child columns, then recurse |
198 | const auto &struct_layout = layout.GetStructLayout(col_idx: col_no); |
199 | auto &struct_entries = StructVector::GetEntries(vector&: vec); |
200 | D_ASSERT(struct_layout.ColumnCount() == struct_entries.size()); |
201 | for (idx_t struct_col_no = 0; struct_col_no < struct_layout.ColumnCount(); struct_col_no++) { |
202 | auto &struct_vec = *struct_entries[struct_col_no]; |
203 | UnifiedVectorFormat struct_col; |
204 | struct_vec.ToUnifiedFormat(count: original_count, data&: struct_col); |
205 | TemplatedMatchOp<OP, NO_MATCH_SEL>(struct_vec, struct_col, struct_layout, struct_rows, sel, count, |
206 | struct_col_no, no_match, no_match_count, original_count); |
207 | } |
208 | } |
209 | |
210 | template <class OP, bool NO_MATCH_SEL> |
211 | static void TemplatedMatchList(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count, |
212 | const TupleDataLayout &layout, const idx_t col_no, SelectionVector *no_match, |
213 | idx_t &no_match_count) { |
214 | // Gather a dense Vector containing the column values being matched |
215 | Vector key(col.GetType()); |
216 | const auto gather_function = TupleDataCollection::GetGatherFunction(type: col.GetType()); |
217 | gather_function.function(layout, rows, col_no, sel, count, key, *FlatVector::IncrementalSelectionVector(), key, |
218 | gather_function.child_functions); |
219 | |
220 | // Densify the input column |
221 | Vector sliced(col, sel, count); |
222 | |
223 | if (NO_MATCH_SEL) { |
224 | SelectionVector no_match_sel_offset(no_match->data() + no_match_count); |
225 | auto match_count = SelectComparison<OP>(sliced, key, sel, count, &sel, &no_match_sel_offset); |
226 | no_match_count += count - match_count; |
227 | count = match_count; |
228 | } else { |
229 | count = SelectComparison<OP>(sliced, key, sel, count, &sel, nullptr); |
230 | } |
231 | } |
232 | |
233 | template <class OP, bool NO_MATCH_SEL> |
234 | static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const TupleDataLayout &layout, Vector &rows, |
235 | SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match, |
236 | idx_t &no_match_count, const idx_t original_count) { |
237 | if (count == 0) { |
238 | return; |
239 | } |
240 | auto col_offset = layout.GetOffsets()[col_no]; |
241 | switch (layout.GetTypes()[col_no].InternalType()) { |
242 | case PhysicalType::BOOL: |
243 | case PhysicalType::INT8: |
244 | TemplatedMatchType<int8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
245 | no_match_count); |
246 | break; |
247 | case PhysicalType::INT16: |
248 | TemplatedMatchType<int16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
249 | no_match_count); |
250 | break; |
251 | case PhysicalType::INT32: |
252 | TemplatedMatchType<int32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
253 | no_match_count); |
254 | break; |
255 | case PhysicalType::INT64: |
256 | TemplatedMatchType<int64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
257 | no_match_count); |
258 | break; |
259 | case PhysicalType::UINT8: |
260 | TemplatedMatchType<uint8_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
261 | no_match_count); |
262 | break; |
263 | case PhysicalType::UINT16: |
264 | TemplatedMatchType<uint16_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
265 | no_match_count); |
266 | break; |
267 | case PhysicalType::UINT32: |
268 | TemplatedMatchType<uint32_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
269 | no_match_count); |
270 | break; |
271 | case PhysicalType::UINT64: |
272 | TemplatedMatchType<uint64_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
273 | no_match_count); |
274 | break; |
275 | case PhysicalType::INT128: |
276 | TemplatedMatchType<hugeint_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
277 | no_match_count); |
278 | break; |
279 | case PhysicalType::FLOAT: |
280 | TemplatedMatchType<float, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
281 | no_match_count); |
282 | break; |
283 | case PhysicalType::DOUBLE: |
284 | TemplatedMatchType<double, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
285 | no_match_count); |
286 | break; |
287 | case PhysicalType::INTERVAL: |
288 | TemplatedMatchType<interval_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
289 | no_match_count); |
290 | break; |
291 | case PhysicalType::VARCHAR: |
292 | TemplatedMatchType<string_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match, |
293 | no_match_count); |
294 | break; |
295 | case PhysicalType::STRUCT: |
296 | TemplatedMatchStruct<OP, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count, |
297 | original_count); |
298 | break; |
299 | case PhysicalType::LIST: |
300 | TemplatedMatchList<OP, NO_MATCH_SEL>(vec, rows, sel, count, layout, col_no, no_match, no_match_count); |
301 | break; |
302 | default: |
303 | throw InternalException("Unsupported column type for RowOperations::Match" ); |
304 | } |
305 | } |
306 | |
307 | template <bool NO_MATCH_SEL> |
308 | static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout, |
309 | Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t &count, |
310 | SelectionVector *no_match, idx_t &no_match_count) { |
311 | for (idx_t col_no = 0; col_no < predicates.size(); ++col_no) { |
312 | auto &vec = columns.data[col_no]; |
313 | auto &col = col_data[col_no]; |
314 | switch (predicates[col_no]) { |
315 | case ExpressionType::COMPARE_EQUAL: |
316 | case ExpressionType::COMPARE_NOT_DISTINCT_FROM: |
317 | case ExpressionType::COMPARE_DISTINCT_FROM: |
318 | TemplatedMatchOp<Equals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, no_match_count, |
319 | count); |
320 | break; |
321 | case ExpressionType::COMPARE_NOTEQUAL: |
322 | TemplatedMatchOp<NotEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, |
323 | no_match_count, count); |
324 | break; |
325 | case ExpressionType::COMPARE_GREATERTHAN: |
326 | TemplatedMatchOp<GreaterThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, |
327 | no_match_count, count); |
328 | break; |
329 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
330 | TemplatedMatchOp<GreaterThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, |
331 | no_match_count, count); |
332 | break; |
333 | case ExpressionType::COMPARE_LESSTHAN: |
334 | TemplatedMatchOp<LessThan, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, |
335 | no_match_count, count); |
336 | break; |
337 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: |
338 | TemplatedMatchOp<LessThanEquals, NO_MATCH_SEL>(vec, col, layout, rows, sel, count, col_no, no_match, |
339 | no_match_count, count); |
340 | break; |
341 | default: |
342 | throw InternalException("Unsupported comparison type for RowOperations::Match" ); |
343 | } |
344 | } |
345 | } |
346 | |
347 | idx_t RowOperations::Match(DataChunk &columns, UnifiedVectorFormat col_data[], const TupleDataLayout &layout, |
348 | Vector &rows, const Predicates &predicates, SelectionVector &sel, idx_t count, |
349 | SelectionVector *no_match, idx_t &no_match_count) { |
350 | if (no_match) { |
351 | TemplatedMatch<true>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count); |
352 | } else { |
353 | TemplatedMatch<false>(columns, col_data, layout, rows, predicates, sel, count, no_match, no_match_count); |
354 | } |
355 | |
356 | return count; |
357 | } |
358 | |
359 | } // namespace duckdb |
360 | |