1 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
2 | #include "duckdb/execution/expression_executor.hpp" |
3 | #include "duckdb/planner/expression/bound_comparison_expression.hpp" |
4 | #include "duckdb/common/operator/comparison_operators.hpp" |
5 | #include "duckdb/common/vector_operations/binary_executor.hpp" |
6 | |
7 | #include <algorithm> |
8 | |
9 | namespace duckdb { |
10 | |
11 | unique_ptr<ExpressionState> ExpressionExecutor::InitializeState(const BoundComparisonExpression &expr, |
12 | ExpressionExecutorState &root) { |
13 | auto result = make_uniq<ExpressionState>(args: expr, args&: root); |
14 | result->AddChild(expr: expr.left.get()); |
15 | result->AddChild(expr: expr.right.get()); |
16 | result->Finalize(); |
17 | return result; |
18 | } |
19 | |
20 | void ExpressionExecutor::Execute(const BoundComparisonExpression &expr, ExpressionState *state, |
21 | const SelectionVector *sel, idx_t count, Vector &result) { |
22 | // resolve the children |
23 | state->intermediate_chunk.Reset(); |
24 | auto &left = state->intermediate_chunk.data[0]; |
25 | auto &right = state->intermediate_chunk.data[1]; |
26 | |
27 | Execute(expr: *expr.left, state: state->child_states[0].get(), sel, count, result&: left); |
28 | Execute(expr: *expr.right, state: state->child_states[1].get(), sel, count, result&: right); |
29 | |
30 | switch (expr.type) { |
31 | case ExpressionType::COMPARE_EQUAL: |
32 | VectorOperations::Equals(left, right, result, count); |
33 | break; |
34 | case ExpressionType::COMPARE_NOTEQUAL: |
35 | VectorOperations::NotEquals(left, right, result, count); |
36 | break; |
37 | case ExpressionType::COMPARE_LESSTHAN: |
38 | VectorOperations::LessThan(left, right, result, count); |
39 | break; |
40 | case ExpressionType::COMPARE_GREATERTHAN: |
41 | VectorOperations::GreaterThan(left, right, result, count); |
42 | break; |
43 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: |
44 | VectorOperations::LessThanEquals(left, right, result, count); |
45 | break; |
46 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
47 | VectorOperations::GreaterThanEquals(left, right, result, count); |
48 | break; |
49 | case ExpressionType::COMPARE_DISTINCT_FROM: |
50 | VectorOperations::DistinctFrom(left, right, result, count); |
51 | break; |
52 | case ExpressionType::COMPARE_NOT_DISTINCT_FROM: |
53 | VectorOperations::NotDistinctFrom(left, right, result, count); |
54 | break; |
55 | default: |
56 | throw InternalException("Unknown comparison type!" ); |
57 | } |
58 | } |
59 | |
60 | template <typename OP> |
61 | static idx_t NestedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
62 | SelectionVector *true_sel, SelectionVector *false_sel); |
63 | |
64 | template <class OP> |
65 | static idx_t TemplatedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
66 | SelectionVector *true_sel, SelectionVector *false_sel) { |
67 | // the inplace loops take the result as the last parameter |
68 | switch (left.GetType().InternalType()) { |
69 | case PhysicalType::BOOL: |
70 | case PhysicalType::INT8: |
71 | return BinaryExecutor::Select<int8_t, int8_t, OP>(left, right, sel, count, true_sel, false_sel); |
72 | case PhysicalType::INT16: |
73 | return BinaryExecutor::Select<int16_t, int16_t, OP>(left, right, sel, count, true_sel, false_sel); |
74 | case PhysicalType::INT32: |
75 | return BinaryExecutor::Select<int32_t, int32_t, OP>(left, right, sel, count, true_sel, false_sel); |
76 | case PhysicalType::INT64: |
77 | return BinaryExecutor::Select<int64_t, int64_t, OP>(left, right, sel, count, true_sel, false_sel); |
78 | case PhysicalType::UINT8: |
79 | return BinaryExecutor::Select<uint8_t, uint8_t, OP>(left, right, sel, count, true_sel, false_sel); |
80 | case PhysicalType::UINT16: |
81 | return BinaryExecutor::Select<uint16_t, uint16_t, OP>(left, right, sel, count, true_sel, false_sel); |
82 | case PhysicalType::UINT32: |
83 | return BinaryExecutor::Select<uint32_t, uint32_t, OP>(left, right, sel, count, true_sel, false_sel); |
84 | case PhysicalType::UINT64: |
85 | return BinaryExecutor::Select<uint64_t, uint64_t, OP>(left, right, sel, count, true_sel, false_sel); |
86 | case PhysicalType::INT128: |
87 | return BinaryExecutor::Select<hugeint_t, hugeint_t, OP>(left, right, sel, count, true_sel, false_sel); |
88 | case PhysicalType::FLOAT: |
89 | return BinaryExecutor::Select<float, float, OP>(left, right, sel, count, true_sel, false_sel); |
90 | case PhysicalType::DOUBLE: |
91 | return BinaryExecutor::Select<double, double, OP>(left, right, sel, count, true_sel, false_sel); |
92 | case PhysicalType::INTERVAL: |
93 | return BinaryExecutor::Select<interval_t, interval_t, OP>(left, right, sel, count, true_sel, false_sel); |
94 | case PhysicalType::VARCHAR: |
95 | return BinaryExecutor::Select<string_t, string_t, OP>(left, right, sel, count, true_sel, false_sel); |
96 | case PhysicalType::LIST: |
97 | case PhysicalType::STRUCT: |
98 | return NestedSelectOperation<OP>(left, right, sel, count, true_sel, false_sel); |
99 | default: |
100 | throw InternalException("Invalid type for comparison" ); |
101 | } |
102 | } |
103 | |
104 | struct NestedSelector { |
105 | // Select the matching rows for the values of a nested type that are not both NULL. |
106 | // Those semantics are the same as the corresponding non-distinct comparator |
107 | template <typename OP> |
108 | static idx_t Select(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, SelectionVector *true_sel, |
109 | SelectionVector *false_sel) { |
110 | throw InvalidTypeException(left.GetType(), "Invalid operation for nested SELECT" ); |
111 | } |
112 | }; |
113 | |
114 | template <> |
115 | idx_t NestedSelector::Select<duckdb::Equals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
116 | SelectionVector *true_sel, SelectionVector *false_sel) { |
117 | return VectorOperations::NestedEquals(left, right, sel, count, true_sel, false_sel); |
118 | } |
119 | |
120 | template <> |
121 | idx_t NestedSelector::Select<duckdb::NotEquals>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
122 | SelectionVector *true_sel, SelectionVector *false_sel) { |
123 | return VectorOperations::NestedNotEquals(left, right, sel, count, true_sel, false_sel); |
124 | } |
125 | |
126 | template <> |
127 | idx_t NestedSelector::Select<duckdb::LessThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
128 | SelectionVector *true_sel, SelectionVector *false_sel) { |
129 | return VectorOperations::DistinctLessThan(left, right, sel: &sel, count, true_sel, false_sel); |
130 | } |
131 | |
132 | template <> |
133 | idx_t NestedSelector::Select<duckdb::LessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, |
134 | idx_t count, SelectionVector *true_sel, |
135 | SelectionVector *false_sel) { |
136 | return VectorOperations::DistinctLessThanEquals(left, right, sel: &sel, count, true_sel, false_sel); |
137 | } |
138 | |
139 | template <> |
140 | idx_t NestedSelector::Select<duckdb::GreaterThan>(Vector &left, Vector &right, const SelectionVector &sel, idx_t count, |
141 | SelectionVector *true_sel, SelectionVector *false_sel) { |
142 | return VectorOperations::DistinctGreaterThan(left, right, sel: &sel, count, true_sel, false_sel); |
143 | } |
144 | |
145 | template <> |
146 | idx_t NestedSelector::Select<duckdb::GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector &sel, |
147 | idx_t count, SelectionVector *true_sel, |
148 | SelectionVector *false_sel) { |
149 | return VectorOperations::DistinctGreaterThanEquals(left, right, sel: &sel, count, true_sel, false_sel); |
150 | } |
151 | |
152 | static inline idx_t SelectNotNull(Vector &left, Vector &right, const idx_t count, const SelectionVector &sel, |
153 | SelectionVector &maybe_vec, OptionalSelection &false_opt) { |
154 | |
155 | UnifiedVectorFormat lvdata, rvdata; |
156 | left.ToUnifiedFormat(count, data&: lvdata); |
157 | right.ToUnifiedFormat(count, data&: rvdata); |
158 | |
159 | auto &lmask = lvdata.validity; |
160 | auto &rmask = rvdata.validity; |
161 | |
162 | // For top-level comparisons, NULL semantics are in effect, |
163 | // so filter out any NULLs |
164 | idx_t remaining = 0; |
165 | if (lmask.AllValid() && rmask.AllValid()) { |
166 | // None are NULL, distinguish values. |
167 | for (idx_t i = 0; i < count; ++i) { |
168 | const auto idx = sel.get_index(idx: i); |
169 | maybe_vec.set_index(idx: remaining++, loc: idx); |
170 | } |
171 | return remaining; |
172 | } |
173 | |
174 | // Slice the Vectors down to the rows that are not determined (i.e., neither is NULL) |
175 | SelectionVector slicer(count); |
176 | idx_t false_count = 0; |
177 | for (idx_t i = 0; i < count; ++i) { |
178 | const auto result_idx = sel.get_index(idx: i); |
179 | const auto lidx = lvdata.sel->get_index(idx: i); |
180 | const auto ridx = rvdata.sel->get_index(idx: i); |
181 | if (!lmask.RowIsValid(row_idx: lidx) || !rmask.RowIsValid(row_idx: ridx)) { |
182 | false_opt.Append(count&: false_count, idx: result_idx); |
183 | } else { |
184 | // Neither is NULL, distinguish values. |
185 | slicer.set_index(idx: remaining, loc: i); |
186 | maybe_vec.set_index(idx: remaining++, loc: result_idx); |
187 | } |
188 | } |
189 | false_opt.Advance(completed: false_count); |
190 | |
191 | if (remaining && remaining < count) { |
192 | left.Slice(sel: slicer, count: remaining); |
193 | right.Slice(sel: slicer, count: remaining); |
194 | } |
195 | |
196 | return remaining; |
197 | } |
198 | |
199 | static void ScatterSelection(SelectionVector *target, const idx_t count, const SelectionVector &dense_vec) { |
200 | if (target) { |
201 | for (idx_t i = 0; i < count; ++i) { |
202 | target->set_index(idx: i, loc: dense_vec.get_index(idx: i)); |
203 | } |
204 | } |
205 | } |
206 | |
207 | template <typename OP> |
208 | static idx_t NestedSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
209 | SelectionVector *true_sel, SelectionVector *false_sel) { |
210 | // The Select operations all use a dense pair of input vectors to partition |
211 | // a selection vector in a single pass. But to implement progressive comparisons, |
212 | // we have to make multiple passes, so we need to keep track of the original input positions |
213 | // and then scatter the output selections when we are done. |
214 | if (!sel) { |
215 | sel = FlatVector::IncrementalSelectionVector(); |
216 | } |
217 | |
218 | // Make buffered selections for progressive comparisons |
219 | // TODO: Remove unnecessary allocations |
220 | SelectionVector true_vec(count); |
221 | OptionalSelection true_opt(&true_vec); |
222 | |
223 | SelectionVector false_vec(count); |
224 | OptionalSelection false_opt(&false_vec); |
225 | |
226 | SelectionVector maybe_vec(count); |
227 | |
228 | // Handle NULL nested values |
229 | Vector l_not_null(left); |
230 | Vector r_not_null(right); |
231 | |
232 | auto match_count = SelectNotNull(left&: l_not_null, right&: r_not_null, count, sel: *sel, maybe_vec, false_opt); |
233 | auto no_match_count = count - match_count; |
234 | count = match_count; |
235 | |
236 | // Now that we have handled the NULLs, we can use the recursive nested comparator for the rest. |
237 | match_count = NestedSelector::Select<OP>(l_not_null, r_not_null, maybe_vec, count, true_opt, false_opt); |
238 | no_match_count += (count - match_count); |
239 | |
240 | // Copy the buffered selections to the output selections |
241 | ScatterSelection(target: true_sel, count: match_count, dense_vec: true_vec); |
242 | ScatterSelection(target: false_sel, count: no_match_count, dense_vec: false_vec); |
243 | |
244 | return match_count; |
245 | } |
246 | |
247 | idx_t VectorOperations::Equals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
248 | SelectionVector *true_sel, SelectionVector *false_sel) { |
249 | return TemplatedSelectOperation<duckdb::Equals>(left, right, sel, count, true_sel, false_sel); |
250 | } |
251 | |
252 | idx_t VectorOperations::NotEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
253 | SelectionVector *true_sel, SelectionVector *false_sel) { |
254 | return TemplatedSelectOperation<duckdb::NotEquals>(left, right, sel, count, true_sel, false_sel); |
255 | } |
256 | |
257 | idx_t VectorOperations::GreaterThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
258 | SelectionVector *true_sel, SelectionVector *false_sel) { |
259 | return TemplatedSelectOperation<duckdb::GreaterThan>(left, right, sel, count, true_sel, false_sel); |
260 | } |
261 | |
262 | idx_t VectorOperations::GreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
263 | SelectionVector *true_sel, SelectionVector *false_sel) { |
264 | return TemplatedSelectOperation<duckdb::GreaterThanEquals>(left, right, sel, count, true_sel, false_sel); |
265 | } |
266 | |
267 | idx_t VectorOperations::LessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
268 | SelectionVector *true_sel, SelectionVector *false_sel) { |
269 | return TemplatedSelectOperation<duckdb::GreaterThan>(left&: right, right&: left, sel, count, true_sel, false_sel); |
270 | } |
271 | |
272 | idx_t VectorOperations::LessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count, |
273 | SelectionVector *true_sel, SelectionVector *false_sel) { |
274 | return TemplatedSelectOperation<duckdb::GreaterThanEquals>(left&: right, right&: left, sel, count, true_sel, false_sel); |
275 | } |
276 | |
277 | idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, ExpressionState *state, |
278 | const SelectionVector *sel, idx_t count, SelectionVector *true_sel, |
279 | SelectionVector *false_sel) { |
280 | // resolve the children |
281 | state->intermediate_chunk.Reset(); |
282 | auto &left = state->intermediate_chunk.data[0]; |
283 | auto &right = state->intermediate_chunk.data[1]; |
284 | |
285 | Execute(expr: *expr.left, state: state->child_states[0].get(), sel, count, result&: left); |
286 | Execute(expr: *expr.right, state: state->child_states[1].get(), sel, count, result&: right); |
287 | |
288 | switch (expr.type) { |
289 | case ExpressionType::COMPARE_EQUAL: |
290 | return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel); |
291 | case ExpressionType::COMPARE_NOTEQUAL: |
292 | return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel); |
293 | case ExpressionType::COMPARE_LESSTHAN: |
294 | return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel); |
295 | case ExpressionType::COMPARE_GREATERTHAN: |
296 | return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel); |
297 | case ExpressionType::COMPARE_LESSTHANOREQUALTO: |
298 | return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel); |
299 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
300 | return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel); |
301 | case ExpressionType::COMPARE_DISTINCT_FROM: |
302 | return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, false_sel); |
303 | case ExpressionType::COMPARE_NOT_DISTINCT_FROM: |
304 | return VectorOperations::NotDistinctFrom(left, right, sel, count, true_sel, false_sel); |
305 | default: |
306 | throw InternalException("Unknown comparison type!" ); |
307 | } |
308 | } |
309 | |
310 | } // namespace duckdb |
311 | |