1 | #include "duckdb/optimizer/in_clause_rewriter.hpp" |
2 | #include "duckdb/optimizer/optimizer.hpp" |
3 | #include "duckdb/planner/binder.hpp" |
4 | #include "duckdb/planner/expression/bound_comparison_expression.hpp" |
5 | #include "duckdb/planner/expression/bound_conjunction_expression.hpp" |
6 | #include "duckdb/planner/expression/bound_operator_expression.hpp" |
7 | #include "duckdb/planner/operator/logical_column_data_get.hpp" |
8 | #include "duckdb/planner/operator/logical_comparison_join.hpp" |
9 | #include "duckdb/execution/expression_executor.hpp" |
10 | |
11 | namespace duckdb { |
12 | |
13 | unique_ptr<LogicalOperator> InClauseRewriter::Rewrite(unique_ptr<LogicalOperator> op) { |
14 | if (op->children.size() == 1) { |
15 | root = std::move(op->children[0]); |
16 | VisitOperatorExpressions(op&: *op); |
17 | op->children[0] = std::move(root); |
18 | } |
19 | |
20 | for (auto &child : op->children) { |
21 | child = Rewrite(op: std::move(child)); |
22 | } |
23 | return op; |
24 | } |
25 | |
26 | unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &expr, unique_ptr<Expression> *expr_ptr) { |
27 | if (expr.type != ExpressionType::COMPARE_IN && expr.type != ExpressionType::COMPARE_NOT_IN) { |
28 | return nullptr; |
29 | } |
30 | D_ASSERT(root); |
31 | auto in_type = expr.children[0]->return_type; |
32 | bool is_regular_in = expr.type == ExpressionType::COMPARE_IN; |
33 | bool all_scalar = true; |
34 | // IN clause with many children: try to generate a mark join that replaces this IN expression |
35 | // we can only do this if the expressions in the expression list are scalar |
36 | for (idx_t i = 1; i < expr.children.size(); i++) { |
37 | if (!expr.children[i]->IsFoldable()) { |
38 | // non-scalar expression |
39 | all_scalar = false; |
40 | } |
41 | } |
42 | if (expr.children.size() == 2) { |
43 | // only one child |
44 | // IN: turn into X = 1 |
45 | // NOT IN: turn into X <> 1 |
46 | return make_uniq<BoundComparisonExpression>(args: is_regular_in ? ExpressionType::COMPARE_EQUAL |
47 | : ExpressionType::COMPARE_NOTEQUAL, |
48 | args: std::move(expr.children[0]), args: std::move(expr.children[1])); |
49 | } |
50 | if (expr.children.size() < 6 || !all_scalar) { |
51 | // low amount of children or not all scalar |
52 | // IN: turn into (X = 1 OR X = 2 OR X = 3...) |
53 | // NOT IN: turn into (X <> 1 AND X <> 2 AND X <> 3 ...) |
54 | auto conjunction = make_uniq<BoundConjunctionExpression>(args: is_regular_in ? ExpressionType::CONJUNCTION_OR |
55 | : ExpressionType::CONJUNCTION_AND); |
56 | for (idx_t i = 1; i < expr.children.size(); i++) { |
57 | conjunction->children.push_back(x: make_uniq<BoundComparisonExpression>( |
58 | args: is_regular_in ? ExpressionType::COMPARE_EQUAL : ExpressionType::COMPARE_NOTEQUAL, |
59 | args: expr.children[0]->Copy(), args: std::move(expr.children[i]))); |
60 | } |
61 | return std::move(conjunction); |
62 | } |
63 | // IN clause with many constant children |
64 | // generate a mark join that replaces this IN expression |
65 | // first generate a ColumnDataCollection from the set of expressions |
66 | vector<LogicalType> types = {in_type}; |
67 | auto collection = make_uniq<ColumnDataCollection>(args&: context, args&: types); |
68 | ColumnDataAppendState append_state; |
69 | collection->InitializeAppend(state&: append_state); |
70 | |
71 | DataChunk chunk; |
72 | chunk.Initialize(context, types); |
73 | for (idx_t i = 1; i < expr.children.size(); i++) { |
74 | // resolve this expression to a constant |
75 | auto value = ExpressionExecutor::EvaluateScalar(context, expr: *expr.children[i]); |
76 | idx_t index = chunk.size(); |
77 | chunk.SetCardinality(chunk.size() + 1); |
78 | chunk.SetValue(col_idx: 0, index, val: value); |
79 | if (chunk.size() == STANDARD_VECTOR_SIZE || i + 1 == expr.children.size()) { |
80 | // chunk full: append to chunk collection |
81 | collection->Append(state&: append_state, new_chunk&: chunk); |
82 | chunk.Reset(); |
83 | } |
84 | } |
85 | // now generate a ChunkGet that scans this collection |
86 | auto chunk_index = optimizer.binder.GenerateTableIndex(); |
87 | auto chunk_scan = make_uniq<LogicalColumnDataGet>(args&: chunk_index, args&: types, args: std::move(collection)); |
88 | |
89 | // then we generate the MARK join with the chunk scan on the RHS |
90 | auto join = make_uniq<LogicalComparisonJoin>(args: JoinType::MARK); |
91 | join->mark_index = chunk_index; |
92 | join->AddChild(child: std::move(root)); |
93 | join->AddChild(child: std::move(chunk_scan)); |
94 | // create the JOIN condition |
95 | JoinCondition cond; |
96 | cond.left = std::move(expr.children[0]); |
97 | |
98 | cond.right = make_uniq<BoundColumnRefExpression>(args&: in_type, args: ColumnBinding(chunk_index, 0)); |
99 | cond.comparison = ExpressionType::COMPARE_EQUAL; |
100 | join->conditions.push_back(x: std::move(cond)); |
101 | root = std::move(join); |
102 | |
103 | // we replace the original subquery with a BoundColumnRefExpression referring to the mark column |
104 | unique_ptr<Expression> result = |
105 | make_uniq<BoundColumnRefExpression>(args: "IN (...)" , args: LogicalType::BOOLEAN, args: ColumnBinding(chunk_index, 0)); |
106 | if (!is_regular_in) { |
107 | // NOT IN: invert |
108 | auto invert = make_uniq<BoundOperatorExpression>(args: ExpressionType::OPERATOR_NOT, args: LogicalType::BOOLEAN); |
109 | invert->children.push_back(x: std::move(result)); |
110 | result = std::move(invert); |
111 | } |
112 | return result; |
113 | } |
114 | |
115 | } // namespace duckdb |
116 | |