| 1 | #include "duckdb/optimizer/in_clause_rewriter.hpp" |
| 2 | #include "duckdb/optimizer/optimizer.hpp" |
| 3 | #include "duckdb/planner/binder.hpp" |
| 4 | #include "duckdb/planner/expression/bound_comparison_expression.hpp" |
| 5 | #include "duckdb/planner/expression/bound_conjunction_expression.hpp" |
| 6 | #include "duckdb/planner/expression/bound_operator_expression.hpp" |
| 7 | #include "duckdb/planner/operator/logical_chunk_get.hpp" |
| 8 | #include "duckdb/planner/operator/logical_comparison_join.hpp" |
| 9 | #include "duckdb/execution/expression_executor.hpp" |
| 10 | |
| 11 | namespace duckdb { |
| 12 | |
| 13 | unique_ptr<LogicalOperator> InClauseRewriter::Rewrite(unique_ptr<LogicalOperator> op) { |
| 14 | if (op->children.size() == 1) { |
| 15 | root = move(op->children[0]); |
| 16 | VisitOperatorExpressions(*op); |
| 17 | op->children[0] = move(root); |
| 18 | } |
| 19 | |
| 20 | for (auto &child : op->children) { |
| 21 | child = Rewrite(move(child)); |
| 22 | } |
| 23 | return op; |
| 24 | } |
| 25 | |
| 26 | unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &expr, unique_ptr<Expression> *expr_ptr) { |
| 27 | if (expr.type != ExpressionType::COMPARE_IN && expr.type != ExpressionType::COMPARE_NOT_IN) { |
| 28 | return nullptr; |
| 29 | } |
| 30 | assert(root); |
| 31 | auto in_type = expr.children[0]->return_type; |
| 32 | bool is_regular_in = expr.type == ExpressionType::COMPARE_IN; |
| 33 | bool all_scalar = true; |
| 34 | // IN clause with many children: try to generate a mark join that replaces this IN expression |
| 35 | // we can only do this if the expressions in the expression list are scalar |
| 36 | for (idx_t i = 1; i < expr.children.size(); i++) { |
| 37 | assert(expr.children[i]->return_type == in_type); |
| 38 | if (!expr.children[i]->IsFoldable()) { |
| 39 | // non-scalar expression |
| 40 | all_scalar = false; |
| 41 | } |
| 42 | } |
| 43 | if (expr.children.size() == 2) { |
| 44 | // only one child |
| 45 | // IN: turn into X = 1 |
| 46 | // NOT IN: turn into X <> 1 |
| 47 | return make_unique<BoundComparisonExpression>(is_regular_in ? ExpressionType::COMPARE_EQUAL |
| 48 | : ExpressionType::COMPARE_NOTEQUAL, |
| 49 | move(expr.children[0]), move(expr.children[1])); |
| 50 | } |
| 51 | if (expr.children.size() < 6 || !all_scalar) { |
| 52 | // low amount of children or not all scalar |
| 53 | // IN: turn into (X = 1 OR X = 2 OR X = 3...) |
| 54 | // NOT IN: turn into (X <> 1 AND X <> 2 AND X <> 3 ...) |
| 55 | auto conjunction = make_unique<BoundConjunctionExpression>(is_regular_in ? ExpressionType::CONJUNCTION_OR |
| 56 | : ExpressionType::CONJUNCTION_AND); |
| 57 | for (idx_t i = 1; i < expr.children.size(); i++) { |
| 58 | conjunction->children.push_back(make_unique<BoundComparisonExpression>( |
| 59 | is_regular_in ? ExpressionType::COMPARE_EQUAL : ExpressionType::COMPARE_NOTEQUAL, |
| 60 | expr.children[0]->Copy(), move(expr.children[i]))); |
| 61 | } |
| 62 | return move(conjunction); |
| 63 | } |
| 64 | // IN clause with many constant children |
| 65 | // generate a mark join that replaces this IN expression |
| 66 | // first generate a ChunkCollection from the set of expressions |
| 67 | vector<TypeId> types = {in_type}; |
| 68 | auto collection = make_unique<ChunkCollection>(); |
| 69 | DataChunk chunk; |
| 70 | chunk.Initialize(types); |
| 71 | for (idx_t i = 1; i < expr.children.size(); i++) { |
| 72 | // reoslve this expression to a constant |
| 73 | auto value = ExpressionExecutor::EvaluateScalar(*expr.children[i]); |
| 74 | idx_t index = chunk.size(); |
| 75 | chunk.SetCardinality(chunk.size() + 1); |
| 76 | chunk.SetValue(0, index, value); |
| 77 | if (chunk.size() == STANDARD_VECTOR_SIZE || i + 1 == expr.children.size()) { |
| 78 | // chunk full: append to chunk collection |
| 79 | collection->Append(chunk); |
| 80 | chunk.Reset(); |
| 81 | } |
| 82 | } |
| 83 | // now generate a ChunkGet that scans this collection |
| 84 | auto chunk_index = optimizer.binder.GenerateTableIndex(); |
| 85 | auto chunk_scan = make_unique<LogicalChunkGet>(chunk_index, types, move(collection)); |
| 86 | |
| 87 | // then we generate the MARK join with the chunk scan on the RHS |
| 88 | auto join = make_unique<LogicalComparisonJoin>(JoinType::MARK); |
| 89 | join->mark_index = chunk_index; |
| 90 | join->AddChild(move(root)); |
| 91 | join->AddChild(move(chunk_scan)); |
| 92 | // create the JOIN condition |
| 93 | JoinCondition cond; |
| 94 | cond.left = move(expr.children[0]); |
| 95 | |
| 96 | cond.right = make_unique<BoundColumnRefExpression>(in_type, ColumnBinding(chunk_index, 0)); |
| 97 | cond.comparison = ExpressionType::COMPARE_EQUAL; |
| 98 | join->conditions.push_back(move(cond)); |
| 99 | root = move(join); |
| 100 | |
| 101 | // we replace the original subquery with a BoundColumnRefExpression refering to the mark column |
| 102 | unique_ptr<Expression> result = |
| 103 | make_unique<BoundColumnRefExpression>("IN (...)" , TypeId::BOOL, ColumnBinding(chunk_index, 0)); |
| 104 | if (!is_regular_in) { |
| 105 | // NOT IN: invert |
| 106 | auto invert = make_unique<BoundOperatorExpression>(ExpressionType::OPERATOR_NOT, TypeId::BOOL); |
| 107 | invert->children.push_back(move(result)); |
| 108 | result = move(invert); |
| 109 | } |
| 110 | return result; |
| 111 | } |
| 112 | |
| 113 | } // namespace duckdb |
| 114 | |