| 1 | #include "duckdb/optimizer/rule/regex_optimizations.hpp" |
|---|---|
| 2 | |
| 3 | #include "duckdb/execution/expression_executor.hpp" |
| 4 | #include "duckdb/planner/expression/bound_function_expression.hpp" |
| 5 | #include "duckdb/planner/expression/bound_constant_expression.hpp" |
| 6 | |
| 7 | #include "re2/re2.h" |
| 8 | #include "re2/regexp.h" |
| 9 | |
| 10 | namespace duckdb { |
| 11 | |
| 12 | RegexOptimizationRule::RegexOptimizationRule(ExpressionRewriter &rewriter) : Rule(rewriter) { |
| 13 | auto func = make_uniq<FunctionExpressionMatcher>(); |
| 14 | func->function = make_uniq<SpecificFunctionMatcher>(args: "regexp_matches"); |
| 15 | func->policy = SetMatcher::Policy::ORDERED; |
| 16 | func->matchers.push_back(x: make_uniq<ExpressionMatcher>()); |
| 17 | func->matchers.push_back(x: make_uniq<ConstantExpressionMatcher>()); |
| 18 | root = std::move(func); |
| 19 | } |
| 20 | |
| 21 | unique_ptr<Expression> RegexOptimizationRule::Apply(LogicalOperator &op, vector<reference<Expression>> &bindings, |
| 22 | bool &changes_made, bool is_root) { |
| 23 | auto &root = bindings[0].get().Cast<BoundFunctionExpression>(); |
| 24 | auto &constant_expr = bindings[2].get().Cast<BoundConstantExpression>(); |
| 25 | D_ASSERT(root.children.size() == 2); |
| 26 | |
| 27 | if (constant_expr.value.IsNull()) { |
| 28 | return make_uniq<BoundConstantExpression>(args: Value(root.return_type)); |
| 29 | } |
| 30 | |
| 31 | // the constant_expr is a scalar expression that we have to fold |
| 32 | if (!constant_expr.IsFoldable()) { |
| 33 | return nullptr; |
| 34 | } |
| 35 | |
| 36 | auto constant_value = ExpressionExecutor::EvaluateScalar(context&: GetContext(), expr: constant_expr); |
| 37 | D_ASSERT(constant_value.type() == constant_expr.return_type); |
| 38 | auto patt_str = StringValue::Get(value: constant_value); |
| 39 | |
| 40 | duckdb_re2::RE2 pattern(patt_str); |
| 41 | if (!pattern.ok()) { |
| 42 | return nullptr; // this should fail somewhere else |
| 43 | } |
| 44 | |
| 45 | if (pattern.Regexp()->op() == duckdb_re2::kRegexpLiteralString || |
| 46 | pattern.Regexp()->op() == duckdb_re2::kRegexpLiteral) { |
| 47 | |
| 48 | string min; |
| 49 | string max; |
| 50 | pattern.PossibleMatchRange(min: &min, max: &max, maxlen: patt_str.size() + 1); |
| 51 | if (min != max) { |
| 52 | return nullptr; |
| 53 | } |
| 54 | auto parameter = make_uniq<BoundConstantExpression>(args: Value(std::move(min))); |
| 55 | auto contains = make_uniq<BoundFunctionExpression>(args&: root.return_type, args: ContainsFun::GetFunction(), |
| 56 | args: std::move(root.children), args: nullptr); |
| 57 | contains->children[1] = std::move(parameter); |
| 58 | |
| 59 | return std::move(contains); |
| 60 | } |
| 61 | return nullptr; |
| 62 | } |
| 63 | |
| 64 | } // namespace duckdb |
| 65 |