1 | #include "duckdb/optimizer/rule/regex_optimizations.hpp" |
---|---|
2 | |
3 | #include "duckdb/execution/expression_executor.hpp" |
4 | #include "duckdb/planner/expression/bound_function_expression.hpp" |
5 | #include "duckdb/planner/expression/bound_constant_expression.hpp" |
6 | |
7 | #include "re2/re2.h" |
8 | #include "re2/regexp.h" |
9 | |
10 | namespace duckdb { |
11 | |
12 | RegexOptimizationRule::RegexOptimizationRule(ExpressionRewriter &rewriter) : Rule(rewriter) { |
13 | auto func = make_uniq<FunctionExpressionMatcher>(); |
14 | func->function = make_uniq<SpecificFunctionMatcher>(args: "regexp_matches"); |
15 | func->policy = SetMatcher::Policy::ORDERED; |
16 | func->matchers.push_back(x: make_uniq<ExpressionMatcher>()); |
17 | func->matchers.push_back(x: make_uniq<ConstantExpressionMatcher>()); |
18 | root = std::move(func); |
19 | } |
20 | |
21 | unique_ptr<Expression> RegexOptimizationRule::Apply(LogicalOperator &op, vector<reference<Expression>> &bindings, |
22 | bool &changes_made, bool is_root) { |
23 | auto &root = bindings[0].get().Cast<BoundFunctionExpression>(); |
24 | auto &constant_expr = bindings[2].get().Cast<BoundConstantExpression>(); |
25 | D_ASSERT(root.children.size() == 2); |
26 | |
27 | if (constant_expr.value.IsNull()) { |
28 | return make_uniq<BoundConstantExpression>(args: Value(root.return_type)); |
29 | } |
30 | |
31 | // the constant_expr is a scalar expression that we have to fold |
32 | if (!constant_expr.IsFoldable()) { |
33 | return nullptr; |
34 | } |
35 | |
36 | auto constant_value = ExpressionExecutor::EvaluateScalar(context&: GetContext(), expr: constant_expr); |
37 | D_ASSERT(constant_value.type() == constant_expr.return_type); |
38 | auto patt_str = StringValue::Get(value: constant_value); |
39 | |
40 | duckdb_re2::RE2 pattern(patt_str); |
41 | if (!pattern.ok()) { |
42 | return nullptr; // this should fail somewhere else |
43 | } |
44 | |
45 | if (pattern.Regexp()->op() == duckdb_re2::kRegexpLiteralString || |
46 | pattern.Regexp()->op() == duckdb_re2::kRegexpLiteral) { |
47 | |
48 | string min; |
49 | string max; |
50 | pattern.PossibleMatchRange(min: &min, max: &max, maxlen: patt_str.size() + 1); |
51 | if (min != max) { |
52 | return nullptr; |
53 | } |
54 | auto parameter = make_uniq<BoundConstantExpression>(args: Value(std::move(min))); |
55 | auto contains = make_uniq<BoundFunctionExpression>(args&: root.return_type, args: ContainsFun::GetFunction(), |
56 | args: std::move(root.children), args: nullptr); |
57 | contains->children[1] = std::move(parameter); |
58 | |
59 | return std::move(contains); |
60 | } |
61 | return nullptr; |
62 | } |
63 | |
64 | } // namespace duckdb |
65 |