1#include "duckdb/optimizer/rule/like_optimizations.hpp"
2
3#include "duckdb/execution/expression_executor.hpp"
4#include "duckdb/planner/expression/bound_function_expression.hpp"
5#include "duckdb/planner/expression/bound_constant_expression.hpp"
6#include "duckdb/planner/expression/bound_operator_expression.hpp"
7#include "duckdb/planner/expression/bound_comparison_expression.hpp"
8
9namespace duckdb {
10
11LikeOptimizationRule::LikeOptimizationRule(ExpressionRewriter &rewriter) : Rule(rewriter) {
12 // match on a FunctionExpression that has a foldable ConstantExpression
13 auto func = make_uniq<FunctionExpressionMatcher>();
14 func->matchers.push_back(x: make_uniq<ExpressionMatcher>());
15 func->matchers.push_back(x: make_uniq<ConstantExpressionMatcher>());
16 func->policy = SetMatcher::Policy::ORDERED;
17 // we match on LIKE ("~~") and NOT LIKE ("!~~")
18 func->function = make_uniq<ManyFunctionMatcher>(args: unordered_set<string> {"!~~", "~~"});
19 root = std::move(func);
20}
21
22static bool PatternIsConstant(const string &pattern) {
23 for (idx_t i = 0; i < pattern.size(); i++) {
24 if (pattern[i] == '%' || pattern[i] == '_') {
25 return false;
26 }
27 }
28 return true;
29}
30
31static bool PatternIsPrefix(const string &pattern) {
32 idx_t i;
33 for (i = pattern.size(); i > 0; i--) {
34 if (pattern[i - 1] != '%') {
35 break;
36 }
37 }
38 if (i == pattern.size()) {
39 // no trailing %
40 // cannot be a prefix
41 return false;
42 }
43 // continue to look in the string
44 // if there is a % or _ in the string (besides at the very end) this is not a prefix match
45 for (; i > 0; i--) {
46 if (pattern[i - 1] == '%' || pattern[i - 1] == '_') {
47 return false;
48 }
49 }
50 return true;
51}
52
53static bool PatternIsSuffix(const string &pattern) {
54 idx_t i;
55 for (i = 0; i < pattern.size(); i++) {
56 if (pattern[i] != '%') {
57 break;
58 }
59 }
60 if (i == 0) {
61 // no leading %
62 // cannot be a suffix
63 return false;
64 }
65 // continue to look in the string
66 // if there is a % or _ in the string (besides at the beginning) this is not a suffix match
67 for (; i < pattern.size(); i++) {
68 if (pattern[i] == '%' || pattern[i] == '_') {
69 return false;
70 }
71 }
72 return true;
73}
74
75static bool PatternIsContains(const string &pattern) {
76 idx_t start;
77 idx_t end;
78 for (start = 0; start < pattern.size(); start++) {
79 if (pattern[start] != '%') {
80 break;
81 }
82 }
83 for (end = pattern.size(); end > 0; end--) {
84 if (pattern[end - 1] != '%') {
85 break;
86 }
87 }
88 if (start == 0 || end == pattern.size()) {
89 // contains requires both a leading AND a trailing %
90 return false;
91 }
92 // check if there are any other special characters in the string
93 // if there is a % or _ in the string (besides at the beginning/end) this is not a contains match
94 for (idx_t i = start; i < end; i++) {
95 if (pattern[i] == '%' || pattern[i] == '_') {
96 return false;
97 }
98 }
99 return true;
100}
101
102unique_ptr<Expression> LikeOptimizationRule::Apply(LogicalOperator &op, vector<reference<Expression>> &bindings,
103 bool &changes_made, bool is_root) {
104 auto &root = bindings[0].get().Cast<BoundFunctionExpression>();
105 auto &constant_expr = bindings[2].get().Cast<BoundConstantExpression>();
106 D_ASSERT(root.children.size() == 2);
107
108 if (constant_expr.value.IsNull()) {
109 return make_uniq<BoundConstantExpression>(args: Value(root.return_type));
110 }
111
112 // the constant_expr is a scalar expression that we have to fold
113 if (!constant_expr.IsFoldable()) {
114 return nullptr;
115 }
116
117 auto constant_value = ExpressionExecutor::EvaluateScalar(context&: GetContext(), expr: constant_expr);
118 D_ASSERT(constant_value.type() == constant_expr.return_type);
119 auto &patt_str = StringValue::Get(value: constant_value);
120
121 bool is_not_like = root.function.name == "!~~";
122 if (PatternIsConstant(pattern: patt_str)) {
123 // Pattern is constant
124 return make_uniq<BoundComparisonExpression>(args: is_not_like ? ExpressionType::COMPARE_NOTEQUAL
125 : ExpressionType::COMPARE_EQUAL,
126 args: std::move(root.children[0]), args: std::move(root.children[1]));
127 } else if (PatternIsPrefix(pattern: patt_str)) {
128 // Prefix LIKE pattern : [^%_]*[%]+, ignoring underscore
129 return ApplyRule(expr&: root, function: PrefixFun::GetFunction(), pattern: patt_str, is_not_like);
130 } else if (PatternIsSuffix(pattern: patt_str)) {
131 // Suffix LIKE pattern: [%]+[^%_]*, ignoring underscore
132 return ApplyRule(expr&: root, function: SuffixFun::GetFunction(), pattern: patt_str, is_not_like);
133 } else if (PatternIsContains(pattern: patt_str)) {
134 // Contains LIKE pattern: [%]+[^%_]*[%]+, ignoring underscore
135 return ApplyRule(expr&: root, function: ContainsFun::GetFunction(), pattern: patt_str, is_not_like);
136 }
137 return nullptr;
138}
139
140unique_ptr<Expression> LikeOptimizationRule::ApplyRule(BoundFunctionExpression &expr, ScalarFunction function,
141 string pattern, bool is_not_like) {
142 // replace LIKE by an optimized function
143 unique_ptr<Expression> result;
144 auto new_function =
145 make_uniq<BoundFunctionExpression>(args&: expr.return_type, args: std::move(function), args: std::move(expr.children), args: nullptr);
146
147 // removing "%" from the pattern
148 pattern.erase(first: std::remove(first: pattern.begin(), last: pattern.end(), value: '%'), last: pattern.end());
149
150 new_function->children[1] = make_uniq<BoundConstantExpression>(args: Value(std::move(pattern)));
151
152 result = std::move(new_function);
153 if (is_not_like) {
154 auto negation = make_uniq<BoundOperatorExpression>(args: ExpressionType::OPERATOR_NOT, args: LogicalType::BOOLEAN);
155 negation->children.push_back(x: std::move(result));
156 result = std::move(negation);
157 }
158
159 return result;
160}
161
162} // namespace duckdb
163