1 | #include "duckdb/optimizer/rule/like_optimizations.hpp" |
2 | |
3 | #include "duckdb/execution/expression_executor.hpp" |
4 | #include "duckdb/planner/expression/bound_function_expression.hpp" |
5 | #include "duckdb/planner/expression/bound_constant_expression.hpp" |
6 | #include "duckdb/planner/expression/bound_operator_expression.hpp" |
7 | #include "duckdb/planner/expression/bound_comparison_expression.hpp" |
8 | |
9 | namespace duckdb { |
10 | |
11 | LikeOptimizationRule::LikeOptimizationRule(ExpressionRewriter &rewriter) : Rule(rewriter) { |
12 | // match on a FunctionExpression that has a foldable ConstantExpression |
13 | auto func = make_uniq<FunctionExpressionMatcher>(); |
14 | func->matchers.push_back(x: make_uniq<ExpressionMatcher>()); |
15 | func->matchers.push_back(x: make_uniq<ConstantExpressionMatcher>()); |
16 | func->policy = SetMatcher::Policy::ORDERED; |
17 | // we match on LIKE ("~~") and NOT LIKE ("!~~") |
18 | func->function = make_uniq<ManyFunctionMatcher>(args: unordered_set<string> {"!~~" , "~~" }); |
19 | root = std::move(func); |
20 | } |
21 | |
22 | static bool PatternIsConstant(const string &pattern) { |
23 | for (idx_t i = 0; i < pattern.size(); i++) { |
24 | if (pattern[i] == '%' || pattern[i] == '_') { |
25 | return false; |
26 | } |
27 | } |
28 | return true; |
29 | } |
30 | |
31 | static bool PatternIsPrefix(const string &pattern) { |
32 | idx_t i; |
33 | for (i = pattern.size(); i > 0; i--) { |
34 | if (pattern[i - 1] != '%') { |
35 | break; |
36 | } |
37 | } |
38 | if (i == pattern.size()) { |
39 | // no trailing % |
40 | // cannot be a prefix |
41 | return false; |
42 | } |
43 | // continue to look in the string |
44 | // if there is a % or _ in the string (besides at the very end) this is not a prefix match |
45 | for (; i > 0; i--) { |
46 | if (pattern[i - 1] == '%' || pattern[i - 1] == '_') { |
47 | return false; |
48 | } |
49 | } |
50 | return true; |
51 | } |
52 | |
53 | static bool PatternIsSuffix(const string &pattern) { |
54 | idx_t i; |
55 | for (i = 0; i < pattern.size(); i++) { |
56 | if (pattern[i] != '%') { |
57 | break; |
58 | } |
59 | } |
60 | if (i == 0) { |
61 | // no leading % |
62 | // cannot be a suffix |
63 | return false; |
64 | } |
65 | // continue to look in the string |
66 | // if there is a % or _ in the string (besides at the beginning) this is not a suffix match |
67 | for (; i < pattern.size(); i++) { |
68 | if (pattern[i] == '%' || pattern[i] == '_') { |
69 | return false; |
70 | } |
71 | } |
72 | return true; |
73 | } |
74 | |
75 | static bool PatternIsContains(const string &pattern) { |
76 | idx_t start; |
77 | idx_t end; |
78 | for (start = 0; start < pattern.size(); start++) { |
79 | if (pattern[start] != '%') { |
80 | break; |
81 | } |
82 | } |
83 | for (end = pattern.size(); end > 0; end--) { |
84 | if (pattern[end - 1] != '%') { |
85 | break; |
86 | } |
87 | } |
88 | if (start == 0 || end == pattern.size()) { |
89 | // contains requires both a leading AND a trailing % |
90 | return false; |
91 | } |
92 | // check if there are any other special characters in the string |
93 | // if there is a % or _ in the string (besides at the beginning/end) this is not a contains match |
94 | for (idx_t i = start; i < end; i++) { |
95 | if (pattern[i] == '%' || pattern[i] == '_') { |
96 | return false; |
97 | } |
98 | } |
99 | return true; |
100 | } |
101 | |
102 | unique_ptr<Expression> LikeOptimizationRule::Apply(LogicalOperator &op, vector<reference<Expression>> &bindings, |
103 | bool &changes_made, bool is_root) { |
104 | auto &root = bindings[0].get().Cast<BoundFunctionExpression>(); |
105 | auto &constant_expr = bindings[2].get().Cast<BoundConstantExpression>(); |
106 | D_ASSERT(root.children.size() == 2); |
107 | |
108 | if (constant_expr.value.IsNull()) { |
109 | return make_uniq<BoundConstantExpression>(args: Value(root.return_type)); |
110 | } |
111 | |
112 | // the constant_expr is a scalar expression that we have to fold |
113 | if (!constant_expr.IsFoldable()) { |
114 | return nullptr; |
115 | } |
116 | |
117 | auto constant_value = ExpressionExecutor::EvaluateScalar(context&: GetContext(), expr: constant_expr); |
118 | D_ASSERT(constant_value.type() == constant_expr.return_type); |
119 | auto &patt_str = StringValue::Get(value: constant_value); |
120 | |
121 | bool is_not_like = root.function.name == "!~~" ; |
122 | if (PatternIsConstant(pattern: patt_str)) { |
123 | // Pattern is constant |
124 | return make_uniq<BoundComparisonExpression>(args: is_not_like ? ExpressionType::COMPARE_NOTEQUAL |
125 | : ExpressionType::COMPARE_EQUAL, |
126 | args: std::move(root.children[0]), args: std::move(root.children[1])); |
127 | } else if (PatternIsPrefix(pattern: patt_str)) { |
128 | // Prefix LIKE pattern : [^%_]*[%]+, ignoring underscore |
129 | return ApplyRule(expr&: root, function: PrefixFun::GetFunction(), pattern: patt_str, is_not_like); |
130 | } else if (PatternIsSuffix(pattern: patt_str)) { |
131 | // Suffix LIKE pattern: [%]+[^%_]*, ignoring underscore |
132 | return ApplyRule(expr&: root, function: SuffixFun::GetFunction(), pattern: patt_str, is_not_like); |
133 | } else if (PatternIsContains(pattern: patt_str)) { |
134 | // Contains LIKE pattern: [%]+[^%_]*[%]+, ignoring underscore |
135 | return ApplyRule(expr&: root, function: ContainsFun::GetFunction(), pattern: patt_str, is_not_like); |
136 | } |
137 | return nullptr; |
138 | } |
139 | |
140 | unique_ptr<Expression> LikeOptimizationRule::ApplyRule(BoundFunctionExpression &expr, ScalarFunction function, |
141 | string pattern, bool is_not_like) { |
142 | // replace LIKE by an optimized function |
143 | unique_ptr<Expression> result; |
144 | auto new_function = |
145 | make_uniq<BoundFunctionExpression>(args&: expr.return_type, args: std::move(function), args: std::move(expr.children), args: nullptr); |
146 | |
147 | // removing "%" from the pattern |
148 | pattern.erase(first: std::remove(first: pattern.begin(), last: pattern.end(), value: '%'), last: pattern.end()); |
149 | |
150 | new_function->children[1] = make_uniq<BoundConstantExpression>(args: Value(std::move(pattern))); |
151 | |
152 | result = std::move(new_function); |
153 | if (is_not_like) { |
154 | auto negation = make_uniq<BoundOperatorExpression>(args: ExpressionType::OPERATOR_NOT, args: LogicalType::BOOLEAN); |
155 | negation->children.push_back(x: std::move(result)); |
156 | result = std::move(negation); |
157 | } |
158 | |
159 | return result; |
160 | } |
161 | |
162 | } // namespace duckdb |
163 | |