1 | #include "duckdb/function/scalar/regexp.hpp" |
2 | |
3 | #include "duckdb/common/exception.hpp" |
4 | #include "duckdb/common/vector_operations/binary_executor.hpp" |
5 | #include "duckdb/common/vector_operations/ternary_executor.hpp" |
6 | #include "duckdb/common/vector_operations/unary_executor.hpp" |
7 | #include "duckdb/common/vector_operations/vector_operations.hpp" |
8 | #include "duckdb/execution/expression_executor.hpp" |
9 | #include "duckdb/function/scalar/string_functions.hpp" |
10 | #include "duckdb/planner/expression/bound_function_expression.hpp" |
11 | #include "utf8proc_wrapper.hpp" |
12 | |
13 | namespace duckdb { |
14 | |
15 | using regexp_util::CreateStringPiece; |
16 | using regexp_util::Extract; |
17 | using regexp_util::ParseRegexOptions; |
18 | using regexp_util::TryParseConstantPattern; |
19 | |
20 | static bool RegexOptionsEquals(const duckdb_re2::RE2::Options &opt_a, const duckdb_re2::RE2::Options &opt_b) { |
21 | return opt_a.case_sensitive() == opt_b.case_sensitive(); |
22 | } |
23 | |
24 | RegexpBaseBindData::RegexpBaseBindData() : constant_pattern(false) { |
25 | } |
26 | RegexpBaseBindData::RegexpBaseBindData(duckdb_re2::RE2::Options options, string constant_string_p, |
27 | bool constant_pattern) |
28 | : options(options), constant_string(std::move(constant_string_p)), constant_pattern(constant_pattern) { |
29 | } |
30 | |
31 | RegexpBaseBindData::~RegexpBaseBindData() { |
32 | } |
33 | |
34 | bool RegexpBaseBindData::Equals(const FunctionData &other_p) const { |
35 | auto &other = other_p.Cast<RegexpBaseBindData>(); |
36 | return constant_pattern == other.constant_pattern && constant_string == other.constant_string && |
37 | RegexOptionsEquals(opt_a: options, opt_b: other.options); |
38 | } |
39 | |
40 | unique_ptr<FunctionLocalState> RegexInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr, |
41 | FunctionData *bind_data) { |
42 | auto &info = bind_data->Cast<RegexpBaseBindData>(); |
43 | if (info.constant_pattern) { |
44 | return make_uniq<RegexLocalState>(args&: info); |
45 | } |
46 | return nullptr; |
47 | } |
48 | |
49 | //===--------------------------------------------------------------------===// |
50 | // Regexp Matches |
51 | //===--------------------------------------------------------------------===// |
52 | RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p, |
53 | bool constant_pattern) |
54 | : RegexpBaseBindData(options, std::move(constant_string_p), constant_pattern) { |
55 | if (constant_pattern) { |
56 | auto pattern = make_uniq<RE2>(args&: constant_string, args&: options); |
57 | if (!pattern->ok()) { |
58 | throw Exception(pattern->error()); |
59 | } |
60 | |
61 | range_success = pattern->PossibleMatchRange(min: &range_min, max: &range_max, maxlen: 1000); |
62 | } else { |
63 | range_success = false; |
64 | } |
65 | } |
66 | |
67 | RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p, |
68 | bool constant_pattern, string range_min_p, string range_max_p, |
69 | bool range_success) |
70 | : RegexpBaseBindData(options, std::move(constant_string_p), constant_pattern), range_min(std::move(range_min_p)), |
71 | range_max(std::move(range_max_p)), range_success(range_success) { |
72 | } |
73 | |
74 | unique_ptr<FunctionData> RegexpMatchesBindData::Copy() const { |
75 | return make_uniq<RegexpMatchesBindData>(args: options, args: constant_string, args: constant_pattern, args: range_min, args: range_max, |
76 | args: range_success); |
77 | } |
78 | |
79 | unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function, |
80 | vector<unique_ptr<Expression>> &arguments) { |
81 | // pattern is the second argument. If its constant, we can already prepare the pattern and store it for later. |
82 | D_ASSERT(arguments.size() == 2 || arguments.size() == 3); |
83 | RE2::Options options; |
84 | options.set_log_errors(false); |
85 | if (arguments.size() == 3) { |
86 | ParseRegexOptions(context, expr&: *arguments[2], target&: options); |
87 | } |
88 | |
89 | string constant_string; |
90 | bool constant_pattern; |
91 | constant_pattern = TryParseConstantPattern(context, expr&: *arguments[1], constant_string); |
92 | return make_uniq<RegexpMatchesBindData>(args&: options, args: std::move(constant_string), args&: constant_pattern); |
93 | } |
94 | |
95 | struct RegexPartialMatch { |
96 | static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) { |
97 | return duckdb_re2::RE2::PartialMatch(text: input, re); |
98 | } |
99 | }; |
100 | |
101 | struct RegexFullMatch { |
102 | static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) { |
103 | return duckdb_re2::RE2::FullMatch(text: input, re); |
104 | } |
105 | }; |
106 | |
107 | template <class OP> |
108 | static void RegexpMatchesFunction(DataChunk &args, ExpressionState &state, Vector &result) { |
109 | auto &strings = args.data[0]; |
110 | auto &patterns = args.data[1]; |
111 | |
112 | auto &func_expr = state.expr.Cast<BoundFunctionExpression>(); |
113 | auto &info = func_expr.bind_info->Cast<RegexpMatchesBindData>(); |
114 | |
115 | if (info.constant_pattern) { |
116 | auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>(); |
117 | UnaryExecutor::Execute<string_t, bool>(strings, result, args.size(), [&](string_t input) { |
118 | return OP::Operation(CreateStringPiece(input), lstate.constant_pattern); |
119 | }); |
120 | } else { |
121 | BinaryExecutor::Execute<string_t, string_t, bool>(strings, patterns, result, args.size(), |
122 | [&](string_t input, string_t pattern) { |
123 | RE2 re(CreateStringPiece(input: pattern), info.options); |
124 | if (!re.ok()) { |
125 | throw Exception(re.error()); |
126 | } |
127 | return OP::Operation(CreateStringPiece(input), re); |
128 | }); |
129 | } |
130 | } |
131 | |
132 | //===--------------------------------------------------------------------===// |
133 | // Regexp Replace |
134 | //===--------------------------------------------------------------------===// |
135 | RegexpReplaceBindData::RegexpReplaceBindData() : global_replace(false) { |
136 | } |
137 | |
138 | RegexpReplaceBindData::RegexpReplaceBindData(duckdb_re2::RE2::Options options, string constant_string_p, |
139 | bool constant_pattern, bool global_replace) |
140 | : RegexpBaseBindData(options, std::move(constant_string_p), constant_pattern), global_replace(global_replace) { |
141 | } |
142 | |
143 | unique_ptr<FunctionData> RegexpReplaceBindData::Copy() const { |
144 | auto copy = make_uniq<RegexpReplaceBindData>(args: options, args: constant_string, args: constant_pattern, args: global_replace); |
145 | return std::move(copy); |
146 | } |
147 | |
148 | bool RegexpReplaceBindData::Equals(const FunctionData &other_p) const { |
149 | auto &other = other_p.Cast<RegexpReplaceBindData>(); |
150 | return RegexpBaseBindData::Equals(other_p: other) && global_replace == other.global_replace; |
151 | } |
152 | |
153 | static unique_ptr<FunctionData> RegexReplaceBind(ClientContext &context, ScalarFunction &bound_function, |
154 | vector<unique_ptr<Expression>> &arguments) { |
155 | auto data = make_uniq<RegexpReplaceBindData>(); |
156 | |
157 | data->constant_pattern = TryParseConstantPattern(context, expr&: *arguments[1], constant_string&: data->constant_string); |
158 | if (arguments.size() == 4) { |
159 | ParseRegexOptions(context, expr&: *arguments[3], target&: data->options, global_replace: &data->global_replace); |
160 | } |
161 | data->options.set_log_errors(false); |
162 | return std::move(data); |
163 | } |
164 | |
165 | static void RegexReplaceFunction(DataChunk &args, ExpressionState &state, Vector &result) { |
166 | auto &func_expr = state.expr.Cast<BoundFunctionExpression>(); |
167 | auto &info = func_expr.bind_info->Cast<RegexpReplaceBindData>(); |
168 | |
169 | auto &strings = args.data[0]; |
170 | auto &patterns = args.data[1]; |
171 | auto &replaces = args.data[2]; |
172 | |
173 | if (info.constant_pattern) { |
174 | auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>(); |
175 | BinaryExecutor::Execute<string_t, string_t, string_t>( |
176 | left&: strings, right&: replaces, result, count: args.size(), fun: [&](string_t input, string_t replace) { |
177 | std::string sstring = input.GetString(); |
178 | if (info.global_replace) { |
179 | RE2::GlobalReplace(str: &sstring, re: lstate.constant_pattern, rewrite: CreateStringPiece(input: replace)); |
180 | } else { |
181 | RE2::Replace(str: &sstring, re: lstate.constant_pattern, rewrite: CreateStringPiece(input: replace)); |
182 | } |
183 | return StringVector::AddString(vector&: result, data: sstring); |
184 | }); |
185 | } else { |
186 | TernaryExecutor::Execute<string_t, string_t, string_t, string_t>( |
187 | a&: strings, b&: patterns, c&: replaces, result, count: args.size(), fun: [&](string_t input, string_t pattern, string_t replace) { |
188 | RE2 re(CreateStringPiece(input: pattern), info.options); |
189 | std::string sstring = input.GetString(); |
190 | if (info.global_replace) { |
191 | RE2::GlobalReplace(str: &sstring, re, rewrite: CreateStringPiece(input: replace)); |
192 | } else { |
193 | RE2::Replace(str: &sstring, re, rewrite: CreateStringPiece(input: replace)); |
194 | } |
195 | return StringVector::AddString(vector&: result, data: sstring); |
196 | }); |
197 | } |
198 | } |
199 | |
200 | //===--------------------------------------------------------------------===// |
201 | // Regexp Extract |
202 | //===--------------------------------------------------------------------===// |
203 | RegexpExtractBindData::() { |
204 | } |
205 | |
206 | RegexpExtractBindData::(duckdb_re2::RE2::Options options, string constant_string_p, |
207 | bool constant_pattern, string group_string_p) |
208 | : RegexpBaseBindData(options, std::move(constant_string_p), constant_pattern), |
209 | group_string(std::move(group_string_p)), rewrite(group_string) { |
210 | } |
211 | |
212 | unique_ptr<FunctionData> RegexpExtractBindData::() const { |
213 | return make_uniq<RegexpExtractBindData>(args: options, args: constant_string, args: constant_pattern, args: group_string); |
214 | } |
215 | |
216 | bool RegexpExtractBindData::(const FunctionData &other_p) const { |
217 | auto &other = other_p.Cast<RegexpExtractBindData>(); |
218 | return RegexpBaseBindData::Equals(other_p: other) && group_string == other.group_string; |
219 | } |
220 | |
221 | static void (DataChunk &args, ExpressionState &state, Vector &result) { |
222 | auto &func_expr = state.expr.Cast<BoundFunctionExpression>(); |
223 | const auto &info = func_expr.bind_info->Cast<RegexpExtractBindData>(); |
224 | |
225 | auto &strings = args.data[0]; |
226 | auto &patterns = args.data[1]; |
227 | if (info.constant_pattern) { |
228 | auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>(); |
229 | UnaryExecutor::Execute<string_t, string_t>(input&: strings, result, count: args.size(), fun: [&](string_t input) { |
230 | return Extract(input, result, re: lstate.constant_pattern, rewrite: info.rewrite); |
231 | }); |
232 | } else { |
233 | BinaryExecutor::Execute<string_t, string_t, string_t>(left&: strings, right&: patterns, result, count: args.size(), |
234 | fun: [&](string_t input, string_t pattern) { |
235 | RE2 re(CreateStringPiece(input: pattern), info.options); |
236 | return Extract(input, result, re, rewrite: info.rewrite); |
237 | }); |
238 | } |
239 | } |
240 | |
241 | //===--------------------------------------------------------------------===// |
242 | // Regexp Extract Struct |
243 | //===--------------------------------------------------------------------===// |
244 | static void (DataChunk &args, ExpressionState &state, Vector &result) { |
245 | auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<RegexLocalState>(); |
246 | |
247 | const auto count = args.size(); |
248 | auto &input = args.data[0]; |
249 | |
250 | auto &child_entries = StructVector::GetEntries(vector&: result); |
251 | const auto groupSize = child_entries.size(); |
252 | // Reference the 'input' StringBuffer, because we won't need to allocate new data |
253 | // for the result, all returned strings are substrings of the originals |
254 | for (auto &child_entry : child_entries) { |
255 | child_entry->SetAuxiliary(input.GetAuxiliary()); |
256 | } |
257 | |
258 | vector<RE2::Arg> argv(groupSize); |
259 | vector<RE2::Arg *> groups(groupSize); |
260 | vector<duckdb_re2::StringPiece> ws(groupSize); |
261 | for (size_t i = 0; i < groupSize; ++i) { |
262 | groups[i] = &argv[i]; |
263 | argv[i] = &ws[i]; |
264 | } |
265 | |
266 | if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { |
267 | result.SetVectorType(VectorType::CONSTANT_VECTOR); |
268 | |
269 | if (ConstantVector::IsNull(vector: input)) { |
270 | ConstantVector::SetNull(vector&: result, is_null: true); |
271 | } else { |
272 | ConstantVector::SetNull(vector&: result, is_null: false); |
273 | auto idata = ConstantVector::GetData<string_t>(vector&: input); |
274 | auto str = CreateStringPiece(input: idata[0]); |
275 | auto match = duckdb_re2::RE2::PartialMatchN(text: str, re: lstate.constant_pattern, args: groups.data(), n: groups.size()); |
276 | for (size_t col = 0; col < child_entries.size(); ++col) { |
277 | auto &child_entry = child_entries[col]; |
278 | ConstantVector::SetNull(vector&: *child_entry, is_null: false); |
279 | auto & = ws[col]; |
280 | auto cdata = ConstantVector::GetData<string_t>(vector&: *child_entry); |
281 | cdata[0] = string_t(extracted.data(), match ? extracted.size() : 0); |
282 | } |
283 | } |
284 | } else { |
285 | UnifiedVectorFormat iunified; |
286 | input.ToUnifiedFormat(count, data&: iunified); |
287 | |
288 | const auto &ivalidity = iunified.validity; |
289 | auto idata = UnifiedVectorFormat::GetData<string_t>(format: iunified); |
290 | |
291 | // Start with a valid flat vector |
292 | result.SetVectorType(VectorType::FLAT_VECTOR); |
293 | |
294 | // Start with valid children |
295 | for (size_t col = 0; col < child_entries.size(); ++col) { |
296 | auto &child_entry = child_entries[col]; |
297 | child_entry->SetVectorType(VectorType::FLAT_VECTOR); |
298 | } |
299 | |
300 | for (idx_t i = 0; i < count; ++i) { |
301 | const auto idx = iunified.sel->get_index(idx: i); |
302 | if (ivalidity.RowIsValid(row_idx: idx)) { |
303 | auto str = CreateStringPiece(input: idata[idx]); |
304 | auto match = duckdb_re2::RE2::PartialMatchN(text: str, re: lstate.constant_pattern, args: groups.data(), n: groups.size()); |
305 | for (size_t col = 0; col < child_entries.size(); ++col) { |
306 | auto &child_entry = child_entries[col]; |
307 | auto cdata = FlatVector::GetData<string_t>(vector&: *child_entry); |
308 | auto & = ws[col]; |
309 | cdata[i] = string_t(extracted.data(), match ? extracted.size() : 0); |
310 | } |
311 | } else { |
312 | FlatVector::SetNull(vector&: result, idx: i, is_null: true); |
313 | } |
314 | } |
315 | } |
316 | } |
317 | |
318 | static unique_ptr<FunctionData> (ClientContext &context, ScalarFunction &bound_function, |
319 | vector<unique_ptr<Expression>> &arguments) { |
320 | D_ASSERT(arguments.size() >= 2); |
321 | |
322 | duckdb_re2::RE2::Options options; |
323 | |
324 | string constant_string; |
325 | bool constant_pattern = TryParseConstantPattern(context, expr&: *arguments[1], constant_string); |
326 | |
327 | if (arguments.size() >= 4) { |
328 | ParseRegexOptions(context, expr&: *arguments[3], target&: options); |
329 | } |
330 | |
331 | string group_string = "\\0" ; |
332 | if (arguments.size() >= 3) { |
333 | if (arguments[2]->HasParameter()) { |
334 | throw ParameterNotResolvedException(); |
335 | } |
336 | if (!arguments[2]->IsFoldable()) { |
337 | throw InvalidInputException("Group specification field must be a constant!" ); |
338 | } |
339 | Value group = ExpressionExecutor::EvaluateScalar(context, expr: *arguments[2]); |
340 | if (group.IsNull()) { |
341 | group_string = "" ; |
342 | } else if (group.type().id() == LogicalTypeId::LIST) { |
343 | if (!constant_pattern) { |
344 | throw BinderException("%s with LIST requires a constant pattern" , bound_function.name); |
345 | } |
346 | auto &list_children = ListValue::GetChildren(value: group); |
347 | if (list_children.empty()) { |
348 | throw BinderException("%s requires non-empty lists of capture names" , bound_function.name); |
349 | } |
350 | case_insensitive_set_t name_collision_set; |
351 | child_list_t<LogicalType> struct_children; |
352 | for (const auto &child : list_children) { |
353 | if (child.IsNull()) { |
354 | throw BinderException("NULL group name in %s" , bound_function.name); |
355 | } |
356 | const auto group_name = child.ToString(); |
357 | if (name_collision_set.find(x: group_name) != name_collision_set.end()) { |
358 | throw BinderException("Duplicate group name \"%s\" in %s" , group_name, bound_function.name); |
359 | } |
360 | name_collision_set.insert(x: group_name); |
361 | struct_children.emplace_back(args: make_pair(x: group_name, y: LogicalType::VARCHAR)); |
362 | } |
363 | bound_function.return_type = LogicalType::STRUCT(children: struct_children); |
364 | |
365 | duckdb_re2::StringPiece constant_piece(constant_string.c_str(), constant_string.size()); |
366 | RE2 constant_pattern(constant_piece, options); |
367 | if (size_t(constant_pattern.NumberOfCapturingGroups()) < list_children.size()) { |
368 | throw BinderException("Not enough group names in %s" , bound_function.name); |
369 | } |
370 | } else { |
371 | auto group_idx = group.GetValue<int32_t>(); |
372 | if (group_idx < 0 || group_idx > 9) { |
373 | throw InvalidInputException("Group index must be between 0 and 9!" ); |
374 | } |
375 | group_string = "\\" + to_string(val: group_idx); |
376 | } |
377 | } |
378 | |
379 | return make_uniq<RegexpExtractBindData>(args&: options, args: std::move(constant_string), args&: constant_pattern, |
380 | args: std::move(group_string)); |
381 | } |
382 | |
383 | void RegexpFun::RegisterFunction(BuiltinFunctions &set) { |
384 | ScalarFunctionSet regexp_full_match("regexp_full_match" ); |
385 | regexp_full_match.AddFunction(function: ScalarFunction( |
386 | {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::BOOLEAN, RegexpMatchesFunction<RegexFullMatch>, |
387 | RegexpMatchesBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID, |
388 | FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
389 | regexp_full_match.AddFunction(function: ScalarFunction( |
390 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::BOOLEAN, |
391 | RegexpMatchesFunction<RegexFullMatch>, RegexpMatchesBind, nullptr, nullptr, RegexInitLocalState, |
392 | LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
393 | |
394 | ScalarFunctionSet regexp_partial_match("regexp_matches" ); |
395 | regexp_partial_match.AddFunction(function: ScalarFunction( |
396 | {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::BOOLEAN, RegexpMatchesFunction<RegexPartialMatch>, |
397 | RegexpMatchesBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID, |
398 | FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
399 | regexp_partial_match.AddFunction(function: ScalarFunction( |
400 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::BOOLEAN, |
401 | RegexpMatchesFunction<RegexPartialMatch>, RegexpMatchesBind, nullptr, nullptr, RegexInitLocalState, |
402 | LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
403 | |
404 | ScalarFunctionSet regexp_replace("regexp_replace" ); |
405 | regexp_replace.AddFunction(function: ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}, |
406 | LogicalType::VARCHAR, RegexReplaceFunction, RegexReplaceBind, nullptr, |
407 | nullptr, RegexInitLocalState)); |
408 | regexp_replace.AddFunction(function: ScalarFunction( |
409 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR, |
410 | RegexReplaceFunction, RegexReplaceBind, nullptr, nullptr, RegexInitLocalState)); |
411 | |
412 | ScalarFunctionSet ("regexp_extract" ); |
413 | regexp_extract.AddFunction( |
414 | function: ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR, RegexExtractFunction, |
415 | RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID, |
416 | FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
417 | regexp_extract.AddFunction(function: ScalarFunction( |
418 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER}, LogicalType::VARCHAR, RegexExtractFunction, |
419 | RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID, |
420 | FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
421 | regexp_extract.AddFunction(function: ScalarFunction( |
422 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR}, LogicalType::VARCHAR, |
423 | RegexExtractFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID, |
424 | FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
425 | // REGEXP_EXTRACT(<string>, <pattern>, [<group 1 name>[, <group n name>]...]) |
426 | regexp_extract.AddFunction(function: ScalarFunction( |
427 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::LIST(child: LogicalType::VARCHAR)}, LogicalType::VARCHAR, |
428 | RegexExtractStructFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID, |
429 | FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
430 | // REGEXP_EXTRACT(<string>, <pattern>, [<group 1 name>[, <group n name>]...], <options>) |
431 | regexp_extract.AddFunction(function: ScalarFunction( |
432 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::LIST(child: LogicalType::VARCHAR), LogicalType::VARCHAR}, |
433 | LogicalType::VARCHAR, RegexExtractStructFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, |
434 | LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
435 | |
436 | ScalarFunctionSet ("regexp_extract_all" ); |
437 | regexp_extract_all.AddFunction(function: ScalarFunction( |
438 | {LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::LIST(child: LogicalType::VARCHAR), |
439 | RegexpExtractAll::Execute, RegexpExtractAll::Bind, nullptr, nullptr, RegexpExtractAll::InitLocalState, |
440 | LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
441 | regexp_extract_all.AddFunction(function: ScalarFunction( |
442 | {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER}, LogicalType::LIST(child: LogicalType::VARCHAR), |
443 | RegexpExtractAll::Execute, RegexpExtractAll::Bind, nullptr, nullptr, RegexpExtractAll::InitLocalState, |
444 | LogicalType::INVALID, FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
445 | regexp_extract_all.AddFunction( |
446 | function: ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR}, |
447 | LogicalType::LIST(child: LogicalType::VARCHAR), RegexpExtractAll::Execute, RegexpExtractAll::Bind, |
448 | nullptr, nullptr, RegexpExtractAll::InitLocalState, LogicalType::INVALID, |
449 | FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING)); |
450 | |
451 | set.AddFunction(set: regexp_full_match); |
452 | set.AddFunction(set: regexp_partial_match); |
453 | set.AddFunction(set: regexp_replace); |
454 | set.AddFunction(set: regexp_extract); |
455 | set.AddFunction(set: regexp_extract_all); |
456 | } |
457 | |
458 | } // namespace duckdb |
459 | |