//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/function/scalar/regexp.hpp
//
//
//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/function/function_set.hpp"
12#include "re2/re2.h"
13#include "duckdb/function/built_in_functions.hpp"
14#include "re2/stringpiece.h"
15
16namespace duckdb {
17
18namespace regexp_util {
19
20bool TryParseConstantPattern(ClientContext &context, Expression &expr, string &constant_string);
21void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &result, bool *global_replace = nullptr);
22void ParseRegexOptions(ClientContext &context, Expression &expr, RE2::Options &target, bool *global_replace = nullptr);
23
24inline duckdb_re2::StringPiece CreateStringPiece(const string_t &input) {
25 return duckdb_re2::StringPiece(input.GetData(), input.GetSize());
26}
27
28inline string_t Extract(const string_t &input, Vector &result, const RE2 &re, const duckdb_re2::StringPiece &rewrite) {
29 string extracted;
30 RE2::Extract(text: input.GetString(), re, rewrite, out: &extracted);
31 return StringVector::AddString(vector&: result, data: extracted.c_str(), len: extracted.size());
32}
33
34} // namespace regexp_util
35
36struct RegexpExtractAll {
37 static void Execute(DataChunk &args, ExpressionState &state, Vector &result);
38 static unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
39 vector<unique_ptr<Expression>> &arguments);
40 static unique_ptr<FunctionLocalState> InitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
41 FunctionData *bind_data);
42};
43
44struct RegexpBaseBindData : public FunctionData {
45 RegexpBaseBindData();
46 RegexpBaseBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern = true);
47 virtual ~RegexpBaseBindData();
48
49 duckdb_re2::RE2::Options options;
50 string constant_string;
51 bool constant_pattern;
52
53 virtual bool Equals(const FunctionData &other_p) const override;
54};
55
56struct RegexpMatchesBindData : public RegexpBaseBindData {
57 RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern);
58 RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
59 string range_min, string range_max, bool range_success);
60
61 string range_min;
62 string range_max;
63 bool range_success;
64
65 unique_ptr<FunctionData> Copy() const override;
66};
67
68struct RegexpReplaceBindData : public RegexpBaseBindData {
69 RegexpReplaceBindData();
70 RegexpReplaceBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
71 bool global_replace);
72
73 bool global_replace;
74
75 unique_ptr<FunctionData> Copy() const override;
76 bool Equals(const FunctionData &other_p) const override;
77};
78
79struct RegexpExtractBindData : public RegexpBaseBindData {
80 RegexpExtractBindData();
81 RegexpExtractBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
82 string group_string);
83
84 string group_string;
85 duckdb_re2::StringPiece rewrite;
86
87 unique_ptr<FunctionData> Copy() const override;
88 bool Equals(const FunctionData &other_p) const override;
89};
90
91struct RegexStringPieceArgs {
92 RegexStringPieceArgs() : size(0), capacity(0), group_buffer(nullptr) {
93 }
94 void Init(idx_t size) {
95 this->size = size;
96 // Allocate for one extra, for the all-encompassing match group
97 this->capacity = size + 1;
98 group_buffer = AllocateArray<duckdb_re2::StringPiece>(size: capacity);
99 }
100 void SetSize(idx_t size) {
101 this->size = size;
102 if (size + 1 > capacity) {
103 Clear();
104 Init(size);
105 }
106 }
107
108 RegexStringPieceArgs &operator=(RegexStringPieceArgs &&other) {
109 std::swap(a&: this->size, b&: other.size);
110 std::swap(a&: this->capacity, b&: other.capacity);
111 std::swap(a&: this->group_buffer, b&: other.group_buffer);
112 return *this;
113 }
114
115 ~RegexStringPieceArgs() {
116 Clear();
117 }
118
119private:
120 void Clear() {
121 DeleteArray<duckdb_re2::StringPiece>(ptr: group_buffer, size: capacity);
122 group_buffer = nullptr;
123
124 size = 0;
125 capacity = 0;
126 }
127
128public:
129 idx_t size;
130 //! The currently allocated capacity for the groups
131 idx_t capacity;
132 //! Used by ExtractAll to pre-allocate the storage for the groups
133 duckdb_re2::StringPiece *group_buffer;
134};
135
136struct RegexLocalState : public FunctionLocalState {
137 explicit RegexLocalState(RegexpBaseBindData &info, bool extract_all = false)
138 : constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size()),
139 info.options) {
140 if (extract_all) {
141 auto group_count_p = constant_pattern.NumberOfCapturingGroups();
142 if (group_count_p != -1) {
143 group_buffer.Init(size: group_count_p);
144 }
145 }
146 D_ASSERT(info.constant_pattern);
147 }
148
149 RE2 constant_pattern;
150 //! Used by regexp_extract_all to pre-allocate the args
151 RegexStringPieceArgs group_buffer;
152};
153
154unique_ptr<FunctionLocalState> RegexInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
155 FunctionData *bind_data);
156unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function,
157 vector<unique_ptr<Expression>> &arguments);
158
159} // namespace duckdb
160