1#include "duckdb/function/scalar/string_functions.hpp"
2#include "duckdb/common/exception.hpp"
3#include "duckdb/common/vector_operations/vector_operations.hpp"
4#include "duckdb/execution/expression_executor.hpp"
5#include "duckdb/planner/expression/bound_function_expression.hpp"
6#include "duckdb/common/vector_operations/unary_executor.hpp"
7#include "duckdb/common/vector_operations/binary_executor.hpp"
8#include "duckdb/common/vector_operations/ternary_executor.hpp"
9#include "utf8proc_wrapper.hpp"
10
11#include "re2/re2.h"
12
13using namespace std;
14
15namespace duckdb {
16
17RegexpMatchesBindData::RegexpMatchesBindData(unique_ptr<RE2> constant_pattern, string range_min, string range_max,
18 bool range_success)
19 : constant_pattern(std::move(constant_pattern)), range_min(range_min), range_max(range_max),
20 range_success(range_success) {
21}
22
23RegexpMatchesBindData::~RegexpMatchesBindData() {
24}
25
26unique_ptr<FunctionData> RegexpMatchesBindData::Copy() {
27 return make_unique<RegexpMatchesBindData>(move(constant_pattern), range_min, range_max, range_success);
28}
29
30static inline re2::StringPiece CreateStringPiece(string_t &input) {
31 return re2::StringPiece(input.GetData(), input.GetSize());
32}
33
34struct RegexPartialMatch {
35 static inline bool Operation(const re2::StringPiece &input, RE2 &re) {
36 return RE2::PartialMatch(input, re);
37 }
38};
39
40struct RegexFullMatch {
41 static inline bool Operation(const re2::StringPiece &input, RE2 &re) {
42 return RE2::FullMatch(input, re);
43 }
44};
45
46template <class OP> static void regexp_matches_function(DataChunk &args, ExpressionState &state, Vector &result) {
47 auto &strings = args.data[0];
48 auto &patterns = args.data[1];
49
50 auto &func_expr = (BoundFunctionExpression &)state.expr;
51 auto &info = (RegexpMatchesBindData &)*func_expr.bind_info;
52
53 RE2::Options options;
54 options.set_log_errors(false);
55
56 if (info.constant_pattern) {
57 // FIXME: this should be a unary loop
58 UnaryExecutor::Execute<string_t, bool, true>(strings, result, args.size(), [&](string_t input) {
59 return OP::Operation(CreateStringPiece(input), *info.constant_pattern);
60 });
61 } else {
62 BinaryExecutor::Execute<string_t, string_t, bool, true>(strings, patterns, result, args.size(),
63 [&](string_t input, string_t pattern) {
64 RE2 re(CreateStringPiece(pattern), options);
65 if (!re.ok()) {
66 throw Exception(re.error());
67 }
68 return OP::Operation(CreateStringPiece(input), re);
69 });
70 }
71}
72
73static unique_ptr<FunctionData> regexp_matches_get_bind_function(BoundFunctionExpression &expr,
74 ClientContext &context) {
75 // pattern is the second argument. If its constant, we can already prepare the pattern and store it for later.
76 assert(expr.children.size() == 2);
77 if (expr.children[1]->IsScalar()) {
78 Value pattern_str = ExpressionExecutor::EvaluateScalar(*expr.children[1]);
79 if (!pattern_str.is_null && pattern_str.type == TypeId::VARCHAR) {
80 RE2::Options options;
81 options.set_log_errors(false);
82 auto re = make_unique<RE2>(pattern_str.str_value, options);
83 if (!re->ok()) {
84 throw Exception(re->error());
85 }
86
87 string range_min, range_max;
88 auto range_success = re->PossibleMatchRange(&range_min, &range_max, 1000);
89 return make_unique<RegexpMatchesBindData>(move(re), range_min, range_max, range_success);
90 }
91 }
92 return make_unique<RegexpMatchesBindData>(nullptr, "", "", false);
93}
94
95static void regexp_replace_function(DataChunk &args, ExpressionState &state, Vector &result) {
96 auto &strings = args.data[0];
97 auto &patterns = args.data[1];
98 auto &replaces = args.data[2];
99
100 RE2::Options options;
101 options.set_log_errors(false);
102
103 TernaryExecutor::Execute<string_t, string_t, string_t, string_t>(
104 strings, patterns, replaces, result, args.size(), [&](string_t input, string_t pattern, string_t replace) {
105 RE2 re(CreateStringPiece(pattern), options);
106 std::string sstring(input.GetData(), input.GetSize());
107 RE2::Replace(&sstring, re, CreateStringPiece(replace));
108 return StringVector::AddString(result, sstring);
109 });
110}
111
112void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
113 set.AddFunction(ScalarFunction("regexp_full_match", {SQLType::VARCHAR, SQLType::VARCHAR}, SQLType::BOOLEAN,
114 regexp_matches_function<RegexFullMatch>, false, regexp_matches_get_bind_function));
115 set.AddFunction(ScalarFunction("regexp_matches", {SQLType::VARCHAR, SQLType::VARCHAR}, SQLType::BOOLEAN,
116 regexp_matches_function<RegexPartialMatch>, false,
117 regexp_matches_get_bind_function));
118 set.AddFunction(ScalarFunction("regexp_replace", {SQLType::VARCHAR, SQLType::VARCHAR, SQLType::VARCHAR},
119 SQLType::VARCHAR, regexp_replace_function));
120}
121
122} // namespace duckdb
123