| 1 | #include "duckdb/function/scalar/string_functions.hpp" | 
|---|
| 2 | #include "duckdb/common/exception.hpp" | 
|---|
| 3 | #include "duckdb/common/vector_operations/vector_operations.hpp" | 
|---|
| 4 | #include "duckdb/execution/expression_executor.hpp" | 
|---|
| 5 | #include "duckdb/planner/expression/bound_function_expression.hpp" | 
|---|
| 6 | #include "duckdb/common/vector_operations/unary_executor.hpp" | 
|---|
| 7 | #include "duckdb/common/vector_operations/binary_executor.hpp" | 
|---|
| 8 | #include "duckdb/common/vector_operations/ternary_executor.hpp" | 
|---|
| 9 | #include "utf8proc_wrapper.hpp" | 
|---|
| 10 |  | 
|---|
| 11 | #include "re2/re2.h" | 
|---|
| 12 |  | 
|---|
| 13 | using namespace std; | 
|---|
| 14 |  | 
|---|
| 15 | namespace duckdb { | 
|---|
| 16 |  | 
|---|
| 17 | RegexpMatchesBindData::RegexpMatchesBindData(unique_ptr<RE2> constant_pattern, string range_min, string range_max, | 
|---|
| 18 | bool range_success) | 
|---|
| 19 | : constant_pattern(std::move(constant_pattern)), range_min(range_min), range_max(range_max), | 
|---|
| 20 | range_success(range_success) { | 
|---|
| 21 | } | 
|---|
| 22 |  | 
|---|
| 23 | RegexpMatchesBindData::~RegexpMatchesBindData() { | 
|---|
| 24 | } | 
|---|
| 25 |  | 
|---|
| 26 | unique_ptr<FunctionData> RegexpMatchesBindData::Copy() { | 
|---|
| 27 | return make_unique<RegexpMatchesBindData>(move(constant_pattern), range_min, range_max, range_success); | 
|---|
| 28 | } | 
|---|
| 29 |  | 
|---|
| 30 | static inline re2::StringPiece CreateStringPiece(string_t &input) { | 
|---|
| 31 | return re2::StringPiece(input.GetData(), input.GetSize()); | 
|---|
| 32 | } | 
|---|
| 33 |  | 
|---|
| 34 | struct RegexPartialMatch { | 
|---|
| 35 | static inline bool Operation(const re2::StringPiece &input, RE2 &re) { | 
|---|
| 36 | return RE2::PartialMatch(input, re); | 
|---|
| 37 | } | 
|---|
| 38 | }; | 
|---|
| 39 |  | 
|---|
| 40 | struct RegexFullMatch { | 
|---|
| 41 | static inline bool Operation(const re2::StringPiece &input, RE2 &re) { | 
|---|
| 42 | return RE2::FullMatch(input, re); | 
|---|
| 43 | } | 
|---|
| 44 | }; | 
|---|
| 45 |  | 
|---|
| 46 | template <class OP> static void regexp_matches_function(DataChunk &args, ExpressionState &state, Vector &result) { | 
|---|
| 47 | auto &strings = args.data[0]; | 
|---|
| 48 | auto &patterns = args.data[1]; | 
|---|
| 49 |  | 
|---|
| 50 | auto &func_expr = (BoundFunctionExpression &)state.expr; | 
|---|
| 51 | auto &info = (RegexpMatchesBindData &)*func_expr.bind_info; | 
|---|
| 52 |  | 
|---|
| 53 | RE2::Options options; | 
|---|
| 54 | options.set_log_errors(false); | 
|---|
| 55 |  | 
|---|
| 56 | if (info.constant_pattern) { | 
|---|
| 57 | // FIXME: this should be a unary loop | 
|---|
| 58 | UnaryExecutor::Execute<string_t, bool, true>(strings, result, args.size(), [&](string_t input) { | 
|---|
| 59 | return OP::Operation(CreateStringPiece(input), *info.constant_pattern); | 
|---|
| 60 | }); | 
|---|
| 61 | } else { | 
|---|
| 62 | BinaryExecutor::Execute<string_t, string_t, bool, true>(strings, patterns, result, args.size(), | 
|---|
| 63 | [&](string_t input, string_t pattern) { | 
|---|
| 64 | RE2 re(CreateStringPiece(pattern), options); | 
|---|
| 65 | if (!re.ok()) { | 
|---|
| 66 | throw Exception(re.error()); | 
|---|
| 67 | } | 
|---|
| 68 | return OP::Operation(CreateStringPiece(input), re); | 
|---|
| 69 | }); | 
|---|
| 70 | } | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | static unique_ptr<FunctionData> regexp_matches_get_bind_function(BoundFunctionExpression &expr, | 
|---|
| 74 | ClientContext &context) { | 
|---|
| 75 | // pattern is the second argument. If its constant, we can already prepare the pattern and store it for later. | 
|---|
| 76 | assert(expr.children.size() == 2); | 
|---|
| 77 | if (expr.children[1]->IsScalar()) { | 
|---|
| 78 | Value pattern_str = ExpressionExecutor::EvaluateScalar(*expr.children[1]); | 
|---|
| 79 | if (!pattern_str.is_null && pattern_str.type == TypeId::VARCHAR) { | 
|---|
| 80 | RE2::Options options; | 
|---|
| 81 | options.set_log_errors(false); | 
|---|
| 82 | auto re = make_unique<RE2>(pattern_str.str_value, options); | 
|---|
| 83 | if (!re->ok()) { | 
|---|
| 84 | throw Exception(re->error()); | 
|---|
| 85 | } | 
|---|
| 86 |  | 
|---|
| 87 | string range_min, range_max; | 
|---|
| 88 | auto range_success = re->PossibleMatchRange(&range_min, &range_max, 1000); | 
|---|
| 89 | return make_unique<RegexpMatchesBindData>(move(re), range_min, range_max, range_success); | 
|---|
| 90 | } | 
|---|
| 91 | } | 
|---|
| 92 | return make_unique<RegexpMatchesBindData>(nullptr, "", "", false); | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | static void regexp_replace_function(DataChunk &args, ExpressionState &state, Vector &result) { | 
|---|
| 96 | auto &strings = args.data[0]; | 
|---|
| 97 | auto &patterns = args.data[1]; | 
|---|
| 98 | auto &replaces = args.data[2]; | 
|---|
| 99 |  | 
|---|
| 100 | RE2::Options options; | 
|---|
| 101 | options.set_log_errors(false); | 
|---|
| 102 |  | 
|---|
| 103 | TernaryExecutor::Execute<string_t, string_t, string_t, string_t>( | 
|---|
| 104 | strings, patterns, replaces, result, args.size(), [&](string_t input, string_t pattern, string_t replace) { | 
|---|
| 105 | RE2 re(CreateStringPiece(pattern), options); | 
|---|
| 106 | std::string sstring(input.GetData(), input.GetSize()); | 
|---|
| 107 | RE2::Replace(&sstring, re, CreateStringPiece(replace)); | 
|---|
| 108 | return StringVector::AddString(result, sstring); | 
|---|
| 109 | }); | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | void RegexpFun::RegisterFunction(BuiltinFunctions &set) { | 
|---|
| 113 | set.AddFunction(ScalarFunction( "regexp_full_match", {SQLType::VARCHAR, SQLType::VARCHAR}, SQLType::BOOLEAN, | 
|---|
| 114 | regexp_matches_function<RegexFullMatch>, false, regexp_matches_get_bind_function)); | 
|---|
| 115 | set.AddFunction(ScalarFunction( "regexp_matches", {SQLType::VARCHAR, SQLType::VARCHAR}, SQLType::BOOLEAN, | 
|---|
| 116 | regexp_matches_function<RegexPartialMatch>, false, | 
|---|
| 117 | regexp_matches_get_bind_function)); | 
|---|
| 118 | set.AddFunction(ScalarFunction( "regexp_replace", {SQLType::VARCHAR, SQLType::VARCHAR, SQLType::VARCHAR}, | 
|---|
| 119 | SQLType::VARCHAR, regexp_replace_function)); | 
|---|
| 120 | } | 
|---|
| 121 |  | 
|---|
| 122 | } // namespace duckdb | 
|---|
| 123 |  | 
|---|