1 | #include "duckdb/common/vector.hpp" |
2 | #include <memory> |
3 | |
4 | #include "duckdb/common/re2_regex.hpp" |
5 | #include "re2/re2.h" |
6 | |
7 | namespace duckdb_re2 { |
8 | |
9 | Regex::Regex(const std::string &pattern, RegexOptions options) { |
10 | RE2::Options o; |
11 | o.set_case_sensitive(options == RegexOptions::CASE_INSENSITIVE); |
12 | regex = std::make_shared<duckdb_re2::RE2>(args: StringPiece(pattern), args&: o); |
13 | } |
14 | |
15 | bool RegexSearchInternal(const char *input, Match &match, const Regex &r, RE2::Anchor anchor, size_t start, |
16 | size_t end) { |
17 | auto ®ex = r.GetRegex(); |
18 | duckdb::vector<StringPiece> target_groups; |
19 | auto group_count = regex.NumberOfCapturingGroups() + 1; |
20 | target_groups.resize(new_size: group_count); |
21 | match.groups.clear(); |
22 | if (!regex.Match(text: StringPiece(input), startpos: start, endpos: end, re_anchor: anchor, submatch: target_groups.data(), nsubmatch: group_count)) { |
23 | return false; |
24 | } |
25 | for (auto &group : target_groups) { |
26 | GroupMatch group_match; |
27 | group_match.text = group.ToString(); |
28 | group_match.position = group.data() - input; |
29 | match.groups.emplace_back(args&: group_match); |
30 | } |
31 | return true; |
32 | } |
33 | |
34 | bool RegexSearch(const std::string &input, Match &match, const Regex ®ex) { |
35 | return RegexSearchInternal(input: input.c_str(), match, r: regex, anchor: RE2::UNANCHORED, start: 0, end: input.size()); |
36 | } |
37 | |
38 | bool RegexMatch(const std::string &input, Match &match, const Regex ®ex) { |
39 | return RegexSearchInternal(input: input.c_str(), match, r: regex, anchor: RE2::ANCHOR_BOTH, start: 0, end: input.size()); |
40 | } |
41 | |
42 | bool RegexMatch(const char *start, const char *end, Match &match, const Regex ®ex) { |
43 | return RegexSearchInternal(input: start, match, r: regex, anchor: RE2::ANCHOR_BOTH, start: 0, end: end - start); |
44 | } |
45 | |
46 | bool RegexMatch(const std::string &input, const Regex ®ex) { |
47 | Match nop_match; |
48 | return RegexSearchInternal(input: input.c_str(), match&: nop_match, r: regex, anchor: RE2::ANCHOR_BOTH, start: 0, end: input.size()); |
49 | } |
50 | |
51 | duckdb::vector<Match> RegexFindAll(const std::string &input, const Regex ®ex) { |
52 | duckdb::vector<Match> matches; |
53 | size_t position = 0; |
54 | Match match; |
55 | while (RegexSearchInternal(input: input.c_str(), match, r: regex, anchor: RE2::UNANCHORED, start: position, end: input.size())) { |
56 | position += match.position(index: 0) + match.length(index: 0); |
57 | matches.emplace_back(args: std::move(match)); |
58 | } |
59 | return matches; |
60 | } |
61 | |
62 | } // namespace duckdb_re2 |
63 | |