1 | // Copyright 2010 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | #include "re2/set.h" |
6 | |
7 | #include <stddef.h> |
8 | #include <algorithm> |
9 | #include <memory> |
10 | |
11 | #include "util/util.h" |
12 | #include "util/logging.h" |
13 | #include "re2/pod_array.h" |
14 | #include "re2/prog.h" |
15 | #include "re2/re2.h" |
16 | #include "re2/regexp.h" |
17 | #include "re2/stringpiece.h" |
18 | |
19 | namespace re2 { |
20 | |
21 | RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) { |
22 | options_.Copy(options); |
23 | options_.set_never_capture(true); // might unblock some optimisations |
24 | anchor_ = anchor; |
25 | prog_ = NULL; |
26 | compiled_ = false; |
27 | size_ = 0; |
28 | } |
29 | |
30 | RE2::Set::~Set() { |
31 | for (size_t i = 0; i < elem_.size(); i++) |
32 | elem_[i].second->Decref(); |
33 | delete prog_; |
34 | } |
35 | |
36 | int RE2::Set::Add(const StringPiece& pattern, std::string* error) { |
37 | if (compiled_) { |
38 | LOG(DFATAL) << "RE2::Set::Add() called after compiling" ; |
39 | return -1; |
40 | } |
41 | |
42 | Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( |
43 | options_.ParseFlags()); |
44 | RegexpStatus status; |
45 | re2::Regexp* re = Regexp::Parse(pattern, pf, &status); |
46 | if (re == NULL) { |
47 | if (error != NULL) |
48 | *error = status.Text(); |
49 | if (options_.log_errors()) |
50 | LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); |
51 | return -1; |
52 | } |
53 | |
54 | // Concatenate with match index and push on vector. |
55 | int n = static_cast<int>(elem_.size()); |
56 | re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); |
57 | if (re->op() == kRegexpConcat) { |
58 | int nsub = re->nsub(); |
59 | PODArray<re2::Regexp*> sub(nsub + 1); |
60 | for (int i = 0; i < nsub; i++) |
61 | sub[i] = re->sub()[i]->Incref(); |
62 | sub[nsub] = m; |
63 | re->Decref(); |
64 | re = re2::Regexp::Concat(sub.data(), nsub + 1, pf); |
65 | } else { |
66 | re2::Regexp* sub[2]; |
67 | sub[0] = re; |
68 | sub[1] = m; |
69 | re = re2::Regexp::Concat(sub, 2, pf); |
70 | } |
71 | elem_.emplace_back(std::string(pattern), re); |
72 | return n; |
73 | } |
74 | |
75 | bool RE2::Set::Compile() { |
76 | if (compiled_) { |
77 | LOG(DFATAL) << "RE2::Set::Compile() called more than once" ; |
78 | return false; |
79 | } |
80 | compiled_ = true; |
81 | size_ = static_cast<int>(elem_.size()); |
82 | |
83 | // Sort the elements by their patterns. This is good enough for now |
84 | // until we have a Regexp comparison function. (Maybe someday...) |
85 | std::sort(elem_.begin(), elem_.end(), |
86 | [](const Elem& a, const Elem& b) -> bool { |
87 | return a.first < b.first; |
88 | }); |
89 | |
90 | PODArray<re2::Regexp*> sub(size_); |
91 | for (int i = 0; i < size_; i++) |
92 | sub[i] = elem_[i].second; |
93 | elem_.clear(); |
94 | elem_.shrink_to_fit(); |
95 | |
96 | Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( |
97 | options_.ParseFlags()); |
98 | re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf); |
99 | |
100 | prog_ = Prog::CompileSet(re, anchor_, options_.max_mem()); |
101 | re->Decref(); |
102 | return prog_ != NULL; |
103 | } |
104 | |
105 | bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const { |
106 | return Match(text, v, NULL); |
107 | } |
108 | |
109 | bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v, |
110 | ErrorInfo* error_info) const { |
111 | if (!compiled_) { |
112 | LOG(DFATAL) << "RE2::Set::Match() called before compiling" ; |
113 | if (error_info != NULL) |
114 | error_info->kind = kNotCompiled; |
115 | return false; |
116 | } |
117 | bool dfa_failed = false; |
118 | std::unique_ptr<SparseSet> matches; |
119 | if (v != NULL) { |
120 | matches.reset(new SparseSet(size_)); |
121 | v->clear(); |
122 | } |
123 | bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch, |
124 | NULL, &dfa_failed, matches.get()); |
125 | if (dfa_failed) { |
126 | if (options_.log_errors()) |
127 | LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " |
128 | << "bytemap range " << prog_->bytemap_range() << ", " |
129 | << "list count " << prog_->list_count(); |
130 | if (error_info != NULL) |
131 | error_info->kind = kOutOfMemory; |
132 | return false; |
133 | } |
134 | if (ret == false) { |
135 | if (error_info != NULL) |
136 | error_info->kind = kNoError; |
137 | return false; |
138 | } |
139 | if (v != NULL) { |
140 | if (matches->empty()) { |
141 | LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!" ; |
142 | if (error_info != NULL) |
143 | error_info->kind = kInconsistent; |
144 | return false; |
145 | } |
146 | v->assign(matches->begin(), matches->end()); |
147 | } |
148 | if (error_info != NULL) |
149 | error_info->kind = kNoError; |
150 | return true; |
151 | } |
152 | |
153 | } // namespace re2 |
154 | |