1// Copyright 2010 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "re2/set.h"
6
7#include <stddef.h>
8#include <algorithm>
9#include <memory>
10
11#include "util/util.h"
12#include "util/logging.h"
13#include "re2/pod_array.h"
14#include "re2/prog.h"
15#include "re2/re2.h"
16#include "re2/regexp.h"
17#include "re2/stringpiece.h"
18
19namespace re2 {
20
21RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
22 options_.Copy(options);
23 options_.set_never_capture(true); // might unblock some optimisations
24 anchor_ = anchor;
25 prog_ = NULL;
26 compiled_ = false;
27 size_ = 0;
28}
29
30RE2::Set::~Set() {
31 for (size_t i = 0; i < elem_.size(); i++)
32 elem_[i].second->Decref();
33 delete prog_;
34}
35
36int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
37 if (compiled_) {
38 LOG(DFATAL) << "RE2::Set::Add() called after compiling";
39 return -1;
40 }
41
42 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
43 options_.ParseFlags());
44 RegexpStatus status;
45 re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
46 if (re == NULL) {
47 if (error != NULL)
48 *error = status.Text();
49 if (options_.log_errors())
50 LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
51 return -1;
52 }
53
54 // Concatenate with match index and push on vector.
55 int n = static_cast<int>(elem_.size());
56 re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
57 if (re->op() == kRegexpConcat) {
58 int nsub = re->nsub();
59 PODArray<re2::Regexp*> sub(nsub + 1);
60 for (int i = 0; i < nsub; i++)
61 sub[i] = re->sub()[i]->Incref();
62 sub[nsub] = m;
63 re->Decref();
64 re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
65 } else {
66 re2::Regexp* sub[2];
67 sub[0] = re;
68 sub[1] = m;
69 re = re2::Regexp::Concat(sub, 2, pf);
70 }
71 elem_.emplace_back(std::string(pattern), re);
72 return n;
73}
74
75bool RE2::Set::Compile() {
76 if (compiled_) {
77 LOG(DFATAL) << "RE2::Set::Compile() called more than once";
78 return false;
79 }
80 compiled_ = true;
81 size_ = static_cast<int>(elem_.size());
82
83 // Sort the elements by their patterns. This is good enough for now
84 // until we have a Regexp comparison function. (Maybe someday...)
85 std::sort(elem_.begin(), elem_.end(),
86 [](const Elem& a, const Elem& b) -> bool {
87 return a.first < b.first;
88 });
89
90 PODArray<re2::Regexp*> sub(size_);
91 for (int i = 0; i < size_; i++)
92 sub[i] = elem_[i].second;
93 elem_.clear();
94 elem_.shrink_to_fit();
95
96 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
97 options_.ParseFlags());
98 re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
99
100 prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
101 re->Decref();
102 return prog_ != NULL;
103}
104
105bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
106 return Match(text, v, NULL);
107}
108
109bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
110 ErrorInfo* error_info) const {
111 if (!compiled_) {
112 LOG(DFATAL) << "RE2::Set::Match() called before compiling";
113 if (error_info != NULL)
114 error_info->kind = kNotCompiled;
115 return false;
116 }
117 bool dfa_failed = false;
118 std::unique_ptr<SparseSet> matches;
119 if (v != NULL) {
120 matches.reset(new SparseSet(size_));
121 v->clear();
122 }
123 bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
124 NULL, &dfa_failed, matches.get());
125 if (dfa_failed) {
126 if (options_.log_errors())
127 LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
128 << "bytemap range " << prog_->bytemap_range() << ", "
129 << "list count " << prog_->list_count();
130 if (error_info != NULL)
131 error_info->kind = kOutOfMemory;
132 return false;
133 }
134 if (ret == false) {
135 if (error_info != NULL)
136 error_info->kind = kNoError;
137 return false;
138 }
139 if (v != NULL) {
140 if (matches->empty()) {
141 LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
142 if (error_info != NULL)
143 error_info->kind = kInconsistent;
144 return false;
145 }
146 v->assign(matches->begin(), matches->end());
147 }
148 if (error_info != NULL)
149 error_info->kind = kNoError;
150 return true;
151}
152
153} // namespace re2
154