1// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#include "vm/regexp_ast.h"
6
7#include "platform/utils.h"
8#include "vm/os.h"
9
10namespace dart {
11
12#define MAKE_ACCEPT(Name) \
13 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
14 return visitor->Visit##Name(this, data); \
15 }
16FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
17#undef MAKE_ACCEPT
18
19#define MAKE_TYPE_CASE(Name) \
20 RegExp##Name* RegExpTree::As##Name() { return NULL; } \
21 bool RegExpTree::Is##Name() const { return false; }
22FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
23#undef MAKE_TYPE_CASE
24
25#define MAKE_TYPE_CASE(Name) \
26 RegExp##Name* RegExp##Name::As##Name() { return this; } \
27 bool RegExp##Name::Is##Name() const { return true; }
28FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
29#undef MAKE_TYPE_CASE
30
31static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) {
32 Interval result = Interval::Empty();
33 for (intptr_t i = 0; i < children->length(); i++)
34 result = result.Union(children->At(i)->CaptureRegisters());
35 return result;
36}
37
38Interval RegExpAlternative::CaptureRegisters() const {
39 return ListCaptureRegisters(nodes());
40}
41
42Interval RegExpDisjunction::CaptureRegisters() const {
43 return ListCaptureRegisters(alternatives());
44}
45
46Interval RegExpLookaround::CaptureRegisters() const {
47 return body()->CaptureRegisters();
48}
49
50Interval RegExpCapture::CaptureRegisters() const {
51 Interval self(StartRegister(index()), EndRegister(index()));
52 return self.Union(body()->CaptureRegisters());
53}
54
55Interval RegExpQuantifier::CaptureRegisters() const {
56 return body()->CaptureRegisters();
57}
58
59bool RegExpAssertion::IsAnchoredAtStart() const {
60 return assertion_type() == RegExpAssertion::START_OF_INPUT;
61}
62
63bool RegExpAssertion::IsAnchoredAtEnd() const {
64 return assertion_type() == RegExpAssertion::END_OF_INPUT;
65}
66
67bool RegExpAlternative::IsAnchoredAtStart() const {
68 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
69 for (intptr_t i = 0; i < nodes->length(); i++) {
70 RegExpTree* node = nodes->At(i);
71 if (node->IsAnchoredAtStart()) {
72 return true;
73 }
74 if (node->max_match() > 0) {
75 return false;
76 }
77 }
78 return false;
79}
80
81bool RegExpAlternative::IsAnchoredAtEnd() const {
82 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
83 for (intptr_t i = nodes->length() - 1; i >= 0; i--) {
84 RegExpTree* node = nodes->At(i);
85 if (node->IsAnchoredAtEnd()) {
86 return true;
87 }
88 if (node->max_match() > 0) {
89 return false;
90 }
91 }
92 return false;
93}
94
95bool RegExpDisjunction::IsAnchoredAtStart() const {
96 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
97 for (intptr_t i = 0; i < alternatives->length(); i++) {
98 if (!alternatives->At(i)->IsAnchoredAtStart()) return false;
99 }
100 return true;
101}
102
103bool RegExpDisjunction::IsAnchoredAtEnd() const {
104 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
105 for (intptr_t i = 0; i < alternatives->length(); i++) {
106 if (!alternatives->At(i)->IsAnchoredAtEnd()) return false;
107 }
108 return true;
109}
110
111bool RegExpLookaround::IsAnchoredAtStart() const {
112 return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
113}
114
115bool RegExpCapture::IsAnchoredAtStart() const {
116 return body()->IsAnchoredAtStart();
117}
118
119bool RegExpCapture::IsAnchoredAtEnd() const {
120 return body()->IsAnchoredAtEnd();
121}
122
123// Convert regular expression trees to a simple sexp representation.
124// This representation should be different from the input grammar
125// in as many cases as possible, to make it more difficult for incorrect
126// parses to look as correct ones which is likely if the input and
127// output formats are alike.
128class RegExpUnparser : public RegExpVisitor {
129 public:
130 void VisitCharacterRange(CharacterRange that);
131#define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void* data);
132 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
133#undef MAKE_CASE
134};
135
136void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
137 OS::PrintErr("(|");
138 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
139 OS::PrintErr(" ");
140 (*that->alternatives())[i]->Accept(this, data);
141 }
142 OS::PrintErr(")");
143 return NULL;
144}
145
146void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
147 OS::PrintErr("(:");
148 for (intptr_t i = 0; i < that->nodes()->length(); i++) {
149 OS::PrintErr(" ");
150 (*that->nodes())[i]->Accept(this, data);
151 }
152 OS::PrintErr(")");
153 return NULL;
154}
155
156void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
157 PrintUtf16(that.from());
158 if (!that.IsSingleton()) {
159 OS::PrintErr("-");
160 PrintUtf16(that.to());
161 }
162}
163
164void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
165 void* data) {
166 if (that->is_negated()) OS::PrintErr("^");
167 OS::PrintErr("[");
168 for (intptr_t i = 0; i < that->ranges()->length(); i++) {
169 if (i > 0) OS::PrintErr(" ");
170 VisitCharacterRange((*that->ranges())[i]);
171 }
172 OS::PrintErr("]");
173 return NULL;
174}
175
176void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
177 switch (that->assertion_type()) {
178 case RegExpAssertion::START_OF_INPUT:
179 OS::PrintErr("@^i");
180 break;
181 case RegExpAssertion::END_OF_INPUT:
182 OS::PrintErr("@$i");
183 break;
184 case RegExpAssertion::START_OF_LINE:
185 OS::PrintErr("@^l");
186 break;
187 case RegExpAssertion::END_OF_LINE:
188 OS::PrintErr("@$l");
189 break;
190 case RegExpAssertion::BOUNDARY:
191 OS::PrintErr("@b");
192 break;
193 case RegExpAssertion::NON_BOUNDARY:
194 OS::PrintErr("@B");
195 break;
196 }
197 return NULL;
198}
199
200void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
201 OS::PrintErr("'");
202 ZoneGrowableArray<uint16_t>* chardata = that->data();
203 for (intptr_t i = 0; i < chardata->length(); i++) {
204 PrintUtf16(chardata->At(i));
205 }
206 OS::PrintErr("'");
207 return NULL;
208}
209
210void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
211 if (that->elements()->length() == 1) {
212 (*that->elements())[0].tree()->Accept(this, data);
213 } else {
214 OS::PrintErr("(!");
215 for (intptr_t i = 0; i < that->elements()->length(); i++) {
216 OS::PrintErr(" ");
217 (*that->elements())[i].tree()->Accept(this, data);
218 }
219 OS::PrintErr(")");
220 }
221 return NULL;
222}
223
224void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
225 OS::PrintErr("(# %" Pd " ", that->min());
226 if (that->max() == RegExpTree::kInfinity) {
227 OS::PrintErr("- ");
228 } else {
229 OS::PrintErr("%" Pd " ", that->max());
230 }
231 OS::PrintErr(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
232 that->body()->Accept(this, data);
233 OS::PrintErr(")");
234 return NULL;
235}
236
237void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
238 OS::PrintErr("(^ ");
239 that->body()->Accept(this, data);
240 OS::PrintErr(")");
241 return NULL;
242}
243
244void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
245 OS::PrintErr("(");
246 OS::PrintErr("(%s %s",
247 (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-"),
248 (that->is_positive() ? "+ " : "- "));
249 that->body()->Accept(this, data);
250 OS::PrintErr(")");
251 return NULL;
252}
253
254void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, void*) {
255 OS::PrintErr("(<- %" Pd ")", that->index());
256 return NULL;
257}
258
259void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) {
260 OS::PrintErr("%%");
261 return NULL;
262}
263
264void RegExpTree::Print() {
265 RegExpUnparser unparser;
266 Accept(&unparser, NULL);
267}
268
269RegExpDisjunction::RegExpDisjunction(
270 ZoneGrowableArray<RegExpTree*>* alternatives)
271 : alternatives_(alternatives) {
272 ASSERT(alternatives->length() > 1);
273 RegExpTree* first_alternative = alternatives->At(0);
274 min_match_ = first_alternative->min_match();
275 max_match_ = first_alternative->max_match();
276 for (intptr_t i = 1; i < alternatives->length(); i++) {
277 RegExpTree* alternative = alternatives->At(i);
278 min_match_ = Utils::Minimum(min_match_, alternative->min_match());
279 max_match_ = Utils::Maximum(max_match_, alternative->max_match());
280 }
281}
282
283static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) {
284 if (RegExpTree::kInfinity - previous < increase) {
285 return RegExpTree::kInfinity;
286 } else {
287 return previous + increase;
288 }
289}
290
291RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes)
292 : nodes_(nodes) {
293 ASSERT(nodes->length() > 1);
294 min_match_ = 0;
295 max_match_ = 0;
296 for (intptr_t i = 0; i < nodes->length(); i++) {
297 RegExpTree* node = nodes->At(i);
298 intptr_t node_min_match = node->min_match();
299 min_match_ = IncreaseBy(min_match_, node_min_match);
300 intptr_t node_max_match = node->max_match();
301 max_match_ = IncreaseBy(max_match_, node_max_match);
302 }
303}
304
305} // namespace dart
306