1 | // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 | // for details. All rights reserved. Use of this source code is governed by a |
3 | // BSD-style license that can be found in the LICENSE file. |
4 | |
5 | #include "vm/regexp_ast.h" |
6 | |
7 | #include "platform/utils.h" |
8 | #include "vm/os.h" |
9 | |
10 | namespace dart { |
11 | |
12 | #define MAKE_ACCEPT(Name) \ |
13 | void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ |
14 | return visitor->Visit##Name(this, data); \ |
15 | } |
16 | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) |
17 | #undef MAKE_ACCEPT |
18 | |
19 | #define MAKE_TYPE_CASE(Name) \ |
20 | RegExp##Name* RegExpTree::As##Name() { return NULL; } \ |
21 | bool RegExpTree::Is##Name() const { return false; } |
22 | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
23 | #undef MAKE_TYPE_CASE |
24 | |
25 | #define MAKE_TYPE_CASE(Name) \ |
26 | RegExp##Name* RegExp##Name::As##Name() { return this; } \ |
27 | bool RegExp##Name::Is##Name() const { return true; } |
28 | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
29 | #undef MAKE_TYPE_CASE |
30 | |
31 | static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) { |
32 | Interval result = Interval::Empty(); |
33 | for (intptr_t i = 0; i < children->length(); i++) |
34 | result = result.Union(children->At(i)->CaptureRegisters()); |
35 | return result; |
36 | } |
37 | |
38 | Interval RegExpAlternative::CaptureRegisters() const { |
39 | return ListCaptureRegisters(nodes()); |
40 | } |
41 | |
42 | Interval RegExpDisjunction::CaptureRegisters() const { |
43 | return ListCaptureRegisters(alternatives()); |
44 | } |
45 | |
46 | Interval RegExpLookaround::CaptureRegisters() const { |
47 | return body()->CaptureRegisters(); |
48 | } |
49 | |
50 | Interval RegExpCapture::CaptureRegisters() const { |
51 | Interval self(StartRegister(index()), EndRegister(index())); |
52 | return self.Union(body()->CaptureRegisters()); |
53 | } |
54 | |
55 | Interval RegExpQuantifier::CaptureRegisters() const { |
56 | return body()->CaptureRegisters(); |
57 | } |
58 | |
59 | bool RegExpAssertion::IsAnchoredAtStart() const { |
60 | return assertion_type() == RegExpAssertion::START_OF_INPUT; |
61 | } |
62 | |
63 | bool RegExpAssertion::IsAnchoredAtEnd() const { |
64 | return assertion_type() == RegExpAssertion::END_OF_INPUT; |
65 | } |
66 | |
67 | bool RegExpAlternative::IsAnchoredAtStart() const { |
68 | ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
69 | for (intptr_t i = 0; i < nodes->length(); i++) { |
70 | RegExpTree* node = nodes->At(i); |
71 | if (node->IsAnchoredAtStart()) { |
72 | return true; |
73 | } |
74 | if (node->max_match() > 0) { |
75 | return false; |
76 | } |
77 | } |
78 | return false; |
79 | } |
80 | |
81 | bool RegExpAlternative::IsAnchoredAtEnd() const { |
82 | ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
83 | for (intptr_t i = nodes->length() - 1; i >= 0; i--) { |
84 | RegExpTree* node = nodes->At(i); |
85 | if (node->IsAnchoredAtEnd()) { |
86 | return true; |
87 | } |
88 | if (node->max_match() > 0) { |
89 | return false; |
90 | } |
91 | } |
92 | return false; |
93 | } |
94 | |
95 | bool RegExpDisjunction::IsAnchoredAtStart() const { |
96 | ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
97 | for (intptr_t i = 0; i < alternatives->length(); i++) { |
98 | if (!alternatives->At(i)->IsAnchoredAtStart()) return false; |
99 | } |
100 | return true; |
101 | } |
102 | |
103 | bool RegExpDisjunction::IsAnchoredAtEnd() const { |
104 | ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
105 | for (intptr_t i = 0; i < alternatives->length(); i++) { |
106 | if (!alternatives->At(i)->IsAnchoredAtEnd()) return false; |
107 | } |
108 | return true; |
109 | } |
110 | |
111 | bool RegExpLookaround::IsAnchoredAtStart() const { |
112 | return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); |
113 | } |
114 | |
115 | bool RegExpCapture::IsAnchoredAtStart() const { |
116 | return body()->IsAnchoredAtStart(); |
117 | } |
118 | |
119 | bool RegExpCapture::IsAnchoredAtEnd() const { |
120 | return body()->IsAnchoredAtEnd(); |
121 | } |
122 | |
123 | // Convert regular expression trees to a simple sexp representation. |
124 | // This representation should be different from the input grammar |
125 | // in as many cases as possible, to make it more difficult for incorrect |
126 | // parses to look as correct ones which is likely if the input and |
127 | // output formats are alike. |
128 | class RegExpUnparser : public RegExpVisitor { |
129 | public: |
130 | void VisitCharacterRange(CharacterRange that); |
131 | #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void* data); |
132 | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
133 | #undef MAKE_CASE |
134 | }; |
135 | |
136 | void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { |
137 | OS::PrintErr("(|" ); |
138 | for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
139 | OS::PrintErr(" " ); |
140 | (*that->alternatives())[i]->Accept(this, data); |
141 | } |
142 | OS::PrintErr(")" ); |
143 | return NULL; |
144 | } |
145 | |
146 | void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { |
147 | OS::PrintErr("(:" ); |
148 | for (intptr_t i = 0; i < that->nodes()->length(); i++) { |
149 | OS::PrintErr(" " ); |
150 | (*that->nodes())[i]->Accept(this, data); |
151 | } |
152 | OS::PrintErr(")" ); |
153 | return NULL; |
154 | } |
155 | |
156 | void RegExpUnparser::VisitCharacterRange(CharacterRange that) { |
157 | PrintUtf16(that.from()); |
158 | if (!that.IsSingleton()) { |
159 | OS::PrintErr("-" ); |
160 | PrintUtf16(that.to()); |
161 | } |
162 | } |
163 | |
164 | void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, |
165 | void* data) { |
166 | if (that->is_negated()) OS::PrintErr("^" ); |
167 | OS::PrintErr("[" ); |
168 | for (intptr_t i = 0; i < that->ranges()->length(); i++) { |
169 | if (i > 0) OS::PrintErr(" " ); |
170 | VisitCharacterRange((*that->ranges())[i]); |
171 | } |
172 | OS::PrintErr("]" ); |
173 | return NULL; |
174 | } |
175 | |
176 | void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { |
177 | switch (that->assertion_type()) { |
178 | case RegExpAssertion::START_OF_INPUT: |
179 | OS::PrintErr("@^i" ); |
180 | break; |
181 | case RegExpAssertion::END_OF_INPUT: |
182 | OS::PrintErr("@$i" ); |
183 | break; |
184 | case RegExpAssertion::START_OF_LINE: |
185 | OS::PrintErr("@^l" ); |
186 | break; |
187 | case RegExpAssertion::END_OF_LINE: |
188 | OS::PrintErr("@$l" ); |
189 | break; |
190 | case RegExpAssertion::BOUNDARY: |
191 | OS::PrintErr("@b" ); |
192 | break; |
193 | case RegExpAssertion::NON_BOUNDARY: |
194 | OS::PrintErr("@B" ); |
195 | break; |
196 | } |
197 | return NULL; |
198 | } |
199 | |
200 | void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { |
201 | OS::PrintErr("'" ); |
202 | ZoneGrowableArray<uint16_t>* chardata = that->data(); |
203 | for (intptr_t i = 0; i < chardata->length(); i++) { |
204 | PrintUtf16(chardata->At(i)); |
205 | } |
206 | OS::PrintErr("'" ); |
207 | return NULL; |
208 | } |
209 | |
210 | void* RegExpUnparser::VisitText(RegExpText* that, void* data) { |
211 | if (that->elements()->length() == 1) { |
212 | (*that->elements())[0].tree()->Accept(this, data); |
213 | } else { |
214 | OS::PrintErr("(!" ); |
215 | for (intptr_t i = 0; i < that->elements()->length(); i++) { |
216 | OS::PrintErr(" " ); |
217 | (*that->elements())[i].tree()->Accept(this, data); |
218 | } |
219 | OS::PrintErr(")" ); |
220 | } |
221 | return NULL; |
222 | } |
223 | |
224 | void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { |
225 | OS::PrintErr("(# %" Pd " " , that->min()); |
226 | if (that->max() == RegExpTree::kInfinity) { |
227 | OS::PrintErr("- " ); |
228 | } else { |
229 | OS::PrintErr("%" Pd " " , that->max()); |
230 | } |
231 | OS::PrintErr(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n " ); |
232 | that->body()->Accept(this, data); |
233 | OS::PrintErr(")" ); |
234 | return NULL; |
235 | } |
236 | |
237 | void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { |
238 | OS::PrintErr("(^ " ); |
239 | that->body()->Accept(this, data); |
240 | OS::PrintErr(")" ); |
241 | return NULL; |
242 | } |
243 | |
244 | void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) { |
245 | OS::PrintErr("(" ); |
246 | OS::PrintErr("(%s %s" , |
247 | (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-" ), |
248 | (that->is_positive() ? "+ " : "- " )); |
249 | that->body()->Accept(this, data); |
250 | OS::PrintErr(")" ); |
251 | return NULL; |
252 | } |
253 | |
254 | void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, void*) { |
255 | OS::PrintErr("(<- %" Pd ")" , that->index()); |
256 | return NULL; |
257 | } |
258 | |
259 | void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) { |
260 | OS::PrintErr("%%" ); |
261 | return NULL; |
262 | } |
263 | |
264 | void RegExpTree::Print() { |
265 | RegExpUnparser unparser; |
266 | Accept(&unparser, NULL); |
267 | } |
268 | |
269 | RegExpDisjunction::RegExpDisjunction( |
270 | ZoneGrowableArray<RegExpTree*>* alternatives) |
271 | : alternatives_(alternatives) { |
272 | ASSERT(alternatives->length() > 1); |
273 | RegExpTree* first_alternative = alternatives->At(0); |
274 | min_match_ = first_alternative->min_match(); |
275 | max_match_ = first_alternative->max_match(); |
276 | for (intptr_t i = 1; i < alternatives->length(); i++) { |
277 | RegExpTree* alternative = alternatives->At(i); |
278 | min_match_ = Utils::Minimum(min_match_, alternative->min_match()); |
279 | max_match_ = Utils::Maximum(max_match_, alternative->max_match()); |
280 | } |
281 | } |
282 | |
283 | static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) { |
284 | if (RegExpTree::kInfinity - previous < increase) { |
285 | return RegExpTree::kInfinity; |
286 | } else { |
287 | return previous + increase; |
288 | } |
289 | } |
290 | |
291 | RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes) |
292 | : nodes_(nodes) { |
293 | ASSERT(nodes->length() > 1); |
294 | min_match_ = 0; |
295 | max_match_ = 0; |
296 | for (intptr_t i = 0; i < nodes->length(); i++) { |
297 | RegExpTree* node = nodes->At(i); |
298 | intptr_t node_min_match = node->min_match(); |
299 | min_match_ = IncreaseBy(min_match_, node_min_match); |
300 | intptr_t node_max_match = node->max_match(); |
301 | max_match_ = IncreaseBy(max_match_, node_max_match); |
302 | } |
303 | } |
304 | |
305 | } // namespace dart |
306 | |