1 | /* |
2 | * Copyright (c) 2015, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Sequence of Component objects. |
31 | */ |
32 | |
33 | |
34 | #include "ComponentSequence.h" |
35 | |
36 | #include "buildstate.h" |
37 | #include "ComponentAlternation.h" |
38 | #include "ComponentRepeat.h" |
39 | #include "Parser.h" |
40 | #include "ue2common.h" |
41 | #include "parse_error.h" |
42 | #include "position_dump.h" |
43 | #include "position_info.h" |
44 | #include "nfagraph/ng_builder.h" |
45 | #include "util/container.h" |
46 | #include "util/make_unique.h" |
47 | |
48 | #include <algorithm> |
49 | #include <cassert> |
50 | |
51 | using namespace std; |
52 | |
53 | namespace ue2 { |
54 | |
55 | ComponentSequence::ComponentSequence() : capture_index(NOT_CAPTURED) {} |
56 | |
57 | ComponentSequence::~ComponentSequence() {} |
58 | |
59 | ComponentSequence::ComponentSequence(const ComponentSequence &other) |
60 | : Component(other), capture_index(other.capture_index) { |
61 | // Deep copy children. |
62 | for (const auto &c : other.children) { |
63 | assert(c); |
64 | children.push_back(unique_ptr<Component>(c->clone())); |
65 | } |
66 | if (other.alternation) { |
67 | const ComponentAlternation &c = *other.alternation; |
68 | alternation.reset(c.clone()); |
69 | } |
70 | } |
71 | |
72 | ComponentSequence *ComponentSequence::clone() const { |
73 | return new ComponentSequence(*this); |
74 | } |
75 | |
76 | Component *ComponentSequence::accept(ComponentVisitor &v) { |
77 | assert(!alternation); // Sequence must be finalized first. |
78 | |
79 | Component *c = v.visit(this); |
80 | if (c != this) { |
81 | v.post(this); |
82 | return c; |
83 | } |
84 | |
85 | for (auto i = children.begin(), e = children.end(); i != e; ++i) { |
86 | Component *child = i->get(); |
87 | c = (*i)->accept(v); |
88 | if (c != child) { |
89 | // Child has been replaced (new Component pointer) or we've been |
90 | // instructed to delete it (null). |
91 | i->reset(c); |
92 | } |
93 | } |
94 | |
95 | // Remove deleted children. |
96 | children.erase(remove(children.begin(), children.end(), nullptr), |
97 | children.end()); |
98 | |
99 | v.post(this); |
100 | return this; |
101 | } |
102 | |
103 | void ComponentSequence::accept(ConstComponentVisitor &v) const { |
104 | assert(!alternation); // Sequence must be finalized first. |
105 | |
106 | v.pre(*this); |
107 | |
108 | for (auto i = children.begin(), e = children.end(); i != e; ++i) { |
109 | (*i)->accept(v); |
110 | |
111 | if (i + 1 != e) { |
112 | v.during(*this); |
113 | } |
114 | } |
115 | |
116 | v.post(*this); |
117 | } |
118 | |
119 | void ComponentSequence::addComponent(unique_ptr<Component> comp) { |
120 | children.push_back(move(comp)); |
121 | } |
122 | |
123 | bool ComponentSequence::addRepeat(u32 min, u32 max, |
124 | ComponentRepeat::RepeatType type) { |
125 | if (children.empty() || min > max || max == 0) { |
126 | return false; |
127 | } |
128 | |
129 | // We can't apply a repeat to some types of component. |
130 | assert(children.back()); |
131 | if (!children.back()->repeatable()) { |
132 | return false; |
133 | } |
134 | |
135 | children.back() = makeComponentRepeat(move(children.back()), min, max, |
136 | type); |
137 | assert(children.back()); |
138 | return true; |
139 | } |
140 | |
141 | void ComponentSequence::addAlternation() { |
142 | if (!alternation) { |
143 | alternation = ue2::make_unique<ComponentAlternation>(); |
144 | } |
145 | |
146 | auto seq = ue2::make_unique<ComponentSequence>(); |
147 | seq->children.swap(children); |
148 | alternation->append(move(seq)); |
149 | } |
150 | |
151 | void ComponentSequence::finalize() { |
152 | if (alternation) { |
153 | addAlternation(); |
154 | assert(children.empty()); |
155 | children.push_back(move(alternation)); |
156 | alternation = nullptr; |
157 | } |
158 | } |
159 | |
160 | vector<PositionInfo> ComponentSequence::first() const { |
161 | vector<PositionInfo> firsts, subfirsts; |
162 | |
163 | for (const auto &c : children) { |
164 | subfirsts = c->first(); |
165 | replaceEpsilons(firsts, subfirsts); |
166 | if (!c->empty()) { |
167 | break; |
168 | } |
169 | } |
170 | |
171 | if (firsts.empty()) { |
172 | DEBUG_PRINTF("trivial empty sequence %zu\n" , firsts.size()); |
173 | assert(children.empty()); |
174 | firsts.push_back(GlushkovBuildState::POS_EPSILON); |
175 | } |
176 | |
177 | DEBUG_PRINTF("%zu firsts\n" , firsts.size()); |
178 | return firsts; |
179 | } |
180 | |
181 | namespace { |
182 | struct eps_info { |
183 | eps_info() : flags(0U) {} |
184 | u32 flags; |
185 | }; |
186 | } |
187 | |
188 | static |
189 | void epsilonVisit(vector<eps_info> *info, const vector<PositionInfo> &f) { |
190 | vector<eps_info> out; |
191 | out.reserve(info->size()); |
192 | |
193 | set<u32> seen_flags; |
194 | |
195 | assert(!info->empty()); |
196 | for (auto eps = find(f.begin(), f.end(), GlushkovBuildState::POS_EPSILON); |
197 | eps != f.end(); |
198 | eps = find(eps + 1, f.end(), GlushkovBuildState::POS_EPSILON)) { |
199 | for (auto it = info->begin(); it != info->end(); ++it) { |
200 | u32 flags = it->flags | eps->flags; |
201 | if (contains(seen_flags, flags)) { |
202 | continue; |
203 | } |
204 | |
205 | out.push_back(*it); |
206 | out.back().flags = flags; |
207 | seen_flags.insert(flags); |
208 | } |
209 | } |
210 | |
211 | info->swap(out); |
212 | assert(!info->empty()); |
213 | } |
214 | |
215 | static |
216 | void applyEpsilonVisits(vector<PositionInfo> &lasts, |
217 | const vector<eps_info> &eps_visits) { |
218 | vector<PositionInfo> out; |
219 | out.reserve(lasts.size() * eps_visits.size()); |
220 | |
221 | for (const auto &last : lasts) { |
222 | for (const auto &e : eps_visits) { |
223 | out.push_back(last); |
224 | out.back().flags |= e.flags; |
225 | } |
226 | } |
227 | |
228 | cleanupPositions(out); |
229 | lasts.swap(out); |
230 | } |
231 | |
232 | vector<PositionInfo> ComponentSequence::last() const { |
233 | vector<PositionInfo> lasts, sublasts; |
234 | vector<eps_info> visits(1); |
235 | |
236 | auto i = children.rbegin(), e = children.rend(); |
237 | for (; i != e; ++i) { |
238 | sublasts = (*i)->last(); |
239 | applyEpsilonVisits(sublasts, visits); |
240 | lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); |
241 | if ((*i)->empty()) { |
242 | // this epsilon's flags should propagate to subsequent lasts' |
243 | // enter/exit lists |
244 | epsilonVisit(&visits, (*i)->first()); |
245 | } else { |
246 | break; |
247 | } |
248 | } |
249 | |
250 | DEBUG_PRINTF("lasts = %s\n" , |
251 | dumpPositions(lasts.begin(), lasts.end()).c_str()); |
252 | return lasts; |
253 | } |
254 | |
255 | bool ComponentSequence::empty(void) const { |
256 | // a sequence can be empty if all its subcomponents can be empty |
257 | for (const auto &c : children) { |
258 | if (!c->empty()) { |
259 | return false; |
260 | } |
261 | } |
262 | return true; |
263 | } |
264 | |
265 | void ComponentSequence::notePositions(GlushkovBuildState &bs) { |
266 | u32 pb = bs.getBuilder().numVertices(); |
267 | for (auto &c : children) { |
268 | c->notePositions(bs); |
269 | } |
270 | recordPosBounds(pb, bs.getBuilder().numVertices()); |
271 | } |
272 | |
273 | void ComponentSequence::buildFollowSet(GlushkovBuildState &bs, |
274 | const vector<PositionInfo> &lastPos) { |
275 | DEBUG_PRINTF("sequence of %zu components\n" , children.size()); |
276 | |
277 | // If no components, no work to do. |
278 | if (children.empty()) { |
279 | return; |
280 | } |
281 | |
282 | // First element |
283 | children.front()->buildFollowSet(bs, lastPos); |
284 | if (children.size() == 1) { |
285 | // If our sequence contains precisely one component, then we've done |
286 | // all our work. Hooking up its firsts and lasts will be done by our |
287 | // parent component. |
288 | return; |
289 | } |
290 | |
291 | // Remaining elements, wiring last to first in sequence. |
292 | |
293 | vector<PositionInfo> prevLasts = children.front()->last(); |
294 | |
295 | for (auto it = next(children.begin()), ite = children.end(); it != ite; ++it) { |
296 | assert(*it); |
297 | Component &c = *(*it); |
298 | |
299 | // Build subcomponent follow set |
300 | c.buildFollowSet(bs, prevLasts); |
301 | |
302 | // FIRST(curr) |
303 | vector<PositionInfo> currFirsts(c.first()); |
304 | |
305 | // LAST(prev) => FIRST(curr) |
306 | DEBUG_PRINTF("connecting lasts (|| %zu) to firsts of comp %zd\n" , |
307 | prevLasts.size(), it - children.begin()); |
308 | bs.connectRegions(prevLasts, currFirsts); |
309 | |
310 | // Generate a new LAST(prev) for the next iteration; either c->last() |
311 | // on its own if it can't be empty or c->last unioned with the previous |
312 | // last if c can be empty |
313 | vector<PositionInfo> currLasts(c.last()); |
314 | |
315 | if (!c.empty()) { |
316 | // Current component can't be empty, so use its lasts only |
317 | prevLasts.swap(currLasts); |
318 | DEBUG_PRINTF("swapped lasts\n" ); |
319 | } else { |
320 | // Add current lasts to previous lasts |
321 | DEBUG_PRINTF("doing stuff for empty comp\n" ); |
322 | prevLasts.insert(prevLasts.end(), currLasts.begin(), currLasts.end()); |
323 | DEBUG_PRINTF("done stuff for empty comp\n" ); |
324 | } |
325 | } |
326 | } |
327 | |
328 | bool ComponentSequence::checkEmbeddedStartAnchor(bool at_start) const { |
329 | for (const auto &c : children) { |
330 | at_start = c->checkEmbeddedStartAnchor(at_start); |
331 | } |
332 | |
333 | return at_start; |
334 | } |
335 | |
336 | bool ComponentSequence::checkEmbeddedEndAnchor(bool at_end) const { |
337 | // Note reversed ordering. |
338 | for (auto i = children.rbegin(), e = children.rend(); i != e; ++i) { |
339 | at_end = (*i)->checkEmbeddedEndAnchor(at_end); |
340 | } |
341 | |
342 | return at_end; |
343 | } |
344 | |
345 | bool ComponentSequence::vacuous_everywhere() const { |
346 | for (const auto &c : children) { |
347 | if (!c->vacuous_everywhere()) { |
348 | return false; |
349 | } |
350 | } |
351 | return true; |
352 | } |
353 | |
354 | void ComponentSequence::optimise(bool connected_to_sds) { |
355 | DEBUG_PRINTF("opt %d\n" , (int)connected_to_sds); |
356 | for (u32 i = 0; i < children.size();) { |
357 | DEBUG_PRINTF("opt %u: ctsds: %d\n" , i, (int)connected_to_sds); |
358 | Component &sub = *children[i]; |
359 | |
360 | sub.optimise(connected_to_sds); |
361 | |
362 | bool vacuous = sub.vacuous_everywhere(); |
363 | |
364 | if (connected_to_sds && vacuous) { |
365 | DEBUG_PRINTF("delete opt %u\n" , i); |
366 | auto it = children.begin() + i; |
367 | children.erase(it); |
368 | continue; |
369 | } |
370 | |
371 | connected_to_sds = connected_to_sds && vacuous; |
372 | i++; |
373 | } |
374 | } |
375 | |
376 | } // namespace ue2 |
377 | |