1/*
2 * Copyright (c) 2015, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Sequence of Component objects.
31 */
32
33
34#include "ComponentSequence.h"
35
36#include "buildstate.h"
37#include "ComponentAlternation.h"
38#include "ComponentRepeat.h"
39#include "Parser.h"
40#include "ue2common.h"
41#include "parse_error.h"
42#include "position_dump.h"
43#include "position_info.h"
44#include "nfagraph/ng_builder.h"
45#include "util/container.h"
46#include "util/make_unique.h"
47
48#include <algorithm>
49#include <cassert>
50
51using namespace std;
52
53namespace ue2 {
54
55ComponentSequence::ComponentSequence() : capture_index(NOT_CAPTURED) {}
56
57ComponentSequence::~ComponentSequence() {}
58
59ComponentSequence::ComponentSequence(const ComponentSequence &other)
60 : Component(other), capture_index(other.capture_index) {
61 // Deep copy children.
62 for (const auto &c : other.children) {
63 assert(c);
64 children.push_back(unique_ptr<Component>(c->clone()));
65 }
66 if (other.alternation) {
67 const ComponentAlternation &c = *other.alternation;
68 alternation.reset(c.clone());
69 }
70}
71
72ComponentSequence *ComponentSequence::clone() const {
73 return new ComponentSequence(*this);
74}
75
76Component *ComponentSequence::accept(ComponentVisitor &v) {
77 assert(!alternation); // Sequence must be finalized first.
78
79 Component *c = v.visit(this);
80 if (c != this) {
81 v.post(this);
82 return c;
83 }
84
85 for (auto i = children.begin(), e = children.end(); i != e; ++i) {
86 Component *child = i->get();
87 c = (*i)->accept(v);
88 if (c != child) {
89 // Child has been replaced (new Component pointer) or we've been
90 // instructed to delete it (null).
91 i->reset(c);
92 }
93 }
94
95 // Remove deleted children.
96 children.erase(remove(children.begin(), children.end(), nullptr),
97 children.end());
98
99 v.post(this);
100 return this;
101}
102
103void ComponentSequence::accept(ConstComponentVisitor &v) const {
104 assert(!alternation); // Sequence must be finalized first.
105
106 v.pre(*this);
107
108 for (auto i = children.begin(), e = children.end(); i != e; ++i) {
109 (*i)->accept(v);
110
111 if (i + 1 != e) {
112 v.during(*this);
113 }
114 }
115
116 v.post(*this);
117}
118
119void ComponentSequence::addComponent(unique_ptr<Component> comp) {
120 children.push_back(move(comp));
121}
122
123bool ComponentSequence::addRepeat(u32 min, u32 max,
124 ComponentRepeat::RepeatType type) {
125 if (children.empty() || min > max || max == 0) {
126 return false;
127 }
128
129 // We can't apply a repeat to some types of component.
130 assert(children.back());
131 if (!children.back()->repeatable()) {
132 return false;
133 }
134
135 children.back() = makeComponentRepeat(move(children.back()), min, max,
136 type);
137 assert(children.back());
138 return true;
139}
140
141void ComponentSequence::addAlternation() {
142 if (!alternation) {
143 alternation = ue2::make_unique<ComponentAlternation>();
144 }
145
146 auto seq = ue2::make_unique<ComponentSequence>();
147 seq->children.swap(children);
148 alternation->append(move(seq));
149}
150
151void ComponentSequence::finalize() {
152 if (alternation) {
153 addAlternation();
154 assert(children.empty());
155 children.push_back(move(alternation));
156 alternation = nullptr;
157 }
158}
159
160vector<PositionInfo> ComponentSequence::first() const {
161 vector<PositionInfo> firsts, subfirsts;
162
163 for (const auto &c : children) {
164 subfirsts = c->first();
165 replaceEpsilons(firsts, subfirsts);
166 if (!c->empty()) {
167 break;
168 }
169 }
170
171 if (firsts.empty()) {
172 DEBUG_PRINTF("trivial empty sequence %zu\n", firsts.size());
173 assert(children.empty());
174 firsts.push_back(GlushkovBuildState::POS_EPSILON);
175 }
176
177 DEBUG_PRINTF("%zu firsts\n", firsts.size());
178 return firsts;
179}
180
181namespace {
182struct eps_info {
183 eps_info() : flags(0U) {}
184 u32 flags;
185};
186}
187
188static
189void epsilonVisit(vector<eps_info> *info, const vector<PositionInfo> &f) {
190 vector<eps_info> out;
191 out.reserve(info->size());
192
193 set<u32> seen_flags;
194
195 assert(!info->empty());
196 for (auto eps = find(f.begin(), f.end(), GlushkovBuildState::POS_EPSILON);
197 eps != f.end();
198 eps = find(eps + 1, f.end(), GlushkovBuildState::POS_EPSILON)) {
199 for (auto it = info->begin(); it != info->end(); ++it) {
200 u32 flags = it->flags | eps->flags;
201 if (contains(seen_flags, flags)) {
202 continue;
203 }
204
205 out.push_back(*it);
206 out.back().flags = flags;
207 seen_flags.insert(flags);
208 }
209 }
210
211 info->swap(out);
212 assert(!info->empty());
213}
214
215static
216void applyEpsilonVisits(vector<PositionInfo> &lasts,
217 const vector<eps_info> &eps_visits) {
218 vector<PositionInfo> out;
219 out.reserve(lasts.size() * eps_visits.size());
220
221 for (const auto &last : lasts) {
222 for (const auto &e : eps_visits) {
223 out.push_back(last);
224 out.back().flags |= e.flags;
225 }
226 }
227
228 cleanupPositions(out);
229 lasts.swap(out);
230}
231
232vector<PositionInfo> ComponentSequence::last() const {
233 vector<PositionInfo> lasts, sublasts;
234 vector<eps_info> visits(1);
235
236 auto i = children.rbegin(), e = children.rend();
237 for (; i != e; ++i) {
238 sublasts = (*i)->last();
239 applyEpsilonVisits(sublasts, visits);
240 lasts.insert(lasts.end(), sublasts.begin(), sublasts.end());
241 if ((*i)->empty()) {
242 // this epsilon's flags should propagate to subsequent lasts'
243 // enter/exit lists
244 epsilonVisit(&visits, (*i)->first());
245 } else {
246 break;
247 }
248 }
249
250 DEBUG_PRINTF("lasts = %s\n",
251 dumpPositions(lasts.begin(), lasts.end()).c_str());
252 return lasts;
253}
254
255bool ComponentSequence::empty(void) const {
256 // a sequence can be empty if all its subcomponents can be empty
257 for (const auto &c : children) {
258 if (!c->empty()) {
259 return false;
260 }
261 }
262 return true;
263}
264
265void ComponentSequence::notePositions(GlushkovBuildState &bs) {
266 u32 pb = bs.getBuilder().numVertices();
267 for (auto &c : children) {
268 c->notePositions(bs);
269 }
270 recordPosBounds(pb, bs.getBuilder().numVertices());
271}
272
273void ComponentSequence::buildFollowSet(GlushkovBuildState &bs,
274 const vector<PositionInfo> &lastPos) {
275 DEBUG_PRINTF("sequence of %zu components\n", children.size());
276
277 // If no components, no work to do.
278 if (children.empty()) {
279 return;
280 }
281
282 // First element
283 children.front()->buildFollowSet(bs, lastPos);
284 if (children.size() == 1) {
285 // If our sequence contains precisely one component, then we've done
286 // all our work. Hooking up its firsts and lasts will be done by our
287 // parent component.
288 return;
289 }
290
291 // Remaining elements, wiring last to first in sequence.
292
293 vector<PositionInfo> prevLasts = children.front()->last();
294
295 for (auto it = next(children.begin()), ite = children.end(); it != ite; ++it) {
296 assert(*it);
297 Component &c = *(*it);
298
299 // Build subcomponent follow set
300 c.buildFollowSet(bs, prevLasts);
301
302 // FIRST(curr)
303 vector<PositionInfo> currFirsts(c.first());
304
305 // LAST(prev) => FIRST(curr)
306 DEBUG_PRINTF("connecting lasts (|| %zu) to firsts of comp %zd\n",
307 prevLasts.size(), it - children.begin());
308 bs.connectRegions(prevLasts, currFirsts);
309
310 // Generate a new LAST(prev) for the next iteration; either c->last()
311 // on its own if it can't be empty or c->last unioned with the previous
312 // last if c can be empty
313 vector<PositionInfo> currLasts(c.last());
314
315 if (!c.empty()) {
316 // Current component can't be empty, so use its lasts only
317 prevLasts.swap(currLasts);
318 DEBUG_PRINTF("swapped lasts\n");
319 } else {
320 // Add current lasts to previous lasts
321 DEBUG_PRINTF("doing stuff for empty comp\n");
322 prevLasts.insert(prevLasts.end(), currLasts.begin(), currLasts.end());
323 DEBUG_PRINTF("done stuff for empty comp\n");
324 }
325 }
326}
327
328bool ComponentSequence::checkEmbeddedStartAnchor(bool at_start) const {
329 for (const auto &c : children) {
330 at_start = c->checkEmbeddedStartAnchor(at_start);
331 }
332
333 return at_start;
334}
335
336bool ComponentSequence::checkEmbeddedEndAnchor(bool at_end) const {
337 // Note reversed ordering.
338 for (auto i = children.rbegin(), e = children.rend(); i != e; ++i) {
339 at_end = (*i)->checkEmbeddedEndAnchor(at_end);
340 }
341
342 return at_end;
343}
344
345bool ComponentSequence::vacuous_everywhere() const {
346 for (const auto &c : children) {
347 if (!c->vacuous_everywhere()) {
348 return false;
349 }
350 }
351 return true;
352}
353
354void ComponentSequence::optimise(bool connected_to_sds) {
355 DEBUG_PRINTF("opt %d\n", (int)connected_to_sds);
356 for (u32 i = 0; i < children.size();) {
357 DEBUG_PRINTF("opt %u: ctsds: %d\n", i, (int)connected_to_sds);
358 Component &sub = *children[i];
359
360 sub.optimise(connected_to_sds);
361
362 bool vacuous = sub.vacuous_everywhere();
363
364 if (connected_to_sds && vacuous) {
365 DEBUG_PRINTF("delete opt %u\n", i);
366 auto it = children.begin() + i;
367 children.erase(it);
368 continue;
369 }
370
371 connected_to_sds = connected_to_sds && vacuous;
372 i++;
373 }
374}
375
376} // namespace ue2
377