1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief NFA graph reductions.
31 *
32 * This code attempts to make the NFA graph smaller by performing a number of
33 * local transformations:
34 *
35 * ### (1) removal of redundant vertices:
36 *
37 * v is redundant wrt to u if succ(v) is a subset of succ(u)
38 * AND pred(v) is a subset of pred(u)
39 * AND cr(v) is a subset of cr(u)
40 *
41 * ### (2) 'diamond' transformation:
42 *
43 * given succ(v) == succ(u) and pred(v) == pred(u),
44 * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
45 * and cr(w) = union(cr(v), cr(u))
46 *
47 * ### (3) locally identifiable left equivalence:
48 *
49 * given pred(v) == pred(u) (**) and cr(v) == cr(u),
50 * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
51 * and succ(w) = union(succ(v), succ(u))
52 *
53 * ### (4) locally identifiable right equivalence:
54 *
55 * given succ(v) == succ(u) (**) and cr(v) == cr(u),
56 * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
57 * and pred(w) = union(pred(v), pred(u))
58 *
59 * NOTE (**): for left and right equivalence, we can also do the transform if
60 * set(u) contains u, set(v) contains v and the sets are otherwise equal. This
61 * enables equivalent vertices with self-loops to be merged.
62 *
63 * If v and u raise accepts, they can only be merged if they raise the same
64 * report IDs.
65 *
66 * Transformations are applied repeatedly until the graph stops changing.
67 *
68 * Note that the final graph may depend on the order in which these
69 * transformations are applied. In order to reduce the non-determinism the
70 * following order is imposed: (1); (2); (3) + (4).
71 */
72#include "ng_redundancy.h"
73
74#include "ng_holder.h"
75#include "ng_calc_components.h"
76#include "ng_dominators.h"
77#include "ng_prune.h"
78#include "ng_util.h"
79#include "ue2common.h"
80#include "util/container.h"
81#include "util/flat_containers.h"
82#include "util/graph_range.h"
83
84#include <algorithm>
85#include <cassert>
86#include <map>
87#include <set>
88#include <vector>
89
90#include <boost/graph/depth_first_search.hpp>
91#include <boost/graph/reverse_graph.hpp>
92
93using namespace std;
94
95namespace ue2 {
96
97namespace {
98
99/** Precalculated (and maintained) information about a vertex. */
100class VertexInfo {
101public:
102 flat_set<NFAVertex> pred; //!< predecessors of this vertex
103 flat_set<NFAVertex> succ; //!< successors of this vertex
104 bool isAccept = false; //!< does this vertex lead to accept?
105 bool isRemoved = false; //!< have we already removed this vertex?
106
107 size_t inDegree() const { return pred.size(); }
108 size_t outDegree() const { return succ.size(); }
109};
110
111class VertexInfoMap {
112public:
113 explicit VertexInfoMap(const NGHolder &gg)
114 : g(gg), infos(num_vertices(gg)) {}
115 VertexInfo &operator[](NFAVertex v) {
116 u32 i = g[v].index;
117 assert(i < infos.size());
118 return infos[i];
119 }
120
121 const VertexInfo &operator[](NFAVertex v) const {
122 u32 i = g[v].index;
123 assert(i < infos.size());
124 return infos[i];
125 }
126
127private:
128 const NGHolder &g;
129 vector<VertexInfo> infos;
130};
131
132} // namespace
133
134/** Populates the info map with their predecessor and successor states, and
135 * whether they are accept states. */
136static
137void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
138 for (auto v : vertices_range(g)) {
139 VertexInfo &info = infoMap[v];
140 assert(info.pred.empty() && info.succ.empty());
141
142 // Build successor and predecessor sets
143 insert(&info.pred, inv_adjacent_vertices(v, g));
144 insert(&info.succ, adjacent_vertices(v, g));
145
146 // Note whether the vertex is an accept state
147 if (!is_special(v, g)) {
148 if (contains(info.succ, g.accept)
149 || contains(info.succ, g.acceptEod)) {
150 info.isAccept = true;
151 }
152 }
153 }
154}
155
156/** Helper function to take the intersection of two sorted vertex sets
157 * in-place. */
158static
159void inplaceIntersection(vector<NFAVertex> &vset1,
160 const flat_set<NFAVertex> &vset2) {
161 const NFAVertex GONE = NGHolder::null_vertex();
162
163 vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
164 flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
165
166 while ((it != ite) && (jt != jte)) {
167 assert(*it != GONE);
168
169 if (*it < *jt) {
170 // present in vset1 but not in vset2. Set to null, remove in a
171 // second pass.
172 *it = GONE;
173 ++it;
174 } else if (*jt < *it) {
175 // present in vset2 but not in vset1, skip.
176 ++jt;
177 } else {
178 // present in both sets.
179 ++it; ++jt;
180 }
181 }
182
183 // Left overs are only in that set.
184 vset1.erase(it, ite);
185
186 // Remove nulls created above.
187 vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
188}
189
190/** Find the intersection of the successors of our predecessors. */
191static
192void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
193 const VertexInfoMap &infoMap,
194 vector<NFAVertex> &intersection,
195 bool considerSelf = true /* follow self loops */) {
196 /* find a good seed for the intersection */
197 const flat_set<NFAVertex> *best = nullptr;
198 for (auto u : predSet) {
199 if (!considerSelf && u == v) {
200 continue;
201 }
202
203 const flat_set<NFAVertex> &succSet = infoMap[u].succ;
204 if (!best || succSet.size() <= best->size()) {
205 best = &succSet;
206
207 // Break out if we've reduced our intersection to [v]
208 if (best->size() == 1) {
209 assert(*(best->begin()) == v);
210 intersection.push_back(v);
211 return;
212 }
213 }
214 }
215
216 if (best) {
217 insert(&intersection, intersection.end(), *best);
218 }
219
220 for (auto u : predSet) {
221 if (!considerSelf && u == v) {
222 continue;
223 }
224
225 inplaceIntersection(intersection, infoMap[u].succ);
226
227 // Check: intersection should always be at least size 1
228 assert(!intersection.empty());
229
230 // Break out if we've reduced our intersection to [v]
231 if (intersection.size() == 1) {
232 assert(*intersection.begin() == v);
233 return;
234 }
235 }
236}
237
238/** Find the intersection of the predecessors of our successors. */
239static
240void predSuccIntersection(const NFAVertex v,
241 const flat_set<NFAVertex> &succSet,
242 const VertexInfoMap &infoMap,
243 vector<NFAVertex> &intersection,
244 bool considerSelf = true /* follow self loops */) {
245 /* find a good seed for the intersection */
246 const flat_set<NFAVertex> *best = nullptr;
247 for (auto w : succSet) {
248 if (!considerSelf && w == v) {
249 continue;
250 }
251
252 const flat_set<NFAVertex> &predSet = infoMap[w].pred;
253 if (!best || predSet.size() <= best->size()) {
254 best = &predSet;
255
256 // Break out if we've reduced our intersection to [v]
257 if (best->size() == 1) {
258 assert(*(best->begin()) == v);
259 intersection.push_back(v);
260 return;
261 }
262 }
263 }
264
265 if (best) {
266 insert(&intersection, intersection.end(), *best);
267 }
268
269 for (auto w : succSet) {
270 if (!considerSelf && w == v) {
271 continue;
272 }
273
274 inplaceIntersection(intersection, infoMap[w].pred);
275
276 // Check: intersection should always be at least size 1
277 assert(!intersection.empty());
278
279 // Break out if we've reduced our intersection to [v]
280 if (intersection.size() == 1) {
281 assert(*intersection.begin() == v);
282 return;
283 }
284 }
285}
286
287/** Update containers to take into account the removal of vertex v. */
288static
289void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
290 set<NFAVertex> &removable) {
291 VertexInfo &info = infoMap[v];
292 assert(!info.isRemoved);
293 assert(!contains(removable, v));
294 info.isRemoved = true;
295 removable.insert(v);
296
297 // remove v from its predecessors' successors
298 for (auto u : info.pred) {
299 infoMap[u].succ.erase(v);
300 }
301
302 // remove v from its successors' predecessors
303 for (auto w : info.succ) {
304 infoMap[w].pred.erase(v);
305 }
306}
307
308static
309bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
310 NFAEdge e = edge(g.start, v, g);
311 return e && !g[e].tops.empty();
312}
313
314/** Transform (1), removal of redundant vertices. */
315static
316bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
317 set<NFAVertex> &removable) {
318 /* useless merges can be done in any order, no need to take any care with
319 * ordering */
320
321 // Temporary vectors used for intersections below
322 vector<NFAVertex> succPredSet, predSuccSet, intersection;
323
324 bool changed = false;
325 for (auto v : vertices_range(g)) {
326 VertexInfo &info = infoMap[v];
327
328 if (info.isRemoved) {
329 continue;
330 }
331
332 assert(!contains(removable, v));
333
334 if (is_special(v, g)) {
335 continue;
336 }
337
338 /* we do not need to check for out edge tops - as only specials (start)
339 * can have tops and they are already disqualified. */
340 if (hasInEdgeTops(g, v)) {
341 continue; // Conservatively skip anything with nonzero tops.
342 }
343
344 if (info.pred.empty() || info.succ.empty()) {
345 DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index);
346 assert(0); // non-special states should always have succ/pred lists
347 continue;
348 }
349
350 // The following cases are more complex and rely on the intersection of
351 // Succ(Pred(v)) and Pred(Succ(v))
352
353 // Compute intersections, operating on the smaller set first
354 // Note that we use vectors here, as set_intersection underneath
355 // guarantees sorted output, and vectors were quite a bit
356 // faster than sets or lists.
357
358 succPredSet.clear();
359 predSuccSet.clear();
360
361 if (info.pred.size() <= info.succ.size()) {
362 succPredIntersection(v, info.pred, infoMap, succPredSet);
363 if (succPredSet.size() == 1) {
364 // nobody in here but us chickens
365 assert(*succPredSet.begin() == v);
366 continue;
367 }
368 predSuccIntersection(v, info.succ, infoMap, predSuccSet);
369 if (predSuccSet.size() == 1) {
370 assert(*predSuccSet.begin() == v);
371 continue;
372 }
373 } else {
374 predSuccIntersection(v, info.succ, infoMap, predSuccSet);
375 if (predSuccSet.size() == 1) {
376 assert(*predSuccSet.begin() == v);
377 continue;
378 }
379 succPredIntersection(v, info.pred, infoMap, succPredSet);
380 if (succPredSet.size() == 1) {
381 assert(*succPredSet.begin() == v);
382 continue;
383 }
384 }
385
386 // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
387 intersection.clear();
388 set_intersection(succPredSet.begin(), succPredSet.end(),
389 predSuccSet.begin(), predSuccSet.end(),
390 back_inserter(intersection));
391
392 /* Boring if it is just us in the intersection */
393 if (intersection.size() < 2) {
394 continue;
395 }
396
397 // Compare char_reach, mark v for removal if any members of
398 // the intersection have an equal or greater reach
399 const CharReach &currReach = g[v].char_reach;
400 const auto &currReports = g[v].reports;
401 for (auto t : intersection) {
402 const VertexInfo &info2 = infoMap[t];
403
404 /* start is never a succ of a state, so will never be in the
405 * predsucc/succpred intersection */
406 assert(t != g.start);
407
408 if (t == v || info2.isRemoved) {
409 continue;
410 }
411
412 // For each candidate C to make V redundant, check:
413 // if V is an accept state, C must be an accept state for
414 // the same pattern
415 // pred(C) is a superset of pred(V)
416 // succ(C) is a superset of succ(V)
417 // reach(C) is a superset of reach(V)
418 //
419 // Note: pred/sec tests are covered by the intersections
420 // calculated above.
421
422 /* note: links to accepts are also tracked in succs */
423 if (info.isAccept && currReports != g[t].reports) {
424 continue;
425 }
426
427 if (som) {
428 if (t == g.startDs) {
429 continue;
430 }
431 if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
432 continue;
433 }
434 }
435
436 /* we do not need to check for out edge tops - as only start
437 * can have tops and it has already been ruled out. */
438 if (hasInEdgeTops(g, t)) {
439 continue; // Conservatively skip anything with nonzero tops.
440 }
441
442 CharReach &otherReach = g[t].char_reach;
443 if (currReach.isSubsetOf(otherReach)) {
444 DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
445 g[v].index, g[t].index);
446 markForRemoval(v, infoMap, removable);
447 changed = true;
448 break;
449 }
450 }
451 }
452
453 return changed;
454}
455
456/** Transform (2), diamond merge pass. */
457static
458bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
459 set<NFAVertex> &removable) {
460 // Temporary vectors used for intersections below
461 vector<NFAVertex> succPredSet, predSuccSet, intersection;
462
463 bool changed = false;
464 for (auto v : vertices_range(g)) {
465 VertexInfo &info = infoMap[v];
466
467 if (info.isRemoved) {
468 continue;
469 }
470
471 assert(!contains(removable, v));
472
473 if (is_special(v, g)) {
474 continue;
475 }
476
477 /* we do not need to check for out edge tops - as only specials (start)
478 * can have tops and they are already disqualified. */
479 if (hasInEdgeTops(g, v)) {
480 continue; // Conservatively skip anything with nonzero tops.
481 }
482
483 if (info.pred.empty() || info.succ.empty()) {
484 assert(0); // non-special states should always have succ/pred lists
485 continue;
486 }
487
488 // The following cases are more complex and rely on the intersection of
489 // Succ(Pred(v)) and Pred(Succ(v))
490
491 // Compute intersections, operating on the smaller set first
492 // Note that we use vectors here, as set_intersection underneath
493 // guarantees sorted output, and vectors were quite a bit faster than
494 // sets or lists.
495
496 succPredSet.clear();
497 predSuccSet.clear();
498
499 if (info.pred.size() <= info.succ.size()) {
500 succPredIntersection(v, info.pred, infoMap, succPredSet);
501 if (succPredSet.size() == 1) {
502 // nobody in here but us chickens
503 assert(*succPredSet.begin() == v);
504 continue;
505 }
506 predSuccIntersection(v, info.succ, infoMap, predSuccSet);
507 if (predSuccSet.size() == 1) {
508 assert(*predSuccSet.begin() == v);
509 continue;
510 }
511 } else {
512 predSuccIntersection(v, info.succ, infoMap, predSuccSet);
513 if (predSuccSet.size() == 1) {
514 assert(*predSuccSet.begin() == v);
515 continue;
516 }
517 succPredIntersection(v, info.pred, infoMap, succPredSet);
518 if (succPredSet.size() == 1) {
519 assert(*succPredSet.begin() == v);
520 continue;
521 }
522 }
523
524 // Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
525 intersection.clear();
526 set_intersection(succPredSet.begin(), succPredSet.end(),
527 predSuccSet.begin(), predSuccSet.end(),
528 back_inserter(intersection));
529
530 /* Boring if it is just us in the intersection */
531 if (intersection.size() < 2) {
532 continue;
533 }
534
535 const CharReach &currReach = g[v].char_reach;
536 const auto &currReports = g[v].reports;
537 for (auto t : intersection) {
538 const VertexInfo &info2 = infoMap[t];
539
540 if (t == v || info2.isRemoved || is_special(t, g)) {
541 continue;
542 }
543
544 /* note: links to accepts are also tracked in succs */
545 if (info.isAccept && currReports != g[t].reports) {
546 continue;
547 }
548
549 /* we do not need to check for out edge tops - as only specials
550 * (start) can have tops and they are already disqualified. */
551 if (hasInEdgeTops(g, t)) {
552 continue; // Conservatively skip anything with nonzero tops.
553 }
554
555 if (som) {
556 if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
557 continue; // can only merge like with like.
558 }
559 }
560
561 // If in-degree of v == in-degree of target
562 // and out-degree of v == out-degree of target
563 // (because pred and succ are supersets)
564 // then combine charreach of v into target and remove v
565 if (info.inDegree() == info2.inDegree()
566 && info.outDegree() == info2.outDegree()) {
567 // add character reachability of v into target
568 CharReach &otherReach = g[t].char_reach;
569 otherReach |= currReach;
570 // v can be removed
571 DEBUG_PRINTF("removing redundant vertex %zu and merging "
572 "reachability with vertex %zu\n",
573 g[v].index, g[t].index);
574 markForRemoval(v, infoMap, removable);
575 changed = true;
576 break;
577 }
578 }
579 }
580
581 return changed;
582}
583
584namespace {
585
586struct ReachMismatch {};
587
588class ReachSubsetVisitor : public boost::default_dfs_visitor {
589public:
590 explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
591
592 template <class Graph, class Vertex>
593 void discover_vertex(const Vertex &v, const Graph &g) const {
594 if (is_any_start(v, g)) {
595 return; // start vertices are OK
596 } else if (is_special(v, g)) {
597 assert(0);
598 throw ReachMismatch(); // other special nodes??
599 }
600
601 const CharReach &vcr = g[v].char_reach;
602 DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
603 cr.count());
604 if (vcr != (vcr & cr)) {
605 throw ReachMismatch();
606 }
607 }
608
609private:
610 const CharReach &cr;
611};
612
613/** Terminator function for DFS used in pathReachSubset. */
614template <class Graph, class Vertex> class VertexIs {
615public:
616 explicit VertexIs(const Vertex &v) : vertex(v) {}
617 bool operator()(const Vertex &v, const Graph &) const {
618 return v == vertex;
619 }
620
621private:
622 Vertex vertex;
623};
624
625} // namespace
626
627/** Returns true if every vertex on paths leading to edge \p e has reachability
628 * which is a subset of the reachability of \p dom */
629static
630bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
631 const NGHolder &g) {
632 const CharReach &domReach = g[dom].char_reach;
633 if (domReach.all()) {
634 return true;
635 }
636
637 NFAVertex start = source(e, g);
638 using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
639 map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
640
641 // Walk the graph backwards from v, examining each node. We fail (return
642 // false) if we encounter a node with reach NOT a subset of domReach, and
643 // we stop searching at dom.
644 try {
645 depth_first_visit(RevGraph(g), start,
646 ReachSubsetVisitor(domReach),
647 make_assoc_property_map(vertexColor),
648 VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
649 } catch(ReachMismatch&) {
650 return false;
651 }
652
653 return true;
654}
655
656/** Returns true if every vertex on paths leading from edge \p e has
657 * reachability which is a subset of the reachability of \p dom */
658static
659bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
660 const NGHolder &g) {
661 const CharReach &domReach = g[dom].char_reach;
662 if (domReach.all()) {
663 return true;
664 }
665
666 NFAVertex start = target(e, g);
667 map<NFAVertex, boost::default_color_type> vertexColor;
668
669 // Walk the graph forward from v, examining each node. We fail (return
670 // false) if we encounter a node with reach NOT a subset of domReach, and
671 // we stop searching at dom.
672 try {
673 depth_first_visit(g, start, ReachSubsetVisitor(domReach),
674 make_assoc_property_map(vertexColor),
675 VertexIs<NGHolder, NFAVertex>(dom));
676 } catch(ReachMismatch&) {
677 return false;
678 }
679
680 return true;
681}
682
683static
684bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
685 for (auto w : adjacent_vertices_range(v, g)) {
686 if (!is_special(w, g)) {
687 return false;
688 }
689 }
690 return true;
691}
692
693static
694bool allInsSpecial(NFAVertex v, const NGHolder &g) {
695 for (auto u : inv_adjacent_vertices_range(v, g)) {
696 if (!is_special(u, g)) {
697 return false;
698 }
699 }
700 return true;
701}
702
703/** Cheaply check whether this graph can't be reduced at all, because it is
704 * just a chain of vertices with no other edges. */
705static
706bool isIrreducible(const NGHolder &g) {
707 for (auto v : vertices_range(g)) {
708 // skip specials
709 if (is_special(v, g)) {
710 continue;
711 }
712
713 if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) {
714 return false;
715 }
716 if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) {
717 return false;
718 }
719 }
720
721 /* if calcComponents got sleepy and went home, the above checks don't hold
722 * as it assumes there is only one connected component. */
723 if (isAlternationOfClasses(g)) {
724 return false;
725 }
726
727 return true;
728}
729
730static
731u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
732 u32 count = 0;
733
734 cyclic.resize(num_vertices(g));
735
736 for (auto v : vertices_range(g)) {
737 assert(g[v].index < cyclic.size());
738 if (hasSelfLoop(v, g)) {
739 count++;
740 cyclic[g[v].index] = true;
741 }
742 }
743
744 return count;
745}
746
747static
748void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
749 set<NFAEdge> &dead, som_type som) {
750 auto dominators = findDominators(g);
751
752 for (auto v : vertices_range(g)) {
753 if (is_special(v, g)) {
754 continue;
755 }
756
757 // Path in through a dominator (e.g. '.+a?foobar')
758 NFAVertex dom = dominators[v];
759 if (dom && cyclic[g[dom].index]
760 && edge(dom, v, g).second) {
761
762 if (som && dom == g.startDs) {
763 continue;
764 }
765
766 DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic "
767 "vertex %zu\n", g[v].index, g[dom].index);
768
769 // iff all paths through in-edge e of v involve vertices whose
770 // reachability is a subset of reach(dom), we can delete edge e.
771 for (const auto &e : in_edges_range(v, g)) {
772 if (source(e, g) == dom) {
773 continue;
774 }
775
776 if (reversePathReachSubset(e, dom, g)) {
777 DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading "
778 "paths share dom reach\n",
779 g[source(e, g)].index, g[target(e, g)].index);
780 dead.insert(e);
781 if (source(e, g) == v) {
782 cyclic[g[v].index] = false;
783 }
784 continue;
785 }
786 }
787 }
788 }
789}
790
791static
792void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
793 set<NFAEdge> &dead) {
794 auto postdominators = findPostDominators(g);
795
796 for (auto v : vertices_range(g)) {
797 if (is_special(v, g)) {
798 continue;
799 }
800
801 // Path out through a post-dominator (e.g. a?.+foobar')
802 NFAVertex postdom = postdominators[v];
803 if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) {
804 DEBUG_PRINTF("vertex %zu is postdominated by directly-connected "
805 "cyclic vertex %zu\n", g[v].index, g[postdom].index);
806
807 // iff all paths through in-edge e of v involve vertices whose
808 // reachability is a subset of reach(dom), we can delete edge e.
809 for (const auto &e : out_edges_range(v, g)) {
810 if (target(e, g) == postdom) {
811 continue;
812 }
813
814 if (forwardPathReachSubset(e, postdom, g)) {
815 DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing "
816 "paths share postdom reach\n",
817 g[source(e, g)].index, g[target(e, g)].index);
818 if (target(e, g) == v) {
819 cyclic[g[v].index] = false;
820 }
821 dead.insert(e);
822 continue;
823 }
824 }
825 }
826 }
827}
828
829bool removeRedundancy(NGHolder &g, som_type som) {
830 DEBUG_PRINTF("rr som = %d\n", (int)som);
831 renumber_vertices(g);
832
833 // Cheap check: if all the non-special vertices have in-degree one and
834 // out-degree one, there's no redundancy in this here graph and we can
835 // vamoose.
836 if (isIrreducible(g)) {
837 return false;
838 }
839
840 VertexInfoMap infoMap(g);
841
842 // Populate maps of successors and predecessors, and accept status
843 populateContainers(g, infoMap);
844
845 /* Run multiple passes: terminate when a full pass doesn't remove
846 * any vertices */
847 bool doUseless = true;
848 bool doDiamond = true;
849 set<NFAVertex> removable;
850 while (doUseless || doDiamond) {
851 if (doUseless
852 && doUselessMergePass(g, som, infoMap, removable)) {
853 doDiamond = true;
854 }
855 doUseless = false;
856
857 if (doDiamond
858 && doDiamondMergePass(g, som, infoMap, removable)) {
859 doUseless = true;
860 }
861 doDiamond = false;
862 }
863 DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
864 remove_vertices(removable, g);
865
866 return !removable.empty();
867}
868
869/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
870 * reachability that is a superset of all paths feeding into that edge. */
871bool removeCyclicDominated(NGHolder &g, som_type som) {
872 set<NFAEdge> dead;
873 vector<bool> cyclic;
874 bool changed = false;
875
876 findCyclic(g, cyclic);
877
878 findCyclicDom(g, cyclic, dead, som);
879 if (!dead.empty()) {
880 remove_edges(dead, g);
881 pruneUseless(g);
882 dead.clear();
883 cyclic.clear(); // need to recalculate cyclic as ids have changed
884 findCyclic(g, cyclic);
885 changed = true;
886 }
887
888 findCyclicPostDom(g, cyclic, dead);
889 if (!dead.empty()) {
890 remove_edges(dead, g);
891 pruneUseless(g);
892 dead.clear();
893 changed = true;
894 }
895
896 return changed;
897}
898
899} // namespace ue2
900