1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief NFA graph reductions. |
31 | * |
32 | * This code attempts to make the NFA graph smaller by performing a number of |
33 | * local transformations: |
34 | * |
35 | * ### (1) removal of redundant vertices: |
36 | * |
37 | * v is redundant wrt to u if succ(v) is a subset of succ(u) |
38 | * AND pred(v) is a subset of pred(u) |
39 | * AND cr(v) is a subset of cr(u) |
40 | * |
41 | * ### (2) 'diamond' transformation: |
42 | * |
43 | * given succ(v) == succ(u) and pred(v) == pred(u), |
44 | * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v), |
45 | * and cr(w) = union(cr(v), cr(u)) |
46 | * |
47 | * ### (3) locally identifiable left equivalence: |
48 | * |
49 | * given pred(v) == pred(u) (**) and cr(v) == cr(u), |
50 | * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v), |
51 | * and succ(w) = union(succ(v), succ(u)) |
52 | * |
53 | * ### (4) locally identifiable right equivalence: |
54 | * |
55 | * given succ(v) == succ(u) (**) and cr(v) == cr(u), |
56 | * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v), |
57 | * and pred(w) = union(pred(v), pred(u)) |
58 | * |
59 | * NOTE (**): for left and right equivalence, we can also do the transform if |
60 | * set(u) contains u, set(v) contains v and the sets are otherwise equal. This |
61 | * enables equivalent vertices with self-loops to be merged. |
62 | * |
63 | * If v and u raise accepts, they can only be merged if they raise the same |
64 | * report IDs. |
65 | * |
66 | * Transformations are applied repeatedly until the graph stops changing. |
67 | * |
68 | * Note that the final graph may depend on the order in which these |
69 | * transformations are applied. In order to reduce the non-determinism the |
70 | * following order is imposed: (1); (2); (3) + (4). |
71 | */ |
72 | #include "ng_redundancy.h" |
73 | |
74 | #include "ng_holder.h" |
75 | #include "ng_calc_components.h" |
76 | #include "ng_dominators.h" |
77 | #include "ng_prune.h" |
78 | #include "ng_util.h" |
79 | #include "ue2common.h" |
80 | #include "util/container.h" |
81 | #include "util/flat_containers.h" |
82 | #include "util/graph_range.h" |
83 | |
84 | #include <algorithm> |
85 | #include <cassert> |
86 | #include <map> |
87 | #include <set> |
88 | #include <vector> |
89 | |
90 | #include <boost/graph/depth_first_search.hpp> |
91 | #include <boost/graph/reverse_graph.hpp> |
92 | |
93 | using namespace std; |
94 | |
95 | namespace ue2 { |
96 | |
97 | namespace { |
98 | |
99 | /** Precalculated (and maintained) information about a vertex. */ |
100 | class VertexInfo { |
101 | public: |
102 | flat_set<NFAVertex> pred; //!< predecessors of this vertex |
103 | flat_set<NFAVertex> succ; //!< successors of this vertex |
104 | bool isAccept = false; //!< does this vertex lead to accept? |
105 | bool isRemoved = false; //!< have we already removed this vertex? |
106 | |
107 | size_t inDegree() const { return pred.size(); } |
108 | size_t outDegree() const { return succ.size(); } |
109 | }; |
110 | |
111 | class VertexInfoMap { |
112 | public: |
113 | explicit VertexInfoMap(const NGHolder &gg) |
114 | : g(gg), infos(num_vertices(gg)) {} |
115 | VertexInfo &operator[](NFAVertex v) { |
116 | u32 i = g[v].index; |
117 | assert(i < infos.size()); |
118 | return infos[i]; |
119 | } |
120 | |
121 | const VertexInfo &operator[](NFAVertex v) const { |
122 | u32 i = g[v].index; |
123 | assert(i < infos.size()); |
124 | return infos[i]; |
125 | } |
126 | |
127 | private: |
128 | const NGHolder &g; |
129 | vector<VertexInfo> infos; |
130 | }; |
131 | |
132 | } // namespace |
133 | |
134 | /** Populates the info map with their predecessor and successor states, and |
135 | * whether they are accept states. */ |
136 | static |
137 | void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) { |
138 | for (auto v : vertices_range(g)) { |
139 | VertexInfo &info = infoMap[v]; |
140 | assert(info.pred.empty() && info.succ.empty()); |
141 | |
142 | // Build successor and predecessor sets |
143 | insert(&info.pred, inv_adjacent_vertices(v, g)); |
144 | insert(&info.succ, adjacent_vertices(v, g)); |
145 | |
146 | // Note whether the vertex is an accept state |
147 | if (!is_special(v, g)) { |
148 | if (contains(info.succ, g.accept) |
149 | || contains(info.succ, g.acceptEod)) { |
150 | info.isAccept = true; |
151 | } |
152 | } |
153 | } |
154 | } |
155 | |
156 | /** Helper function to take the intersection of two sorted vertex sets |
157 | * in-place. */ |
158 | static |
159 | void inplaceIntersection(vector<NFAVertex> &vset1, |
160 | const flat_set<NFAVertex> &vset2) { |
161 | const NFAVertex GONE = NGHolder::null_vertex(); |
162 | |
163 | vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end(); |
164 | flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end(); |
165 | |
166 | while ((it != ite) && (jt != jte)) { |
167 | assert(*it != GONE); |
168 | |
169 | if (*it < *jt) { |
170 | // present in vset1 but not in vset2. Set to null, remove in a |
171 | // second pass. |
172 | *it = GONE; |
173 | ++it; |
174 | } else if (*jt < *it) { |
175 | // present in vset2 but not in vset1, skip. |
176 | ++jt; |
177 | } else { |
178 | // present in both sets. |
179 | ++it; ++jt; |
180 | } |
181 | } |
182 | |
183 | // Left overs are only in that set. |
184 | vset1.erase(it, ite); |
185 | |
186 | // Remove nulls created above. |
187 | vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end()); |
188 | } |
189 | |
190 | /** Find the intersection of the successors of our predecessors. */ |
191 | static |
192 | void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet, |
193 | const VertexInfoMap &infoMap, |
194 | vector<NFAVertex> &intersection, |
195 | bool considerSelf = true /* follow self loops */) { |
196 | /* find a good seed for the intersection */ |
197 | const flat_set<NFAVertex> *best = nullptr; |
198 | for (auto u : predSet) { |
199 | if (!considerSelf && u == v) { |
200 | continue; |
201 | } |
202 | |
203 | const flat_set<NFAVertex> &succSet = infoMap[u].succ; |
204 | if (!best || succSet.size() <= best->size()) { |
205 | best = &succSet; |
206 | |
207 | // Break out if we've reduced our intersection to [v] |
208 | if (best->size() == 1) { |
209 | assert(*(best->begin()) == v); |
210 | intersection.push_back(v); |
211 | return; |
212 | } |
213 | } |
214 | } |
215 | |
216 | if (best) { |
217 | insert(&intersection, intersection.end(), *best); |
218 | } |
219 | |
220 | for (auto u : predSet) { |
221 | if (!considerSelf && u == v) { |
222 | continue; |
223 | } |
224 | |
225 | inplaceIntersection(intersection, infoMap[u].succ); |
226 | |
227 | // Check: intersection should always be at least size 1 |
228 | assert(!intersection.empty()); |
229 | |
230 | // Break out if we've reduced our intersection to [v] |
231 | if (intersection.size() == 1) { |
232 | assert(*intersection.begin() == v); |
233 | return; |
234 | } |
235 | } |
236 | } |
237 | |
238 | /** Find the intersection of the predecessors of our successors. */ |
239 | static |
240 | void predSuccIntersection(const NFAVertex v, |
241 | const flat_set<NFAVertex> &succSet, |
242 | const VertexInfoMap &infoMap, |
243 | vector<NFAVertex> &intersection, |
244 | bool considerSelf = true /* follow self loops */) { |
245 | /* find a good seed for the intersection */ |
246 | const flat_set<NFAVertex> *best = nullptr; |
247 | for (auto w : succSet) { |
248 | if (!considerSelf && w == v) { |
249 | continue; |
250 | } |
251 | |
252 | const flat_set<NFAVertex> &predSet = infoMap[w].pred; |
253 | if (!best || predSet.size() <= best->size()) { |
254 | best = &predSet; |
255 | |
256 | // Break out if we've reduced our intersection to [v] |
257 | if (best->size() == 1) { |
258 | assert(*(best->begin()) == v); |
259 | intersection.push_back(v); |
260 | return; |
261 | } |
262 | } |
263 | } |
264 | |
265 | if (best) { |
266 | insert(&intersection, intersection.end(), *best); |
267 | } |
268 | |
269 | for (auto w : succSet) { |
270 | if (!considerSelf && w == v) { |
271 | continue; |
272 | } |
273 | |
274 | inplaceIntersection(intersection, infoMap[w].pred); |
275 | |
276 | // Check: intersection should always be at least size 1 |
277 | assert(!intersection.empty()); |
278 | |
279 | // Break out if we've reduced our intersection to [v] |
280 | if (intersection.size() == 1) { |
281 | assert(*intersection.begin() == v); |
282 | return; |
283 | } |
284 | } |
285 | } |
286 | |
287 | /** Update containers to take into account the removal of vertex v. */ |
288 | static |
289 | void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, |
290 | set<NFAVertex> &removable) { |
291 | VertexInfo &info = infoMap[v]; |
292 | assert(!info.isRemoved); |
293 | assert(!contains(removable, v)); |
294 | info.isRemoved = true; |
295 | removable.insert(v); |
296 | |
297 | // remove v from its predecessors' successors |
298 | for (auto u : info.pred) { |
299 | infoMap[u].succ.erase(v); |
300 | } |
301 | |
302 | // remove v from its successors' predecessors |
303 | for (auto w : info.succ) { |
304 | infoMap[w].pred.erase(v); |
305 | } |
306 | } |
307 | |
308 | static |
309 | bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { |
310 | NFAEdge e = edge(g.start, v, g); |
311 | return e && !g[e].tops.empty(); |
312 | } |
313 | |
314 | /** Transform (1), removal of redundant vertices. */ |
315 | static |
316 | bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, |
317 | set<NFAVertex> &removable) { |
318 | /* useless merges can be done in any order, no need to take any care with |
319 | * ordering */ |
320 | |
321 | // Temporary vectors used for intersections below |
322 | vector<NFAVertex> succPredSet, predSuccSet, intersection; |
323 | |
324 | bool changed = false; |
325 | for (auto v : vertices_range(g)) { |
326 | VertexInfo &info = infoMap[v]; |
327 | |
328 | if (info.isRemoved) { |
329 | continue; |
330 | } |
331 | |
332 | assert(!contains(removable, v)); |
333 | |
334 | if (is_special(v, g)) { |
335 | continue; |
336 | } |
337 | |
338 | /* we do not need to check for out edge tops - as only specials (start) |
339 | * can have tops and they are already disqualified. */ |
340 | if (hasInEdgeTops(g, v)) { |
341 | continue; // Conservatively skip anything with nonzero tops. |
342 | } |
343 | |
344 | if (info.pred.empty() || info.succ.empty()) { |
345 | DEBUG_PRINTF("vertex %zu has empty pred/succ list\n" , g[v].index); |
346 | assert(0); // non-special states should always have succ/pred lists |
347 | continue; |
348 | } |
349 | |
350 | // The following cases are more complex and rely on the intersection of |
351 | // Succ(Pred(v)) and Pred(Succ(v)) |
352 | |
353 | // Compute intersections, operating on the smaller set first |
354 | // Note that we use vectors here, as set_intersection underneath |
355 | // guarantees sorted output, and vectors were quite a bit |
356 | // faster than sets or lists. |
357 | |
358 | succPredSet.clear(); |
359 | predSuccSet.clear(); |
360 | |
361 | if (info.pred.size() <= info.succ.size()) { |
362 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
363 | if (succPredSet.size() == 1) { |
364 | // nobody in here but us chickens |
365 | assert(*succPredSet.begin() == v); |
366 | continue; |
367 | } |
368 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
369 | if (predSuccSet.size() == 1) { |
370 | assert(*predSuccSet.begin() == v); |
371 | continue; |
372 | } |
373 | } else { |
374 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
375 | if (predSuccSet.size() == 1) { |
376 | assert(*predSuccSet.begin() == v); |
377 | continue; |
378 | } |
379 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
380 | if (succPredSet.size() == 1) { |
381 | assert(*succPredSet.begin() == v); |
382 | continue; |
383 | } |
384 | } |
385 | |
386 | // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) |
387 | intersection.clear(); |
388 | set_intersection(succPredSet.begin(), succPredSet.end(), |
389 | predSuccSet.begin(), predSuccSet.end(), |
390 | back_inserter(intersection)); |
391 | |
392 | /* Boring if it is just us in the intersection */ |
393 | if (intersection.size() < 2) { |
394 | continue; |
395 | } |
396 | |
397 | // Compare char_reach, mark v for removal if any members of |
398 | // the intersection have an equal or greater reach |
399 | const CharReach &currReach = g[v].char_reach; |
400 | const auto &currReports = g[v].reports; |
401 | for (auto t : intersection) { |
402 | const VertexInfo &info2 = infoMap[t]; |
403 | |
404 | /* start is never a succ of a state, so will never be in the |
405 | * predsucc/succpred intersection */ |
406 | assert(t != g.start); |
407 | |
408 | if (t == v || info2.isRemoved) { |
409 | continue; |
410 | } |
411 | |
412 | // For each candidate C to make V redundant, check: |
413 | // if V is an accept state, C must be an accept state for |
414 | // the same pattern |
415 | // pred(C) is a superset of pred(V) |
416 | // succ(C) is a superset of succ(V) |
417 | // reach(C) is a superset of reach(V) |
418 | // |
419 | // Note: pred/sec tests are covered by the intersections |
420 | // calculated above. |
421 | |
422 | /* note: links to accepts are also tracked in succs */ |
423 | if (info.isAccept && currReports != g[t].reports) { |
424 | continue; |
425 | } |
426 | |
427 | if (som) { |
428 | if (t == g.startDs) { |
429 | continue; |
430 | } |
431 | if (is_virtual_start(t, g) != is_virtual_start(v, g)) { |
432 | continue; |
433 | } |
434 | } |
435 | |
436 | /* we do not need to check for out edge tops - as only start |
437 | * can have tops and it has already been ruled out. */ |
438 | if (hasInEdgeTops(g, t)) { |
439 | continue; // Conservatively skip anything with nonzero tops. |
440 | } |
441 | |
442 | CharReach &otherReach = g[t].char_reach; |
443 | if (currReach.isSubsetOf(otherReach)) { |
444 | DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n" , |
445 | g[v].index, g[t].index); |
446 | markForRemoval(v, infoMap, removable); |
447 | changed = true; |
448 | break; |
449 | } |
450 | } |
451 | } |
452 | |
453 | return changed; |
454 | } |
455 | |
456 | /** Transform (2), diamond merge pass. */ |
457 | static |
458 | bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, |
459 | set<NFAVertex> &removable) { |
460 | // Temporary vectors used for intersections below |
461 | vector<NFAVertex> succPredSet, predSuccSet, intersection; |
462 | |
463 | bool changed = false; |
464 | for (auto v : vertices_range(g)) { |
465 | VertexInfo &info = infoMap[v]; |
466 | |
467 | if (info.isRemoved) { |
468 | continue; |
469 | } |
470 | |
471 | assert(!contains(removable, v)); |
472 | |
473 | if (is_special(v, g)) { |
474 | continue; |
475 | } |
476 | |
477 | /* we do not need to check for out edge tops - as only specials (start) |
478 | * can have tops and they are already disqualified. */ |
479 | if (hasInEdgeTops(g, v)) { |
480 | continue; // Conservatively skip anything with nonzero tops. |
481 | } |
482 | |
483 | if (info.pred.empty() || info.succ.empty()) { |
484 | assert(0); // non-special states should always have succ/pred lists |
485 | continue; |
486 | } |
487 | |
488 | // The following cases are more complex and rely on the intersection of |
489 | // Succ(Pred(v)) and Pred(Succ(v)) |
490 | |
491 | // Compute intersections, operating on the smaller set first |
492 | // Note that we use vectors here, as set_intersection underneath |
493 | // guarantees sorted output, and vectors were quite a bit faster than |
494 | // sets or lists. |
495 | |
496 | succPredSet.clear(); |
497 | predSuccSet.clear(); |
498 | |
499 | if (info.pred.size() <= info.succ.size()) { |
500 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
501 | if (succPredSet.size() == 1) { |
502 | // nobody in here but us chickens |
503 | assert(*succPredSet.begin() == v); |
504 | continue; |
505 | } |
506 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
507 | if (predSuccSet.size() == 1) { |
508 | assert(*predSuccSet.begin() == v); |
509 | continue; |
510 | } |
511 | } else { |
512 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
513 | if (predSuccSet.size() == 1) { |
514 | assert(*predSuccSet.begin() == v); |
515 | continue; |
516 | } |
517 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
518 | if (succPredSet.size() == 1) { |
519 | assert(*succPredSet.begin() == v); |
520 | continue; |
521 | } |
522 | } |
523 | |
524 | // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) |
525 | intersection.clear(); |
526 | set_intersection(succPredSet.begin(), succPredSet.end(), |
527 | predSuccSet.begin(), predSuccSet.end(), |
528 | back_inserter(intersection)); |
529 | |
530 | /* Boring if it is just us in the intersection */ |
531 | if (intersection.size() < 2) { |
532 | continue; |
533 | } |
534 | |
535 | const CharReach &currReach = g[v].char_reach; |
536 | const auto &currReports = g[v].reports; |
537 | for (auto t : intersection) { |
538 | const VertexInfo &info2 = infoMap[t]; |
539 | |
540 | if (t == v || info2.isRemoved || is_special(t, g)) { |
541 | continue; |
542 | } |
543 | |
544 | /* note: links to accepts are also tracked in succs */ |
545 | if (info.isAccept && currReports != g[t].reports) { |
546 | continue; |
547 | } |
548 | |
549 | /* we do not need to check for out edge tops - as only specials |
550 | * (start) can have tops and they are already disqualified. */ |
551 | if (hasInEdgeTops(g, t)) { |
552 | continue; // Conservatively skip anything with nonzero tops. |
553 | } |
554 | |
555 | if (som) { |
556 | if (is_virtual_start(v, g) != is_virtual_start(t, g)) { |
557 | continue; // can only merge like with like. |
558 | } |
559 | } |
560 | |
561 | // If in-degree of v == in-degree of target |
562 | // and out-degree of v == out-degree of target |
563 | // (because pred and succ are supersets) |
564 | // then combine charreach of v into target and remove v |
565 | if (info.inDegree() == info2.inDegree() |
566 | && info.outDegree() == info2.outDegree()) { |
567 | // add character reachability of v into target |
568 | CharReach &otherReach = g[t].char_reach; |
569 | otherReach |= currReach; |
570 | // v can be removed |
571 | DEBUG_PRINTF("removing redundant vertex %zu and merging " |
572 | "reachability with vertex %zu\n" , |
573 | g[v].index, g[t].index); |
574 | markForRemoval(v, infoMap, removable); |
575 | changed = true; |
576 | break; |
577 | } |
578 | } |
579 | } |
580 | |
581 | return changed; |
582 | } |
583 | |
584 | namespace { |
585 | |
586 | struct ReachMismatch {}; |
587 | |
588 | class ReachSubsetVisitor : public boost::default_dfs_visitor { |
589 | public: |
590 | explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {} |
591 | |
592 | template <class Graph, class Vertex> |
593 | void discover_vertex(const Vertex &v, const Graph &g) const { |
594 | if (is_any_start(v, g)) { |
595 | return; // start vertices are OK |
596 | } else if (is_special(v, g)) { |
597 | assert(0); |
598 | throw ReachMismatch(); // other special nodes?? |
599 | } |
600 | |
601 | const CharReach &vcr = g[v].char_reach; |
602 | DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n" , vcr.count(), |
603 | cr.count()); |
604 | if (vcr != (vcr & cr)) { |
605 | throw ReachMismatch(); |
606 | } |
607 | } |
608 | |
609 | private: |
610 | const CharReach &cr; |
611 | }; |
612 | |
613 | /** Terminator function for DFS used in pathReachSubset. */ |
614 | template <class Graph, class Vertex> class VertexIs { |
615 | public: |
616 | explicit VertexIs(const Vertex &v) : vertex(v) {} |
617 | bool operator()(const Vertex &v, const Graph &) const { |
618 | return v == vertex; |
619 | } |
620 | |
621 | private: |
622 | Vertex vertex; |
623 | }; |
624 | |
625 | } // namespace |
626 | |
627 | /** Returns true if every vertex on paths leading to edge \p e has reachability |
628 | * which is a subset of the reachability of \p dom */ |
629 | static |
630 | bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, |
631 | const NGHolder &g) { |
632 | const CharReach &domReach = g[dom].char_reach; |
633 | if (domReach.all()) { |
634 | return true; |
635 | } |
636 | |
637 | NFAVertex start = source(e, g); |
638 | using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>; |
639 | map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor; |
640 | |
641 | // Walk the graph backwards from v, examining each node. We fail (return |
642 | // false) if we encounter a node with reach NOT a subset of domReach, and |
643 | // we stop searching at dom. |
644 | try { |
645 | depth_first_visit(RevGraph(g), start, |
646 | ReachSubsetVisitor(domReach), |
647 | make_assoc_property_map(vertexColor), |
648 | VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom)); |
649 | } catch(ReachMismatch&) { |
650 | return false; |
651 | } |
652 | |
653 | return true; |
654 | } |
655 | |
656 | /** Returns true if every vertex on paths leading from edge \p e has |
657 | * reachability which is a subset of the reachability of \p dom */ |
658 | static |
659 | bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, |
660 | const NGHolder &g) { |
661 | const CharReach &domReach = g[dom].char_reach; |
662 | if (domReach.all()) { |
663 | return true; |
664 | } |
665 | |
666 | NFAVertex start = target(e, g); |
667 | map<NFAVertex, boost::default_color_type> vertexColor; |
668 | |
669 | // Walk the graph forward from v, examining each node. We fail (return |
670 | // false) if we encounter a node with reach NOT a subset of domReach, and |
671 | // we stop searching at dom. |
672 | try { |
673 | depth_first_visit(g, start, ReachSubsetVisitor(domReach), |
674 | make_assoc_property_map(vertexColor), |
675 | VertexIs<NGHolder, NFAVertex>(dom)); |
676 | } catch(ReachMismatch&) { |
677 | return false; |
678 | } |
679 | |
680 | return true; |
681 | } |
682 | |
683 | static |
684 | bool allOutsSpecial(NFAVertex v, const NGHolder &g) { |
685 | for (auto w : adjacent_vertices_range(v, g)) { |
686 | if (!is_special(w, g)) { |
687 | return false; |
688 | } |
689 | } |
690 | return true; |
691 | } |
692 | |
693 | static |
694 | bool allInsSpecial(NFAVertex v, const NGHolder &g) { |
695 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
696 | if (!is_special(u, g)) { |
697 | return false; |
698 | } |
699 | } |
700 | return true; |
701 | } |
702 | |
703 | /** Cheaply check whether this graph can't be reduced at all, because it is |
704 | * just a chain of vertices with no other edges. */ |
705 | static |
706 | bool isIrreducible(const NGHolder &g) { |
707 | for (auto v : vertices_range(g)) { |
708 | // skip specials |
709 | if (is_special(v, g)) { |
710 | continue; |
711 | } |
712 | |
713 | if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) { |
714 | return false; |
715 | } |
716 | if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) { |
717 | return false; |
718 | } |
719 | } |
720 | |
721 | /* if calcComponents got sleepy and went home, the above checks don't hold |
722 | * as it assumes there is only one connected component. */ |
723 | if (isAlternationOfClasses(g)) { |
724 | return false; |
725 | } |
726 | |
727 | return true; |
728 | } |
729 | |
730 | static |
731 | u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) { |
732 | u32 count = 0; |
733 | |
734 | cyclic.resize(num_vertices(g)); |
735 | |
736 | for (auto v : vertices_range(g)) { |
737 | assert(g[v].index < cyclic.size()); |
738 | if (hasSelfLoop(v, g)) { |
739 | count++; |
740 | cyclic[g[v].index] = true; |
741 | } |
742 | } |
743 | |
744 | return count; |
745 | } |
746 | |
747 | static |
748 | void findCyclicDom(NGHolder &g, vector<bool> &cyclic, |
749 | set<NFAEdge> &dead, som_type som) { |
750 | auto dominators = findDominators(g); |
751 | |
752 | for (auto v : vertices_range(g)) { |
753 | if (is_special(v, g)) { |
754 | continue; |
755 | } |
756 | |
757 | // Path in through a dominator (e.g. '.+a?foobar') |
758 | NFAVertex dom = dominators[v]; |
759 | if (dom && cyclic[g[dom].index] |
760 | && edge(dom, v, g).second) { |
761 | |
762 | if (som && dom == g.startDs) { |
763 | continue; |
764 | } |
765 | |
766 | DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic " |
767 | "vertex %zu\n" , g[v].index, g[dom].index); |
768 | |
769 | // iff all paths through in-edge e of v involve vertices whose |
770 | // reachability is a subset of reach(dom), we can delete edge e. |
771 | for (const auto &e : in_edges_range(v, g)) { |
772 | if (source(e, g) == dom) { |
773 | continue; |
774 | } |
775 | |
776 | if (reversePathReachSubset(e, dom, g)) { |
777 | DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading " |
778 | "paths share dom reach\n" , |
779 | g[source(e, g)].index, g[target(e, g)].index); |
780 | dead.insert(e); |
781 | if (source(e, g) == v) { |
782 | cyclic[g[v].index] = false; |
783 | } |
784 | continue; |
785 | } |
786 | } |
787 | } |
788 | } |
789 | } |
790 | |
791 | static |
792 | void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, |
793 | set<NFAEdge> &dead) { |
794 | auto postdominators = findPostDominators(g); |
795 | |
796 | for (auto v : vertices_range(g)) { |
797 | if (is_special(v, g)) { |
798 | continue; |
799 | } |
800 | |
801 | // Path out through a post-dominator (e.g. a?.+foobar') |
802 | NFAVertex postdom = postdominators[v]; |
803 | if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) { |
804 | DEBUG_PRINTF("vertex %zu is postdominated by directly-connected " |
805 | "cyclic vertex %zu\n" , g[v].index, g[postdom].index); |
806 | |
807 | // iff all paths through in-edge e of v involve vertices whose |
808 | // reachability is a subset of reach(dom), we can delete edge e. |
809 | for (const auto &e : out_edges_range(v, g)) { |
810 | if (target(e, g) == postdom) { |
811 | continue; |
812 | } |
813 | |
814 | if (forwardPathReachSubset(e, postdom, g)) { |
815 | DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing " |
816 | "paths share postdom reach\n" , |
817 | g[source(e, g)].index, g[target(e, g)].index); |
818 | if (target(e, g) == v) { |
819 | cyclic[g[v].index] = false; |
820 | } |
821 | dead.insert(e); |
822 | continue; |
823 | } |
824 | } |
825 | } |
826 | } |
827 | } |
828 | |
829 | bool removeRedundancy(NGHolder &g, som_type som) { |
830 | DEBUG_PRINTF("rr som = %d\n" , (int)som); |
831 | renumber_vertices(g); |
832 | |
833 | // Cheap check: if all the non-special vertices have in-degree one and |
834 | // out-degree one, there's no redundancy in this here graph and we can |
835 | // vamoose. |
836 | if (isIrreducible(g)) { |
837 | return false; |
838 | } |
839 | |
840 | VertexInfoMap infoMap(g); |
841 | |
842 | // Populate maps of successors and predecessors, and accept status |
843 | populateContainers(g, infoMap); |
844 | |
845 | /* Run multiple passes: terminate when a full pass doesn't remove |
846 | * any vertices */ |
847 | bool doUseless = true; |
848 | bool doDiamond = true; |
849 | set<NFAVertex> removable; |
850 | while (doUseless || doDiamond) { |
851 | if (doUseless |
852 | && doUselessMergePass(g, som, infoMap, removable)) { |
853 | doDiamond = true; |
854 | } |
855 | doUseless = false; |
856 | |
857 | if (doDiamond |
858 | && doDiamondMergePass(g, som, infoMap, removable)) { |
859 | doUseless = true; |
860 | } |
861 | doDiamond = false; |
862 | } |
863 | DEBUG_PRINTF("found %zu removable vertices overall.\n" , removable.size()); |
864 | remove_vertices(removable, g); |
865 | |
866 | return !removable.empty(); |
867 | } |
868 | |
869 | /** UE-524: remove edges into nodes that are dominated by cyclic nodes with |
870 | * reachability that is a superset of all paths feeding into that edge. */ |
871 | bool removeCyclicDominated(NGHolder &g, som_type som) { |
872 | set<NFAEdge> dead; |
873 | vector<bool> cyclic; |
874 | bool changed = false; |
875 | |
876 | findCyclic(g, cyclic); |
877 | |
878 | findCyclicDom(g, cyclic, dead, som); |
879 | if (!dead.empty()) { |
880 | remove_edges(dead, g); |
881 | pruneUseless(g); |
882 | dead.clear(); |
883 | cyclic.clear(); // need to recalculate cyclic as ids have changed |
884 | findCyclic(g, cyclic); |
885 | changed = true; |
886 | } |
887 | |
888 | findCyclicPostDom(g, cyclic, dead); |
889 | if (!dead.empty()) { |
890 | remove_edges(dead, g); |
891 | pruneUseless(g); |
892 | dead.clear(); |
893 | changed = true; |
894 | } |
895 | |
896 | return changed; |
897 | } |
898 | |
899 | } // namespace ue2 |
900 | |