1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief NFA graph state squashing analysis. |
31 | * |
32 | * The basic idea behind the state squashing is that when we are in a cyclic |
33 | * state v there are certain other states which are completely irrelevant. This |
34 | * is used primarily by the determinisation process to produce smaller DFAs by |
35 | * not tracking irrelevant states. It's also used by the LimEx NFA model. |
36 | * |
37 | * Working out which states we can ignore mainly uses the post-dominator |
38 | * analysis. |
39 | * |
40 | * ### Dot Squash Masks: |
41 | * |
42 | * The following vertices are added to the squash mask: |
43 | * - (1) Any vertex post-dominated by the cyclic dot state |
44 | * - (2) Any other vertex post-dominated by the cyclic dot state's successors |
45 | * - (3) Any vertex post-dominated by a predecessor of the cyclic dot state - |
46 | * provided the predecessor's successors are a subset of the cyclic state's |
47 | * successors [For (3), the term successor also includes report information] |
48 | * |
49 | * (2) and (3) allow us to get squash masks from .* as well as .+ |
50 | * |
51 | * The squash masks are not optimal especially in the case where there |
52 | * alternations on both sides - for example in: |
53 | * |
54 | * /foo(bar|baz).*(abc|xyz)/s |
55 | * |
56 | * 'foo' is irrelevant once the dot star is hit, but it has no post-dominators |
57 | * so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to |
58 | * do a more complete analysis based on cutting the graph and seeing which |
59 | * vertices are unreachable but the current approach is quick and probably |
60 | * adequate. |
61 | * |
62 | * |
63 | * ### Non-Dot Squash Masks: |
64 | * |
65 | * As for dot states. However, if anything in a pdom tree falls outside the |
66 | * character range of the cyclic state the whole pdom tree is ignored. Also when |
67 | * considering the predecessor's pdom tree it is necessary to verify that the |
68 | * predecessor's character reachability falls within that of the cyclic state. |
69 | * |
70 | * We could do better in this case by not throwing away the whole pdom tree - |
71 | * however the bits which we can keep are not clear from the pdom tree of the |
72 | * cyclic state - it probably can be based on the dom or pdom tree of the bad |
73 | * vertex. |
74 | * |
75 | * An example of us doing badly is: |
76 | * |
77 | * /HTTP.*Referer[^\n]*google/s |
78 | * |
79 | * as '[\\n]*' doesn't get a squash mask at all due to .* but we should be able |
80 | * to squash 'Referer'. |
81 | * |
82 | * ### Extension: |
83 | * |
84 | * If a state leads solely to a squashable state (or its immediate successors) |
85 | * with the same reachability we can make this state a squash state of any of |
86 | * the original states squashees which we postdominate. Could probably tighten |
87 | * this up but it would require thought. May not need to keep the original |
88 | * squasher around but that would also require thought. |
89 | * |
90 | * ### SOM Notes: |
91 | * |
92 | * If (left) start of match is required, it is illegal to squash any state which |
93 | * may result in an early start of match reaching the squashing state. |
94 | */ |
95 | |
96 | #include "config.h" |
97 | |
98 | #include "ng_squash.h" |
99 | |
100 | #include "ng_dominators.h" |
101 | #include "ng_dump.h" |
102 | #include "ng_holder.h" |
103 | #include "ng_prune.h" |
104 | #include "ng_region.h" |
105 | #include "ng_som_util.h" |
106 | #include "ng_util.h" |
107 | #include "util/container.h" |
108 | #include "util/graph_range.h" |
109 | #include "util/report_manager.h" |
110 | #include "ue2common.h" |
111 | |
112 | #include <deque> |
113 | #include <map> |
114 | #include <unordered_map> |
115 | #include <unordered_set> |
116 | |
117 | #include <boost/graph/depth_first_search.hpp> |
118 | #include <boost/graph/reverse_graph.hpp> |
119 | |
120 | using namespace std; |
121 | |
122 | namespace ue2 { |
123 | |
124 | using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>; |
125 | |
126 | static |
127 | PostDomTree buildPDomTree(const NGHolder &g) { |
128 | PostDomTree tree; |
129 | tree.reserve(num_vertices(g)); |
130 | |
131 | auto postdominators = findPostDominators(g); |
132 | |
133 | for (auto v : vertices_range(g)) { |
134 | if (is_special(v, g)) { |
135 | continue; |
136 | } |
137 | NFAVertex pdom = postdominators[v]; |
138 | if (pdom) { |
139 | DEBUG_PRINTF("vertex %zu -> %zu\n" , g[pdom].index, g[v].index); |
140 | tree[pdom].insert(v); |
141 | } |
142 | } |
143 | return tree; |
144 | } |
145 | |
146 | /** |
147 | * Builds a squash mask based on the pdom tree of v and the given char reach. |
148 | * The built squash mask is a bit conservative for non-dot cases and could |
149 | * be improved with a bit of thought. |
150 | */ |
151 | static |
152 | void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v, |
153 | const CharReach &cr, const NFAStateSet &init, |
154 | const vector<NFAVertex> &vByIndex, const PostDomTree &tree, |
155 | som_type som, const vector<DepthMinMax> &som_depths, |
156 | const unordered_map<NFAVertex, u32> ®ion_map, |
157 | smgb_cache &cache) { |
158 | DEBUG_PRINTF("build base squash mask for vertex %zu)\n" , g[v].index); |
159 | |
160 | vector<NFAVertex> q; |
161 | |
162 | auto it = tree.find(v); |
163 | if (it != tree.end()) { |
164 | q.insert(q.end(), it->second.begin(), it->second.end()); |
165 | } |
166 | |
167 | const u32 v_index = g[v].index; |
168 | |
169 | while (!q.empty()) { |
170 | NFAVertex u = q.back(); |
171 | q.pop_back(); |
172 | const CharReach &cru = g[u].char_reach; |
173 | |
174 | if ((cru & ~cr).any()) { |
175 | /* bail: bad cr on vertex u */ |
176 | /* TODO: this could be better |
177 | * |
178 | * we still need to ensure that we record any paths leading to u. |
179 | * Hence all vertices R which can reach u must be excluded from the |
180 | * squash mask. Note: R != pdom(u) and there may exist an x in (R - |
181 | * pdom(u)) which is in pdom(y) where y is in q. Clear ? |
182 | */ |
183 | mask.set(); |
184 | return; |
185 | } |
186 | |
187 | const u32 u_index = g[u].index; |
188 | |
189 | if (som) { |
190 | /* We cannot add a state u to the squash mask of v if it may have an |
191 | * earlier start of match offset. ie for us to add a state u to v |
192 | * maxSomDist(u) <= minSomDist(v) |
193 | */ |
194 | const depth &max_som_dist_u = som_depths[u_index].max; |
195 | const depth &min_som_dist_v = som_depths[v_index].min; |
196 | |
197 | if (max_som_dist_u.is_infinite()) { |
198 | /* it is hard to tell due to the INF if u can actually store an |
199 | * earlier SOM than w (state we are building the squash mask |
200 | * for) - need to think more deeply |
201 | */ |
202 | |
203 | if (mustBeSetBefore(u, v, g, cache) |
204 | && !somMayGoBackwards(u, g, region_map, cache)) { |
205 | DEBUG_PRINTF("u %u v %u\n" , u_index, v_index); |
206 | goto squash_ok; |
207 | } |
208 | } |
209 | |
210 | if (max_som_dist_u > min_som_dist_v) { |
211 | /* u can't be squashed as it may be storing an earlier SOM */ |
212 | goto add_children_to_queue; |
213 | } |
214 | |
215 | } |
216 | |
217 | squash_ok: |
218 | mask.set(u_index); |
219 | DEBUG_PRINTF("pdom'ed %u\n" , u_index); |
220 | add_children_to_queue: |
221 | it = tree.find(u); |
222 | if (it != tree.end()) { |
223 | q.insert(q.end(), it->second.begin(), it->second.end()); |
224 | } |
225 | } |
226 | |
227 | if (cr.all()) { |
228 | /* the init states aren't in the pdom tree. If all their succ states |
229 | * are set (or v), we can consider them post dominated */ |
230 | |
231 | /* Note: init states will always result in a later som */ |
232 | for (size_t i = init.find_first(); i != init.npos; |
233 | i = init.find_next(i)) { |
234 | /* Yes vacuous patterns do exist */ |
235 | NFAVertex iv = vByIndex[i]; |
236 | for (auto w : adjacent_vertices_range(iv, g)) { |
237 | if (w == g.accept || w == g.acceptEod) { |
238 | DEBUG_PRINTF("skipping %zu due to vacuous accept\n" , i); |
239 | goto next_init_state; |
240 | } |
241 | |
242 | u32 vert_id = g[w].index; |
243 | if (w != iv && w != v && !mask.test(vert_id)) { |
244 | DEBUG_PRINTF("skipping %zu due to %u\n" , i, vert_id); |
245 | goto next_init_state; |
246 | } |
247 | } |
248 | DEBUG_PRINTF("pdom'ed %zu\n" , i); |
249 | mask.set(i); |
250 | next_init_state:; |
251 | } |
252 | } |
253 | |
254 | mask.flip(); |
255 | } |
256 | |
257 | static |
258 | void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) { |
259 | for (auto w : adjacent_vertices_range(v, g)) { |
260 | if (!is_special(w, g)) { |
261 | succ.set(g[w].index); |
262 | } |
263 | } |
264 | } |
265 | |
266 | static |
267 | void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) { |
268 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
269 | if (!is_special(u, g)) { |
270 | pred.set(g[u].index); |
271 | } |
272 | } |
273 | } |
274 | |
275 | static |
276 | void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, |
277 | const PostDomTree &pdom_tree, const NFAStateSet &init, |
278 | unordered_map<NFAVertex, NFAStateSet> *squash, |
279 | som_type som, const vector<DepthMinMax> &som_depths, |
280 | const unordered_map<NFAVertex, u32> ®ion_map, |
281 | smgb_cache &cache) { |
282 | deque<NFAVertex> remaining; |
283 | for (const auto &m : *squash) { |
284 | remaining.push_back(m.first); |
285 | } |
286 | |
287 | while (!remaining.empty()) { |
288 | NFAVertex v = remaining.back(); |
289 | remaining.pop_back(); |
290 | |
291 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
292 | if (is_special(u, g)) { |
293 | continue; |
294 | } |
295 | |
296 | if (g[v].char_reach != g[u].char_reach) { |
297 | continue; |
298 | } |
299 | |
300 | if (out_degree(u, g) != 1) { |
301 | continue; |
302 | } |
303 | |
304 | NFAStateSet u_squash(init.size()); |
305 | size_t u_index = g[u].index; |
306 | |
307 | buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex, |
308 | pdom_tree, som, som_depths, region_map, cache); |
309 | |
310 | u_squash.set(u_index); /* never clear ourselves */ |
311 | |
312 | if ((~u_squash).any()) { // i.e. some bits unset in mask |
313 | DEBUG_PRINTF("%zu is an upstream squasher of %zu\n" , u_index, |
314 | g[v].index); |
315 | (*squash)[u] = u_squash; |
316 | remaining.push_back(u); |
317 | } |
318 | } |
319 | } |
320 | } |
321 | |
322 | /* If there are redundant states in the graph, it may be possible for two |
323 | * sibling .* states to try to squash each other -- which should be prevented. |
324 | * |
325 | * Note: this situation should only happen if ng_equivalence has not been run. |
326 | */ |
327 | static |
328 | void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, |
329 | unordered_map<NFAVertex, NFAStateSet> &squash) { |
330 | for (auto it = squash.begin(); it != squash.end();) { |
331 | NFAVertex a = it->first; |
332 | u32 a_index = g[a].index; |
333 | |
334 | NFAStateSet a_squash = ~it->second; /* default is mask of survivors */ |
335 | for (auto b_index = a_squash.find_first(); b_index != a_squash.npos; |
336 | b_index = a_squash.find_next(b_index)) { |
337 | assert(b_index != a_index); |
338 | NFAVertex b = vByIndex[b_index]; |
339 | |
340 | auto b_it = squash.find(b); |
341 | if (b_it == squash.end()) { |
342 | continue; |
343 | } |
344 | auto &b_squash = b_it->second; |
345 | if (!b_squash.test(a_index)) { |
346 | /* b and a squash each other, prevent this */ |
347 | DEBUG_PRINTF("removing mutual squash %u %zu\n" , |
348 | a_index, b_index); |
349 | b_squash.set(a_index); |
350 | it->second.set(b_index); |
351 | } |
352 | } |
353 | |
354 | if (it->second.all()) { |
355 | DEBUG_PRINTF("%u is no longer an effective squash state\n" , |
356 | a_index); |
357 | it = squash.erase(it); |
358 | } else { |
359 | ++it; |
360 | } |
361 | } |
362 | } |
363 | |
364 | unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, |
365 | som_type som) { |
366 | unordered_map<NFAVertex, NFAStateSet> squash; |
367 | |
368 | // Number of bits to use for all our masks. If we're a triggered graph, |
369 | // tops have already been assigned, so we don't have to account for them. |
370 | const u32 numStates = num_vertices(g); |
371 | |
372 | // Build post-dominator tree. |
373 | auto pdom_tree = buildPDomTree(g); |
374 | |
375 | // Build list of vertices by state ID and a set of init states. |
376 | vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex()); |
377 | NFAStateSet initStates(numStates); |
378 | smgb_cache cache(g); |
379 | |
380 | // Mappings used for SOM mode calculations, otherwise left empty. |
381 | unordered_map<NFAVertex, u32> region_map; |
382 | vector<DepthMinMax> som_depths; |
383 | if (som) { |
384 | region_map = assignRegions(g); |
385 | som_depths = getDistancesFromSOM(g); |
386 | } |
387 | |
388 | for (auto v : vertices_range(g)) { |
389 | const u32 vert_id = g[v].index; |
390 | DEBUG_PRINTF("vertex %u/%u\n" , vert_id, numStates); |
391 | assert(vert_id < numStates); |
392 | vByIndex[vert_id] = v; |
393 | |
394 | if (is_any_start(v, g) || !in_degree(v, g)) { |
395 | initStates.set(vert_id); |
396 | } |
397 | } |
398 | |
399 | for (u32 i = 0; i < numStates; i++) { |
400 | NFAVertex v = vByIndex[i]; |
401 | assert(v != NGHolder::null_vertex()); |
402 | const CharReach &cr = g[v].char_reach; |
403 | |
404 | /* only non-init cyclics can be squashers */ |
405 | if (!hasSelfLoop(v, g) || initStates.test(i)) { |
406 | continue; |
407 | } |
408 | |
409 | DEBUG_PRINTF("state %u is cyclic\n" , i); |
410 | |
411 | NFAStateSet mask(numStates), succ(numStates), pred(numStates); |
412 | buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som, |
413 | som_depths, region_map, cache); |
414 | buildSucc(succ, g, v); |
415 | buildPred(pred, g, v); |
416 | const auto &reports = g[v].reports; |
417 | |
418 | for (size_t j = succ.find_first(); j != succ.npos; |
419 | j = succ.find_next(j)) { |
420 | NFAVertex vj = vByIndex[j]; |
421 | NFAStateSet pred2(numStates); |
422 | buildPred(pred2, g, vj); |
423 | if (pred2 == pred) { |
424 | DEBUG_PRINTF("adding the sm from %zu to %u's sm\n" , j, i); |
425 | NFAStateSet tmp(numStates); |
426 | buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, |
427 | som, som_depths, region_map, cache); |
428 | mask &= tmp; |
429 | } |
430 | } |
431 | |
432 | for (size_t j = pred.find_first(); j != pred.npos; |
433 | j = pred.find_next(j)) { |
434 | NFAVertex vj = vByIndex[j]; |
435 | NFAStateSet succ2(numStates); |
436 | buildSucc(succ2, g, vj); |
437 | /* we can use j as a basis for squashing if its succs are a subset |
438 | * of ours */ |
439 | if ((succ2 & ~succ).any()) { |
440 | continue; |
441 | } |
442 | |
443 | if (som) { |
444 | /* We cannot use j to add to the squash mask of v if it may |
445 | * have an earlier start of match offset. ie for us j as a |
446 | * basis for the squash mask of v we require: |
447 | * maxSomDist(j) <= minSomDist(v) |
448 | */ |
449 | |
450 | /* ** TODO ** */ |
451 | |
452 | const depth &max_som_dist_j = |
453 | som_depths[g[vj].index].max; |
454 | const depth &min_som_dist_v = |
455 | som_depths[g[v].index].min; |
456 | if (max_som_dist_j > min_som_dist_v || |
457 | max_som_dist_j.is_infinite()) { |
458 | /* j can't be used as it may be storing an earlier SOM */ |
459 | continue; |
460 | } |
461 | } |
462 | |
463 | const CharReach &crv = g[vj].char_reach; |
464 | |
465 | /* we also require that j's report information be a subset of ours |
466 | */ |
467 | bool seen_special = false; |
468 | for (auto w : adjacent_vertices_range(vj, g)) { |
469 | if (is_special(w, g)) { |
470 | if (!edge(v, w, g).second) { |
471 | goto next_j; |
472 | } |
473 | seen_special = true; |
474 | } |
475 | } |
476 | |
477 | // FIXME: should be subset check? |
478 | if (seen_special && g[vj].reports != reports) { |
479 | continue; |
480 | } |
481 | |
482 | /* ok we can use j */ |
483 | if ((crv & ~cr).none()) { |
484 | NFAStateSet tmp(numStates); |
485 | buildSquashMask(tmp, g, vj, cr, initStates, vByIndex, pdom_tree, |
486 | som, som_depths, region_map, cache); |
487 | mask &= tmp; |
488 | mask.reset(j); |
489 | } |
490 | |
491 | next_j:; |
492 | } |
493 | |
494 | mask.set(i); /* never clear ourselves */ |
495 | |
496 | if ((~mask).any()) { // i.e. some bits unset in mask |
497 | DEBUG_PRINTF("%u squashes %zu other states\n" , i, (~mask).count()); |
498 | squash.emplace(v, mask); |
499 | } |
500 | } |
501 | |
502 | findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som, |
503 | som_depths, region_map, cache); |
504 | |
505 | clearMutualSquashers(g, vByIndex, squash); |
506 | |
507 | return squash; |
508 | } |
509 | |
510 | #define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */ |
511 | |
512 | /** Some squash states are clearly not advantageous in the NFA, as they do |
513 | * incur the cost of an exception: |
514 | * -# acyclic states |
515 | * -# squash only a few acyclic states |
516 | */ |
517 | void filterSquashers(const NGHolder &g, |
518 | unordered_map<NFAVertex, NFAStateSet> &squash) { |
519 | assert(hasCorrectlyNumberedVertices(g)); |
520 | |
521 | DEBUG_PRINTF("filtering\n" ); |
522 | vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */ |
523 | for (auto v : vertices_range(g)) { |
524 | rev[g[v].index] = v; |
525 | } |
526 | |
527 | for (auto v : vertices_range(g)) { |
528 | if (!contains(squash, v)) { |
529 | continue; |
530 | } |
531 | DEBUG_PRINTF("looking at squash set for vertex %zu\n" , g[v].index); |
532 | |
533 | if (!hasSelfLoop(v, g)) { |
534 | DEBUG_PRINTF("acyclic\n" ); |
535 | squash.erase(v); |
536 | continue; |
537 | } |
538 | |
539 | NFAStateSet squashed = squash[v]; |
540 | squashed.flip(); /* default sense for mask of survivors */ |
541 | for (auto sq = squashed.find_first(); sq != squashed.npos; |
542 | sq = squashed.find_next(sq)) { |
543 | NFAVertex u = rev[sq]; |
544 | if (hasSelfLoop(u, g)) { |
545 | DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n" , sq); |
546 | goto next_vertex; |
547 | } |
548 | } |
549 | |
550 | if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) { |
551 | DEBUG_PRINTF("squash set too small\n" ); |
552 | squash.erase(v); |
553 | continue; |
554 | } |
555 | |
556 | next_vertex:; |
557 | DEBUG_PRINTF("squash set ok\n" ); |
558 | } |
559 | } |
560 | |
561 | static |
562 | void getHighlanderReporters(const NGHolder &g, const NFAVertex accept, |
563 | const ReportManager &rm, |
564 | set<NFAVertex> &verts) { |
565 | for (auto v : inv_adjacent_vertices_range(accept, g)) { |
566 | if (v == g.accept) { |
567 | continue; |
568 | } |
569 | |
570 | const auto &reports = g[v].reports; |
571 | if (reports.empty()) { |
572 | assert(0); |
573 | continue; |
574 | } |
575 | |
576 | // Must be _all_ highlander callback reports. |
577 | for (auto report : reports) { |
578 | const Report &ir = rm.getReport(report); |
579 | if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) { |
580 | goto next_vertex; |
581 | } |
582 | |
583 | // If there's any bounds, these are handled outside the NFA and |
584 | // probably shouldn't be pre-empted. |
585 | if (ir.hasBounds()) { |
586 | goto next_vertex; |
587 | } |
588 | } |
589 | |
590 | verts.insert(v); |
591 | next_vertex: |
592 | continue; |
593 | } |
594 | } |
595 | |
596 | static |
597 | void removeEdgesToAccept(NGHolder &g, NFAVertex v) { |
598 | const auto &reports = g[v].reports; |
599 | assert(!reports.empty()); |
600 | |
601 | // We remove any accept edge with a non-empty subset of the reports of v. |
602 | |
603 | set<NFAEdge> dead; |
604 | |
605 | for (const auto &e : in_edges_range(g.accept, g)) { |
606 | NFAVertex u = source(e, g); |
607 | const auto &r = g[u].reports; |
608 | if (!r.empty() && is_subset_of(r, reports)) { |
609 | DEBUG_PRINTF("vertex %zu\n" , g[u].index); |
610 | dead.insert(e); |
611 | } |
612 | } |
613 | |
614 | for (const auto &e : in_edges_range(g.acceptEod, g)) { |
615 | NFAVertex u = source(e, g); |
616 | const auto &r = g[u].reports; |
617 | if (!r.empty() && is_subset_of(r, reports)) { |
618 | DEBUG_PRINTF("vertex %zu\n" , g[u].index); |
619 | dead.insert(e); |
620 | } |
621 | } |
622 | |
623 | assert(!dead.empty()); |
624 | remove_edges(dead, g); |
625 | } |
626 | |
627 | static |
628 | vector<NFAVertex> findUnreachable(const NGHolder &g) { |
629 | const boost::reverse_graph<NGHolder, const NGHolder &> revg(g); |
630 | |
631 | unordered_map<NFAVertex, boost::default_color_type> colours; |
632 | colours.reserve(num_vertices(g)); |
633 | |
634 | depth_first_visit(revg, g.acceptEod, |
635 | make_dfs_visitor(boost::null_visitor()), |
636 | make_assoc_property_map(colours)); |
637 | |
638 | // Unreachable vertices are not in the colour map. |
639 | vector<NFAVertex> unreach; |
640 | for (auto v : vertices_range(revg)) { |
641 | if (!contains(colours, v)) { |
642 | unreach.push_back(NFAVertex(v)); |
643 | } |
644 | } |
645 | return unreach; |
646 | } |
647 | |
648 | /** Populates squash masks for states that can be switched off by highlander |
649 | * (single match) reporters. */ |
650 | unordered_map<NFAVertex, NFAStateSet> |
651 | findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { |
652 | unordered_map<NFAVertex, NFAStateSet> squash; |
653 | |
654 | set<NFAVertex> verts; |
655 | getHighlanderReporters(g, g.accept, rm, verts); |
656 | getHighlanderReporters(g, g.acceptEod, rm, verts); |
657 | if (verts.empty()) { |
658 | DEBUG_PRINTF("no highlander reports\n" ); |
659 | return squash; |
660 | } |
661 | |
662 | const u32 numStates = num_vertices(g); |
663 | |
664 | for (auto v : verts) { |
665 | DEBUG_PRINTF("vertex %zu with %zu reports\n" , g[v].index, |
666 | g[v].reports.size()); |
667 | |
668 | // Find the set of vertices that lead to v or any other reporter with a |
669 | // subset of v's reports. We do this by creating a copy of the graph, |
670 | // cutting the appropriate out-edges to accept and seeing which |
671 | // vertices become unreachable. |
672 | |
673 | unordered_map<NFAVertex, NFAVertex> orig_to_copy; |
674 | NGHolder h; |
675 | cloneHolder(h, g, &orig_to_copy); |
676 | removeEdgesToAccept(h, orig_to_copy[v]); |
677 | |
678 | vector<NFAVertex> unreach = findUnreachable(h); |
679 | DEBUG_PRINTF("can squash %zu vertices\n" , unreach.size()); |
680 | if (unreach.empty()) { |
681 | continue; |
682 | } |
683 | |
684 | if (!contains(squash, v)) { |
685 | squash[v] = NFAStateSet(numStates); |
686 | squash[v].set(); |
687 | } |
688 | |
689 | NFAStateSet &mask = squash[v]; |
690 | |
691 | for (auto uv : unreach) { |
692 | DEBUG_PRINTF("squashes index %zu\n" , h[uv].index); |
693 | mask.reset(h[uv].index); |
694 | } |
695 | } |
696 | |
697 | return squash; |
698 | } |
699 | |
700 | } // namespace ue2 |
701 | |