1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Edge redundancy graph reductions. |
31 | */ |
32 | #include "ng_edge_redundancy.h" |
33 | |
34 | #include "ng_holder.h" |
35 | #include "ng_prune.h" |
36 | #include "ng_util.h" |
37 | #include "ue2common.h" |
38 | #include "parser/position.h" |
39 | #include "util/compile_context.h" |
40 | #include "util/container.h" |
41 | #include "util/flat_containers.h" |
42 | #include "util/graph_range.h" |
43 | |
44 | #include <set> |
45 | #include <vector> |
46 | |
47 | using namespace std; |
48 | |
49 | namespace ue2 { |
50 | |
51 | /* reverse edge redundancy removal is possible but is not implemented as it |
52 | * regressed rose pattern support in the regression suite: 19026 - 19027 |
53 | * (foo.{1,5}b?ar) |
54 | * |
55 | * If rose becomes smarter we can reimplement. |
56 | */ |
57 | |
58 | static never_inline |
59 | bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad, |
60 | const set<NFAVertex> &happy) { |
61 | /* need to check if for each vertex in sad if it has an edge to a happy |
62 | * vertex */ |
63 | for (auto u : sad) { |
64 | bool ok = false; |
65 | for (auto v : adjacent_vertices_range(u, g)) { |
66 | if (contains(happy, v)) { |
67 | ok = true; |
68 | break; |
69 | } |
70 | } |
71 | |
72 | if (!ok) { |
73 | return false; |
74 | } |
75 | } |
76 | |
77 | return true; |
78 | } |
79 | |
80 | static never_inline |
81 | bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad, |
82 | const set<NFAVertex> &happy) { |
83 | /* need to check if for each vertex in sad if it has an edge to a happy |
84 | * vertex */ |
85 | for (auto v : sad) { |
86 | bool ok = false; |
87 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
88 | if (contains(happy, u)) { |
89 | ok = true; |
90 | break; |
91 | } |
92 | } |
93 | |
94 | if (!ok) { |
95 | return false; |
96 | } |
97 | } |
98 | |
99 | return true; |
100 | } |
101 | |
102 | /** \brief Redundant self-loop removal. |
103 | * |
104 | * A self loop on a vertex v can be removed if: |
105 | * |
106 | * For every vertex u in pred(v) either: |
107 | * 1: u has a self loop and cr(v) subset of cr(u) |
108 | * OR |
109 | * 2: u has an edge to vertex satisfying criterion 1 |
110 | * |
111 | * Note: we remove all dead loops at the end of the pass and do not check the |
112 | * live status of the loops we are depending on during the analysis. |
113 | * |
114 | * We don't end up in situations where we remove a group of loops which depend |
115 | * on each other as: |
116 | * |
117 | * - there must be at least one vertex not in the group which is a pred of some |
118 | * member of the group (as we don't remove loops on specials) |
119 | * |
120 | * For each pred vertex of the group: |
121 | * - the vertex must be 'sad' as it is not part of the group |
122 | * - therefore it must have edges to each member of the group (to happy, trans) |
123 | * - therefore the group is enabled simultaneously |
124 | * - due to internal group edges, all members will still be active after the |
125 | * next character. |
126 | * |
127 | * Actually, the vertex redundancy code will merge the entire group into one |
128 | * cyclic state. |
129 | */ |
130 | static |
131 | bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) { |
132 | unsigned dead_count = 0; |
133 | |
134 | set<NFAVertex> happy; |
135 | set<NFAVertex> sad; |
136 | |
137 | for (auto v : vertices_range(g)) { |
138 | if (is_special(v, g) || !hasSelfLoop(v, g)) { |
139 | continue; |
140 | } |
141 | |
142 | const CharReach &cr_v = g[v].char_reach; |
143 | |
144 | happy.clear(); |
145 | sad.clear(); |
146 | |
147 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
148 | if (u == v) { |
149 | continue; |
150 | } |
151 | |
152 | if (!hasSelfLoop(u, g)) { |
153 | sad.insert(u); |
154 | continue; |
155 | } |
156 | |
157 | if (ignore_starts) { |
158 | if (u == g.startDs || is_virtual_start(u, g)) { |
159 | sad.insert(u); |
160 | continue; |
161 | } |
162 | } |
163 | |
164 | const CharReach &cr_u = g[u].char_reach; |
165 | |
166 | if ((cr_u & cr_v) != cr_v) { |
167 | sad.insert(u); |
168 | continue; |
169 | } |
170 | |
171 | happy.insert(u); |
172 | } |
173 | |
174 | if (!happy.empty() && checkVerticesFwd(g, sad, happy)) { |
175 | dead_count++; |
176 | remove_edge(v, v, g); |
177 | } |
178 | } |
179 | |
180 | DEBUG_PRINTF("found %u removable edges.\n" , dead_count); |
181 | return dead_count; |
182 | } |
183 | |
184 | static |
185 | bool checkReportsRev(const NGHolder &g, NFAVertex v, |
186 | const set<NFAVertex> &happy) { |
187 | if (g[v].reports.empty()) { |
188 | return true; |
189 | } |
190 | |
191 | assert(edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second); |
192 | |
193 | /* an edge to accept takes priority over eod only accept */ |
194 | NFAVertex accept = edge(v, g.accept, g).second ? g.accept : g.acceptEod; |
195 | |
196 | flat_set<ReportID> happy_reports; |
197 | for (NFAVertex u : happy) { |
198 | if (edge(u, accept, g).second) { |
199 | insert(&happy_reports, g[u].reports); |
200 | } |
201 | } |
202 | |
203 | return is_subset_of(g[v].reports, happy_reports); |
204 | } |
205 | |
206 | /** \brief Redundant self-loop removal (reverse version). |
207 | * |
208 | * A self loop on a vertex v can be removed if: |
209 | * |
210 | * For every vertex u in succ(v) either: |
211 | * 1: u has a self loop and cr(v) is a subset of cr(u). |
212 | * OR |
213 | * 2: u is not an accept and u has an edge from a vertex satisfying |
214 | * criterion 1. |
215 | * OR |
216 | * 3: u is in an accept and u has an edge from a vertex v' satisfying |
217 | * criterion 1 and report(v) == report(v'). |
218 | */ |
219 | static |
220 | bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) { |
221 | unsigned dead_count = 0; |
222 | |
223 | set<NFAVertex> happy; |
224 | set<NFAVertex> sad; |
225 | |
226 | for (auto v : vertices_range(g)) { |
227 | if (is_special(v, g) || !hasSelfLoop(v, g)) { |
228 | continue; |
229 | } |
230 | |
231 | const CharReach &cr_v = g[v].char_reach; |
232 | |
233 | happy.clear(); |
234 | sad.clear(); |
235 | |
236 | for (auto u : adjacent_vertices_range(v, g)) { |
237 | if (u == v) { |
238 | continue; |
239 | } |
240 | |
241 | if (!hasSelfLoop(u, g)) { |
242 | sad.insert(u); |
243 | continue; |
244 | } |
245 | |
246 | assert(!is_special(u, g)); |
247 | |
248 | const CharReach &cr_u = g[u].char_reach; |
249 | |
250 | if (!cr_v.isSubsetOf(cr_u)) { |
251 | sad.insert(u); |
252 | continue; |
253 | } |
254 | |
255 | happy.insert(u); |
256 | } |
257 | |
258 | if (!happy.empty() && checkVerticesRev(g, sad, happy) |
259 | && checkReportsRev(g, v, happy)) { |
260 | dead_count++; |
261 | remove_edge(v, v, g); |
262 | } |
263 | } |
264 | |
265 | DEBUG_PRINTF("found %u removable edges.\n" , dead_count); |
266 | return dead_count; |
267 | } |
268 | |
269 | static |
270 | bool parentsSubsetOf(const NGHolder &g, NFAVertex v, |
271 | const flat_set<NFAVertex> &other_parents, NFAVertex other, |
272 | map<NFAVertex, bool> &done) { |
273 | map<NFAVertex, bool>::const_iterator dit = done.find(v); |
274 | if (dit != done.end()) { |
275 | return dit->second; |
276 | } |
277 | |
278 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
279 | if (u == v && contains(other_parents, other)) { |
280 | continue; |
281 | } |
282 | |
283 | if (!contains(other_parents, u)) { |
284 | done[v] = false; |
285 | return false; |
286 | } |
287 | } |
288 | |
289 | done[v] = true; |
290 | return true; |
291 | } |
292 | |
293 | static |
294 | bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src, |
295 | const flat_set<NFAVertex> &fixed_parents, |
296 | const NFAEdge &candidate, |
297 | map<NFAVertex, bool> &done) { |
298 | NFAVertex w = source(candidate, g); |
299 | NFAVertex v = target(candidate, g); |
300 | const CharReach &cr_w = g[w].char_reach; |
301 | const CharReach &cr_u = g[fixed_src].char_reach; |
302 | |
303 | /* There is no reason why self loops cannot be considered by this |
304 | * transformation but the removal is already handled by many other |
305 | * transformations. */ |
306 | if (w == v) { |
307 | return false; |
308 | } |
309 | |
310 | if (is_special(w, g)) { |
311 | return false; |
312 | } |
313 | |
314 | if (!cr_w.isSubsetOf(cr_u)) { |
315 | return false; |
316 | } |
317 | |
318 | /* check that each parent of w is also a parent of u */ |
319 | if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) { |
320 | return false; |
321 | } |
322 | |
323 | DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n" , |
324 | g[w].index, g[v].index, g[fixed_src].index, g[v].index); |
325 | return true; |
326 | } |
327 | |
328 | static never_inline |
329 | void checkLargeOutU(const NGHolder &g, NFAVertex u, |
330 | const flat_set<NFAVertex> &parents_u, |
331 | flat_set<NFAVertex> &possible_w, |
332 | map<NFAVertex, bool> &done, |
333 | set<NFAEdge> *dead) { |
334 | /* only vertices with at least one parent in common with u need to be |
335 | * considered, and we also only consider potential siblings with subset |
336 | * reach. */ |
337 | possible_w.clear(); |
338 | const CharReach &cr_u = g[u].char_reach; |
339 | for (auto p : parents_u) { |
340 | for (auto v : adjacent_vertices_range(p, g)) { |
341 | const CharReach &cr_w = g[v].char_reach; |
342 | if (cr_w.isSubsetOf(cr_u)) { |
343 | possible_w.insert(v); |
344 | } |
345 | } |
346 | } |
347 | |
348 | // If there's only one, it's us, and we have no work to do. |
349 | if (possible_w.size() <= 1) { |
350 | assert(possible_w.empty() || *possible_w.begin() == u); |
351 | return; |
352 | } |
353 | |
354 | for (const auto &e : out_edges_range(u, g)) { |
355 | const NFAVertex v = target(e, g); |
356 | |
357 | if (is_special(v, g)) { |
358 | continue; |
359 | } |
360 | |
361 | if (contains(*dead, e)) { |
362 | continue; |
363 | } |
364 | |
365 | /* Now need check to find any edges which can be removed due to the |
366 | * existence of edge e */ |
367 | for (const auto &e2 : in_edges_range(v, g)) { |
368 | if (e == e2 || contains(*dead, e2)) { |
369 | continue; |
370 | } |
371 | |
372 | const NFAVertex w = source(e2, g); |
373 | if (!contains(possible_w, w)) { |
374 | continue; |
375 | } |
376 | |
377 | if (checkFwdCandidate(g, u, parents_u, e2, done)) { |
378 | dead->insert(e2); |
379 | } |
380 | } |
381 | } |
382 | } |
383 | |
384 | static never_inline |
385 | void checkSmallOutU(const NGHolder &g, NFAVertex u, |
386 | const flat_set<NFAVertex> &parents_u, |
387 | map<NFAVertex, bool> &done, |
388 | set<NFAEdge> *dead) { |
389 | for (const auto &e : out_edges_range(u, g)) { |
390 | const NFAVertex v = target(e, g); |
391 | |
392 | if (is_special(v, g)) { |
393 | continue; |
394 | } |
395 | |
396 | if (contains(*dead, e)) { |
397 | continue; |
398 | } |
399 | |
400 | /* Now need check to find any edges which can be removed due to the |
401 | * existence of edge e */ |
402 | for (const auto &e2 : in_edges_range(v, g)) { |
403 | if (e == e2 || contains(*dead, e2)) { |
404 | continue; |
405 | } |
406 | |
407 | if (checkFwdCandidate(g, u, parents_u, e2, done)) { |
408 | dead->insert(e2); |
409 | } |
410 | } |
411 | } |
412 | } |
413 | |
414 | /** \brief Forward edge redundancy pass. |
415 | * |
416 | * An edge e from w to v is redundant if there exists an edge e' such that: |
417 | * e' is from u to v |
418 | * and: reach(w) is a subset of reach(u) |
419 | * and: proper_pred(w) is a subset of pred(u) |
420 | * and: self_loop(w) implies self_loop(u) or edge from (w to u) |
421 | * |
422 | * Note: edges to accepts also require report ID checks. |
423 | */ |
424 | static |
425 | bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) { |
426 | set<NFAEdge> dead; |
427 | map<NFAVertex, bool> done; |
428 | flat_set<NFAVertex> parents_u; |
429 | flat_set<NFAVertex> possible_w; |
430 | |
431 | for (auto u : vertices_range(g)) { |
432 | if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) { |
433 | continue; |
434 | } |
435 | |
436 | parents_u.clear(); |
437 | pred(g, u, &parents_u); |
438 | |
439 | done.clear(); |
440 | if (out_degree(u, g) > 1) { |
441 | checkLargeOutU(g, u, parents_u, possible_w, done, &dead); |
442 | } else { |
443 | checkSmallOutU(g, u, parents_u, done, &dead); |
444 | } |
445 | } |
446 | |
447 | if (dead.empty()) { |
448 | return false; |
449 | } |
450 | |
451 | DEBUG_PRINTF("found %zu removable non-selfloops.\n" , dead.size()); |
452 | remove_edges(dead, g); |
453 | pruneUseless(g); |
454 | return true; |
455 | } |
456 | |
457 | /** Entry point: Runs all the edge redundancy passes. If SoM is tracked, |
458 | * don't consider startDs or virtual starts as cyclic vertices. */ |
459 | bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) { |
460 | if (!cc.grey.removeEdgeRedundancy) { |
461 | return false; |
462 | } |
463 | |
464 | bool changed = false; |
465 | changed |= removeEdgeRedundancyNearCyclesFwd(g, som); |
466 | changed |= removeEdgeRedundancyNearCyclesRev(g); |
467 | changed |= removeEdgeRedundancyFwd(g, som); |
468 | return changed; |
469 | } |
470 | |
471 | /** \brief Removes optional stuff from the front of floating patterns, since it's |
472 | * redundant with startDs. |
473 | * |
474 | * For each successor of startDs, remove any in-edges that aren't from either |
475 | * start or startDs. This allows us to prune redundant vertices at the start of |
476 | * a pattern: |
477 | * |
478 | * /(hat)?stand --> /stand/ |
479 | * |
480 | */ |
481 | bool removeSiblingsOfStartDotStar(NGHolder &g) { |
482 | vector<NFAEdge> dead; |
483 | |
484 | for (auto v : adjacent_vertices_range(g.startDs, g)) { |
485 | DEBUG_PRINTF("checking %zu\n" , g[v].index); |
486 | if (is_special(v, g)) { |
487 | continue; |
488 | } |
489 | |
490 | for (const auto &e : in_edges_range(v, g)) { |
491 | NFAVertex u = source(e, g); |
492 | if (is_special(u, g)) { |
493 | continue; |
494 | } |
495 | DEBUG_PRINTF("removing %zu->%zu\n" , g[u].index, g[v].index); |
496 | dead.push_back(e); |
497 | } |
498 | } |
499 | |
500 | if (dead.empty()) { |
501 | return false; |
502 | } |
503 | |
504 | DEBUG_PRINTF("found %zu removable edges.\n" , dead.size()); |
505 | remove_edges(dead, g); |
506 | pruneUseless(g); |
507 | return true; |
508 | } |
509 | |
510 | /** Removes all edges into virtual starts other than those from start/startDs, |
511 | * providing there is an edge from startDs. This operation is an optimisation |
512 | * for SOM mode. (see UE-1544) */ |
513 | bool optimiseVirtualStarts(NGHolder &g) { |
514 | vector<NFAEdge> dead; |
515 | for (auto v : adjacent_vertices_range(g.startDs, g)) { |
516 | u32 flags = g[v].assert_flags; |
517 | if (!(flags & POS_FLAG_VIRTUAL_START)) { |
518 | continue; |
519 | } |
520 | |
521 | for (const auto &e : in_edges_range(v, g)) { |
522 | if (!is_any_start(source(e, g), g)) { |
523 | dead.push_back(e); |
524 | } |
525 | } |
526 | } |
527 | |
528 | if (dead.empty()) { |
529 | return false; |
530 | } |
531 | |
532 | DEBUG_PRINTF("removing %zu edges into virtual starts\n" , dead.size()); |
533 | remove_edges(dead, g); |
534 | pruneUseless(g); |
535 | return true; |
536 | } |
537 | |
538 | } // namespace ue2 |
539 | |