1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Edge redundancy graph reductions.
31 */
32#include "ng_edge_redundancy.h"
33
34#include "ng_holder.h"
35#include "ng_prune.h"
36#include "ng_util.h"
37#include "ue2common.h"
38#include "parser/position.h"
39#include "util/compile_context.h"
40#include "util/container.h"
41#include "util/flat_containers.h"
42#include "util/graph_range.h"
43
44#include <set>
45#include <vector>
46
47using namespace std;
48
49namespace ue2 {
50
51/* reverse edge redundancy removal is possible but is not implemented as it
52 * regressed rose pattern support in the regression suite: 19026 - 19027
53 * (foo.{1,5}b?ar)
54 *
55 * If rose becomes smarter we can reimplement.
56 */
57
58static never_inline
59bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
60 const set<NFAVertex> &happy) {
61 /* need to check if for each vertex in sad if it has an edge to a happy
62 * vertex */
63 for (auto u : sad) {
64 bool ok = false;
65 for (auto v : adjacent_vertices_range(u, g)) {
66 if (contains(happy, v)) {
67 ok = true;
68 break;
69 }
70 }
71
72 if (!ok) {
73 return false;
74 }
75 }
76
77 return true;
78}
79
80static never_inline
81bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
82 const set<NFAVertex> &happy) {
83 /* need to check if for each vertex in sad if it has an edge to a happy
84 * vertex */
85 for (auto v : sad) {
86 bool ok = false;
87 for (auto u : inv_adjacent_vertices_range(v, g)) {
88 if (contains(happy, u)) {
89 ok = true;
90 break;
91 }
92 }
93
94 if (!ok) {
95 return false;
96 }
97 }
98
99 return true;
100}
101
102/** \brief Redundant self-loop removal.
103 *
104 * A self loop on a vertex v can be removed if:
105 *
106 * For every vertex u in pred(v) either:
107 * 1: u has a self loop and cr(v) subset of cr(u)
108 * OR
109 * 2: u has an edge to vertex satisfying criterion 1
110 *
111 * Note: we remove all dead loops at the end of the pass and do not check the
112 * live status of the loops we are depending on during the analysis.
113 *
114 * We don't end up in situations where we remove a group of loops which depend
115 * on each other as:
116 *
117 * - there must be at least one vertex not in the group which is a pred of some
118 * member of the group (as we don't remove loops on specials)
119 *
120 * For each pred vertex of the group:
121 * - the vertex must be 'sad' as it is not part of the group
122 * - therefore it must have edges to each member of the group (to happy, trans)
123 * - therefore the group is enabled simultaneously
124 * - due to internal group edges, all members will still be active after the
125 * next character.
126 *
127 * Actually, the vertex redundancy code will merge the entire group into one
128 * cyclic state.
129 */
130static
131bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
132 unsigned dead_count = 0;
133
134 set<NFAVertex> happy;
135 set<NFAVertex> sad;
136
137 for (auto v : vertices_range(g)) {
138 if (is_special(v, g) || !hasSelfLoop(v, g)) {
139 continue;
140 }
141
142 const CharReach &cr_v = g[v].char_reach;
143
144 happy.clear();
145 sad.clear();
146
147 for (auto u : inv_adjacent_vertices_range(v, g)) {
148 if (u == v) {
149 continue;
150 }
151
152 if (!hasSelfLoop(u, g)) {
153 sad.insert(u);
154 continue;
155 }
156
157 if (ignore_starts) {
158 if (u == g.startDs || is_virtual_start(u, g)) {
159 sad.insert(u);
160 continue;
161 }
162 }
163
164 const CharReach &cr_u = g[u].char_reach;
165
166 if ((cr_u & cr_v) != cr_v) {
167 sad.insert(u);
168 continue;
169 }
170
171 happy.insert(u);
172 }
173
174 if (!happy.empty() && checkVerticesFwd(g, sad, happy)) {
175 dead_count++;
176 remove_edge(v, v, g);
177 }
178 }
179
180 DEBUG_PRINTF("found %u removable edges.\n", dead_count);
181 return dead_count;
182}
183
184static
185bool checkReportsRev(const NGHolder &g, NFAVertex v,
186 const set<NFAVertex> &happy) {
187 if (g[v].reports.empty()) {
188 return true;
189 }
190
191 assert(edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second);
192
193 /* an edge to accept takes priority over eod only accept */
194 NFAVertex accept = edge(v, g.accept, g).second ? g.accept : g.acceptEod;
195
196 flat_set<ReportID> happy_reports;
197 for (NFAVertex u : happy) {
198 if (edge(u, accept, g).second) {
199 insert(&happy_reports, g[u].reports);
200 }
201 }
202
203 return is_subset_of(g[v].reports, happy_reports);
204}
205
206/** \brief Redundant self-loop removal (reverse version).
207 *
208 * A self loop on a vertex v can be removed if:
209 *
210 * For every vertex u in succ(v) either:
211 * 1: u has a self loop and cr(v) is a subset of cr(u).
212 * OR
213 * 2: u is not an accept and u has an edge from a vertex satisfying
214 * criterion 1.
215 * OR
216 * 3: u is in an accept and u has an edge from a vertex v' satisfying
217 * criterion 1 and report(v) == report(v').
218 */
219static
220bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
221 unsigned dead_count = 0;
222
223 set<NFAVertex> happy;
224 set<NFAVertex> sad;
225
226 for (auto v : vertices_range(g)) {
227 if (is_special(v, g) || !hasSelfLoop(v, g)) {
228 continue;
229 }
230
231 const CharReach &cr_v = g[v].char_reach;
232
233 happy.clear();
234 sad.clear();
235
236 for (auto u : adjacent_vertices_range(v, g)) {
237 if (u == v) {
238 continue;
239 }
240
241 if (!hasSelfLoop(u, g)) {
242 sad.insert(u);
243 continue;
244 }
245
246 assert(!is_special(u, g));
247
248 const CharReach &cr_u = g[u].char_reach;
249
250 if (!cr_v.isSubsetOf(cr_u)) {
251 sad.insert(u);
252 continue;
253 }
254
255 happy.insert(u);
256 }
257
258 if (!happy.empty() && checkVerticesRev(g, sad, happy)
259 && checkReportsRev(g, v, happy)) {
260 dead_count++;
261 remove_edge(v, v, g);
262 }
263 }
264
265 DEBUG_PRINTF("found %u removable edges.\n", dead_count);
266 return dead_count;
267}
268
269static
270bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
271 const flat_set<NFAVertex> &other_parents, NFAVertex other,
272 map<NFAVertex, bool> &done) {
273 map<NFAVertex, bool>::const_iterator dit = done.find(v);
274 if (dit != done.end()) {
275 return dit->second;
276 }
277
278 for (auto u : inv_adjacent_vertices_range(v, g)) {
279 if (u == v && contains(other_parents, other)) {
280 continue;
281 }
282
283 if (!contains(other_parents, u)) {
284 done[v] = false;
285 return false;
286 }
287 }
288
289 done[v] = true;
290 return true;
291}
292
293static
294bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
295 const flat_set<NFAVertex> &fixed_parents,
296 const NFAEdge &candidate,
297 map<NFAVertex, bool> &done) {
298 NFAVertex w = source(candidate, g);
299 NFAVertex v = target(candidate, g);
300 const CharReach &cr_w = g[w].char_reach;
301 const CharReach &cr_u = g[fixed_src].char_reach;
302
303 /* There is no reason why self loops cannot be considered by this
304 * transformation but the removal is already handled by many other
305 * transformations. */
306 if (w == v) {
307 return false;
308 }
309
310 if (is_special(w, g)) {
311 return false;
312 }
313
314 if (!cr_w.isSubsetOf(cr_u)) {
315 return false;
316 }
317
318 /* check that each parent of w is also a parent of u */
319 if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
320 return false;
321 }
322
323 DEBUG_PRINTF("edge (%zu, %zu) killed by edge (%zu, %zu)\n",
324 g[w].index, g[v].index, g[fixed_src].index, g[v].index);
325 return true;
326}
327
328static never_inline
329void checkLargeOutU(const NGHolder &g, NFAVertex u,
330 const flat_set<NFAVertex> &parents_u,
331 flat_set<NFAVertex> &possible_w,
332 map<NFAVertex, bool> &done,
333 set<NFAEdge> *dead) {
334 /* only vertices with at least one parent in common with u need to be
335 * considered, and we also only consider potential siblings with subset
336 * reach. */
337 possible_w.clear();
338 const CharReach &cr_u = g[u].char_reach;
339 for (auto p : parents_u) {
340 for (auto v : adjacent_vertices_range(p, g)) {
341 const CharReach &cr_w = g[v].char_reach;
342 if (cr_w.isSubsetOf(cr_u)) {
343 possible_w.insert(v);
344 }
345 }
346 }
347
348 // If there's only one, it's us, and we have no work to do.
349 if (possible_w.size() <= 1) {
350 assert(possible_w.empty() || *possible_w.begin() == u);
351 return;
352 }
353
354 for (const auto &e : out_edges_range(u, g)) {
355 const NFAVertex v = target(e, g);
356
357 if (is_special(v, g)) {
358 continue;
359 }
360
361 if (contains(*dead, e)) {
362 continue;
363 }
364
365 /* Now need check to find any edges which can be removed due to the
366 * existence of edge e */
367 for (const auto &e2 : in_edges_range(v, g)) {
368 if (e == e2 || contains(*dead, e2)) {
369 continue;
370 }
371
372 const NFAVertex w = source(e2, g);
373 if (!contains(possible_w, w)) {
374 continue;
375 }
376
377 if (checkFwdCandidate(g, u, parents_u, e2, done)) {
378 dead->insert(e2);
379 }
380 }
381 }
382}
383
384static never_inline
385void checkSmallOutU(const NGHolder &g, NFAVertex u,
386 const flat_set<NFAVertex> &parents_u,
387 map<NFAVertex, bool> &done,
388 set<NFAEdge> *dead) {
389 for (const auto &e : out_edges_range(u, g)) {
390 const NFAVertex v = target(e, g);
391
392 if (is_special(v, g)) {
393 continue;
394 }
395
396 if (contains(*dead, e)) {
397 continue;
398 }
399
400 /* Now need check to find any edges which can be removed due to the
401 * existence of edge e */
402 for (const auto &e2 : in_edges_range(v, g)) {
403 if (e == e2 || contains(*dead, e2)) {
404 continue;
405 }
406
407 if (checkFwdCandidate(g, u, parents_u, e2, done)) {
408 dead->insert(e2);
409 }
410 }
411 }
412}
413
414/** \brief Forward edge redundancy pass.
415 *
416 * An edge e from w to v is redundant if there exists an edge e' such that:
417 * e' is from u to v
418 * and: reach(w) is a subset of reach(u)
419 * and: proper_pred(w) is a subset of pred(u)
420 * and: self_loop(w) implies self_loop(u) or edge from (w to u)
421 *
422 * Note: edges to accepts also require report ID checks.
423 */
424static
425bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
426 set<NFAEdge> dead;
427 map<NFAVertex, bool> done;
428 flat_set<NFAVertex> parents_u;
429 flat_set<NFAVertex> possible_w;
430
431 for (auto u : vertices_range(g)) {
432 if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
433 continue;
434 }
435
436 parents_u.clear();
437 pred(g, u, &parents_u);
438
439 done.clear();
440 if (out_degree(u, g) > 1) {
441 checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
442 } else {
443 checkSmallOutU(g, u, parents_u, done, &dead);
444 }
445 }
446
447 if (dead.empty()) {
448 return false;
449 }
450
451 DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
452 remove_edges(dead, g);
453 pruneUseless(g);
454 return true;
455}
456
457/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
458 * don't consider startDs or virtual starts as cyclic vertices. */
459bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
460 if (!cc.grey.removeEdgeRedundancy) {
461 return false;
462 }
463
464 bool changed = false;
465 changed |= removeEdgeRedundancyNearCyclesFwd(g, som);
466 changed |= removeEdgeRedundancyNearCyclesRev(g);
467 changed |= removeEdgeRedundancyFwd(g, som);
468 return changed;
469}
470
471/** \brief Removes optional stuff from the front of floating patterns, since it's
472 * redundant with startDs.
473 *
474 * For each successor of startDs, remove any in-edges that aren't from either
475 * start or startDs. This allows us to prune redundant vertices at the start of
476 * a pattern:
477 *
478 * /(hat)?stand --> /stand/
479 *
480 */
481bool removeSiblingsOfStartDotStar(NGHolder &g) {
482 vector<NFAEdge> dead;
483
484 for (auto v : adjacent_vertices_range(g.startDs, g)) {
485 DEBUG_PRINTF("checking %zu\n", g[v].index);
486 if (is_special(v, g)) {
487 continue;
488 }
489
490 for (const auto &e : in_edges_range(v, g)) {
491 NFAVertex u = source(e, g);
492 if (is_special(u, g)) {
493 continue;
494 }
495 DEBUG_PRINTF("removing %zu->%zu\n", g[u].index, g[v].index);
496 dead.push_back(e);
497 }
498 }
499
500 if (dead.empty()) {
501 return false;
502 }
503
504 DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
505 remove_edges(dead, g);
506 pruneUseless(g);
507 return true;
508}
509
510/** Removes all edges into virtual starts other than those from start/startDs,
511 * providing there is an edge from startDs. This operation is an optimisation
512 * for SOM mode. (see UE-1544) */
513bool optimiseVirtualStarts(NGHolder &g) {
514 vector<NFAEdge> dead;
515 for (auto v : adjacent_vertices_range(g.startDs, g)) {
516 u32 flags = g[v].assert_flags;
517 if (!(flags & POS_FLAG_VIRTUAL_START)) {
518 continue;
519 }
520
521 for (const auto &e : in_edges_range(v, g)) {
522 if (!is_any_start(source(e, g), g)) {
523 dead.push_back(e);
524 }
525 }
526 }
527
528 if (dead.empty()) {
529 return false;
530 }
531
532 DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
533 remove_edges(dead, g);
534 pruneUseless(g);
535 return true;
536}
537
538} // namespace ue2
539