1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Functions for pruning unreachable vertices or reports from the graph.
31 */
32#include "ng_prune.h"
33
34#include "ng_dominators.h"
35#include "ng_holder.h"
36#include "ng_reports.h"
37#include "ng_util.h"
38#include "util/container.h"
39#include "util/graph.h"
40#include "util/graph_range.h"
41#include "util/graph_small_color_map.h"
42#include "util/report_manager.h"
43
44#include <deque>
45#include <map>
46
47#include <boost/graph/depth_first_search.hpp>
48#include <boost/graph/reverse_graph.hpp>
49
50using namespace std;
51using boost::default_color_type;
52using boost::reverse_graph;
53
54namespace ue2 {
55
56/** Remove any vertices that can't be reached by traversing the graph in
57 * reverse from acceptEod. */
58void pruneUnreachable(NGHolder &g) {
59 deque<NFAVertex> dead;
60
61 if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g)
62 && edge(g.accept, g.acceptEod, g).second) {
63 // Trivial case: there are no in-edges to our accepts (other than
64 // accept->acceptEod), so all non-specials are unreachable.
65 for (auto v : vertices_range(g)) {
66 if (!is_special(v, g)) {
67 dead.push_back(v);
68 }
69 }
70 } else {
71 // Walk a reverse graph from acceptEod with Boost's depth_first_visit
72 // call.
73 typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph;
74 RevNFAGraph revg(g);
75
76 map<RevNFAGraph::vertex_descriptor, default_color_type> colours;
77
78 depth_first_visit(revg, g.acceptEod,
79 make_dfs_visitor(boost::null_visitor()),
80 make_assoc_property_map(colours));
81
82 DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
83
84 // All non-special vertices that aren't in the colour map (because they
85 // weren't reached) can be removed.
86 for (auto v : vertices_range(revg)) {
87 if (is_special(v, revg)) {
88 continue;
89 }
90 if (!contains(colours, v)) {
91 dead.push_back(v);
92 }
93 }
94 }
95
96 if (dead.empty()) {
97 DEBUG_PRINTF("no unreachable vertices\n");
98 return;
99 }
100
101 remove_vertices(dead, g, false);
102 DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
103}
104
105template<class nfag_t>
106static
107bool pruneForwardUseless(NGHolder &h, const nfag_t &g,
108 typename nfag_t::vertex_descriptor s,
109 decltype(make_small_color_map(NGHolder())) &colors) {
110 // Begin with all vertices set to white, as DFV only marks visited
111 // vertices.
112 colors.fill(small_color::white);
113
114 depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors);
115
116 vector<NFAVertex> dead;
117
118 // All non-special vertices that are still white can be removed.
119 for (auto v : vertices_range(g)) {
120 if (!is_special(v, g) && get(colors, v) == small_color::white) {
121 DEBUG_PRINTF("vertex %zu is unreachable from %zu\n",
122 g[v].index, g[s].index);
123 dead.push_back(NFAVertex(v));
124 }
125 }
126
127 if (dead.empty()) {
128 return false;
129 }
130
131 DEBUG_PRINTF("removing %zu vertices\n", dead.size());
132 remove_vertices(dead, h, false);
133 return true;
134}
135
136/** Remove any vertices which can't be reached by traversing the graph forward
137 * from start or in reverse from acceptEod. If \p renumber is false, no
138 * vertex/edge renumbering is done. */
139void pruneUseless(NGHolder &g, bool renumber) {
140 DEBUG_PRINTF("pruning useless vertices\n");
141 assert(hasCorrectlyNumberedVertices(g));
142 auto colors = make_small_color_map(g);
143
144 bool work_done = pruneForwardUseless(g, g, g.start, colors);
145 work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g),
146 g.acceptEod, colors);
147
148 if (!work_done) {
149 return;
150 }
151
152 if (renumber) {
153 renumber_edges(g);
154 renumber_vertices(g);
155 }
156}
157
158/** This code removes any vertices which do not accept any symbols. Any
159 * vertices which no longer lie on a path from a start to an accept are also
160 * pruned. */
161void pruneEmptyVertices(NGHolder &g) {
162 DEBUG_PRINTF("pruning empty vertices\n");
163 vector<NFAVertex> dead;
164 for (auto v : vertices_range(g)) {
165 if (is_special(v, g)) {
166 continue;
167 }
168
169 const CharReach &cr = g[v].char_reach;
170 if (cr.none()) {
171 DEBUG_PRINTF("empty: %zu\n", g[v].index);
172 dead.push_back(v);
173 }
174 }
175
176 if (dead.empty()) {
177 return;
178 }
179
180 remove_vertices(dead, g);
181 pruneUseless(g);
182}
183
184/** Remove any edges from vertices that generate accepts (for Highlander
185 * graphs). */
186void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
187 // Safety check: all reports must be simple exhaustible reports, or this is
188 // not safe. This optimisation should be called early enough that no
189 // internal reports have been added.
190 for (auto report_id : all_reports(g)) {
191 const Report &ir = rm.getReport(report_id);
192
193 if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
194 !isExternalReport(ir)) {
195 DEBUG_PRINTF("report %u is not external highlander with "
196 "no bounds\n", report_id);
197 return;
198 }
199 }
200
201 vector<NFAEdge> dead;
202 for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
203 if (is_special(u, g)) {
204 continue;
205 }
206
207 // We can prune any out-edges that aren't accepts
208 for (const auto &e : out_edges_range(u, g)) {
209 if (!is_any_accept(target(e, g), g)) {
210 dead.push_back(e);
211 }
212 }
213 }
214
215 if (dead.empty()) {
216 return;
217 }
218
219 DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
220 remove_edges(dead, g);
221 pruneUseless(g);
222}
223
224static
225bool isDominatedByReporter(const NGHolder &g,
226 const unordered_map<NFAVertex, NFAVertex> &dom,
227 NFAVertex v, ReportID report_id) {
228 for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
229 NFAVertex u = it->second;
230 // Note: reporters with edges only to acceptEod are not considered to
231 // dominate.
232 if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
233 DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n",
234 g[v].index, g[u].index, report_id);
235 return true;
236 }
237 v = u;
238 }
239 return false;
240}
241
242/**
243 * True if the vertex has (a) a self-loop, (b) only out-edges to accept and
244 * itself and (c) only simple exhaustible reports.
245 */
246static
247bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
248 const ReportManager &rm,
249 NFAVertex v) {
250 if (!edge(v, v, g).second) {
251 return false;
252 }
253
254 for (auto w : adjacent_vertices_range(v, g)) {
255 if (w != v && w != g.accept) {
256 return false;
257 }
258 }
259
260 for (const auto &report_id : g[v].reports) {
261 if (!isSimpleExhaustible(rm.getReport(report_id))) {
262 return false;
263 }
264 }
265
266 return true;
267}
268
269void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
270 vector<NFAVertex> reporters;
271 for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
272 for (const auto &report_id : g[v].reports) {
273 const Report &r = rm.getReport(report_id);
274 if (isSimpleExhaustible(r)) {
275 reporters.push_back(v);
276 break;
277 }
278 }
279 }
280 for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
281 for (const auto &report_id : g[v].reports) {
282 const Report &r = rm.getReport(report_id);
283 if (isSimpleExhaustible(r)) {
284 reporters.push_back(v);
285 break;
286 }
287 }
288 }
289
290 if (reporters.empty()) {
291 return;
292 }
293
294
295 sort(begin(reporters), end(reporters));
296 reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));
297
298 DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
299 reporters.size());
300
301 const auto &dom = findDominators(g);
302 bool modified = false;
303
304 // If a reporter vertex is dominated by another with the same report, we
305 // can remove that report; if all reports are removed, we can remove the
306 // vertex entirely.
307 for (const auto v : reporters) {
308 const auto reports = g[v].reports; // copy, as we're going to mutate
309 for (const auto &report_id : reports) {
310 if (!isSimpleExhaustible(rm.getReport(report_id))) {
311 continue;
312 }
313 if (isDominatedByReporter(g, dom, v, report_id)) {
314 DEBUG_PRINTF("removed dominated report %u from vertex %zu\n",
315 report_id, g[v].index);
316 g[v].reports.erase(report_id);
317 }
318 }
319
320 if (g[v].reports.empty()) {
321 DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n",
322 g[v].index);
323 remove_edge(v, g.accept, g);
324 remove_edge(v, g.acceptEod, g);
325 modified = true;
326 }
327 }
328
329 // If a reporter vertex has a self-loop, but otherwise only leads to accept
330 // (note: NOT acceptEod) and has simple exhaustible reports, we can delete
331 // the self-loop.
332 for (const auto v : reporters) {
333 if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
334 remove_edge(v, v, g);
335 modified = true;
336 DEBUG_PRINTF("removed self-loop on %zu\n", g[v].index);
337 }
338 }
339
340 if (!modified) {
341 return;
342 }
343
344 pruneUseless(g);
345
346 // We may have only removed self-loops, in which case pruneUseless wouldn't
347 // renumber, so we do edge renumbering explicitly here.
348 renumber_edges(g);
349}
350
351/** Removes the given Report ID from vertices connected to accept, and then
352 * prunes useless vertices that have had their report sets reduced to empty. */
353void pruneReport(NGHolder &g, ReportID report) {
354 set<NFAEdge> dead;
355
356 for (const auto &e : in_edges_range(g.accept, g)) {
357 NFAVertex u = source(e, g);
358 auto &reports = g[u].reports;
359 if (contains(reports, report)) {
360 reports.erase(report);
361 if (reports.empty()) {
362 dead.insert(e);
363 }
364 }
365 }
366
367 for (const auto &e : in_edges_range(g.acceptEod, g)) {
368 NFAVertex u = source(e, g);
369 if (u == g.accept) {
370 continue;
371 }
372 auto &reports = g[u].reports;
373 if (contains(reports, report)) {
374 reports.erase(report);
375 if (reports.empty()) {
376 dead.insert(e);
377 }
378 }
379 }
380
381 if (dead.empty()) {
382 return;
383 }
384
385 remove_edges(dead, g);
386 pruneUnreachable(g);
387 renumber_vertices(g);
388 renumber_edges(g);
389}
390
391/** Removes all Report IDs bar the given one from vertices connected to accept,
392 * and then prunes useless vertices that have had their report sets reduced to
393 * empty. */
394void pruneAllOtherReports(NGHolder &g, ReportID report) {
395 set<NFAEdge> dead;
396
397 for (const auto &e : in_edges_range(g.accept, g)) {
398 NFAVertex u = source(e, g);
399 auto &reports = g[u].reports;
400 if (contains(reports, report)) {
401 reports.clear();
402 reports.insert(report);
403 } else {
404 reports.clear();
405 dead.insert(e);
406 }
407 }
408
409 for (const auto &e : in_edges_range(g.acceptEod, g)) {
410 NFAVertex u = source(e, g);
411 if (u == g.accept) {
412 continue;
413 }
414 auto &reports = g[u].reports;
415 if (contains(reports, report)) {
416 reports.clear();
417 reports.insert(report);
418 } else {
419 reports.clear();
420 dead.insert(e);
421 }
422 }
423
424 if (dead.empty()) {
425 return;
426 }
427
428 remove_edges(dead, g);
429 pruneUnreachable(g);
430 renumber_vertices(g);
431 renumber_edges(g);
432}
433
434} // namespace ue2
435