1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Functions for pruning unreachable vertices or reports from the graph. |
31 | */ |
32 | #include "ng_prune.h" |
33 | |
34 | #include "ng_dominators.h" |
35 | #include "ng_holder.h" |
36 | #include "ng_reports.h" |
37 | #include "ng_util.h" |
38 | #include "util/container.h" |
39 | #include "util/graph.h" |
40 | #include "util/graph_range.h" |
41 | #include "util/graph_small_color_map.h" |
42 | #include "util/report_manager.h" |
43 | |
44 | #include <deque> |
45 | #include <map> |
46 | |
47 | #include <boost/graph/depth_first_search.hpp> |
48 | #include <boost/graph/reverse_graph.hpp> |
49 | |
50 | using namespace std; |
51 | using boost::default_color_type; |
52 | using boost::reverse_graph; |
53 | |
54 | namespace ue2 { |
55 | |
56 | /** Remove any vertices that can't be reached by traversing the graph in |
57 | * reverse from acceptEod. */ |
58 | void pruneUnreachable(NGHolder &g) { |
59 | deque<NFAVertex> dead; |
60 | |
61 | if (in_degree(g.acceptEod, g) == 1 && !in_degree(g.accept, g) |
62 | && edge(g.accept, g.acceptEod, g).second) { |
63 | // Trivial case: there are no in-edges to our accepts (other than |
64 | // accept->acceptEod), so all non-specials are unreachable. |
65 | for (auto v : vertices_range(g)) { |
66 | if (!is_special(v, g)) { |
67 | dead.push_back(v); |
68 | } |
69 | } |
70 | } else { |
71 | // Walk a reverse graph from acceptEod with Boost's depth_first_visit |
72 | // call. |
73 | typedef reverse_graph<NGHolder, NGHolder &> RevNFAGraph; |
74 | RevNFAGraph revg(g); |
75 | |
76 | map<RevNFAGraph::vertex_descriptor, default_color_type> colours; |
77 | |
78 | depth_first_visit(revg, g.acceptEod, |
79 | make_dfs_visitor(boost::null_visitor()), |
80 | make_assoc_property_map(colours)); |
81 | |
82 | DEBUG_PRINTF("color map has %zu entries after DFV\n" , colours.size()); |
83 | |
84 | // All non-special vertices that aren't in the colour map (because they |
85 | // weren't reached) can be removed. |
86 | for (auto v : vertices_range(revg)) { |
87 | if (is_special(v, revg)) { |
88 | continue; |
89 | } |
90 | if (!contains(colours, v)) { |
91 | dead.push_back(v); |
92 | } |
93 | } |
94 | } |
95 | |
96 | if (dead.empty()) { |
97 | DEBUG_PRINTF("no unreachable vertices\n" ); |
98 | return; |
99 | } |
100 | |
101 | remove_vertices(dead, g, false); |
102 | DEBUG_PRINTF("removed %zu unreachable vertices\n" , dead.size()); |
103 | } |
104 | |
105 | template<class nfag_t> |
106 | static |
107 | bool pruneForwardUseless(NGHolder &h, const nfag_t &g, |
108 | typename nfag_t::vertex_descriptor s, |
109 | decltype(make_small_color_map(NGHolder())) &colors) { |
110 | // Begin with all vertices set to white, as DFV only marks visited |
111 | // vertices. |
112 | colors.fill(small_color::white); |
113 | |
114 | depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()), colors); |
115 | |
116 | vector<NFAVertex> dead; |
117 | |
118 | // All non-special vertices that are still white can be removed. |
119 | for (auto v : vertices_range(g)) { |
120 | if (!is_special(v, g) && get(colors, v) == small_color::white) { |
121 | DEBUG_PRINTF("vertex %zu is unreachable from %zu\n" , |
122 | g[v].index, g[s].index); |
123 | dead.push_back(NFAVertex(v)); |
124 | } |
125 | } |
126 | |
127 | if (dead.empty()) { |
128 | return false; |
129 | } |
130 | |
131 | DEBUG_PRINTF("removing %zu vertices\n" , dead.size()); |
132 | remove_vertices(dead, h, false); |
133 | return true; |
134 | } |
135 | |
136 | /** Remove any vertices which can't be reached by traversing the graph forward |
137 | * from start or in reverse from acceptEod. If \p renumber is false, no |
138 | * vertex/edge renumbering is done. */ |
139 | void pruneUseless(NGHolder &g, bool renumber) { |
140 | DEBUG_PRINTF("pruning useless vertices\n" ); |
141 | assert(hasCorrectlyNumberedVertices(g)); |
142 | auto colors = make_small_color_map(g); |
143 | |
144 | bool work_done = pruneForwardUseless(g, g, g.start, colors); |
145 | work_done |= pruneForwardUseless(g, reverse_graph<NGHolder, NGHolder &>(g), |
146 | g.acceptEod, colors); |
147 | |
148 | if (!work_done) { |
149 | return; |
150 | } |
151 | |
152 | if (renumber) { |
153 | renumber_edges(g); |
154 | renumber_vertices(g); |
155 | } |
156 | } |
157 | |
158 | /** This code removes any vertices which do not accept any symbols. Any |
159 | * vertices which no longer lie on a path from a start to an accept are also |
160 | * pruned. */ |
161 | void pruneEmptyVertices(NGHolder &g) { |
162 | DEBUG_PRINTF("pruning empty vertices\n" ); |
163 | vector<NFAVertex> dead; |
164 | for (auto v : vertices_range(g)) { |
165 | if (is_special(v, g)) { |
166 | continue; |
167 | } |
168 | |
169 | const CharReach &cr = g[v].char_reach; |
170 | if (cr.none()) { |
171 | DEBUG_PRINTF("empty: %zu\n" , g[v].index); |
172 | dead.push_back(v); |
173 | } |
174 | } |
175 | |
176 | if (dead.empty()) { |
177 | return; |
178 | } |
179 | |
180 | remove_vertices(dead, g); |
181 | pruneUseless(g); |
182 | } |
183 | |
184 | /** Remove any edges from vertices that generate accepts (for Highlander |
185 | * graphs). */ |
186 | void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) { |
187 | // Safety check: all reports must be simple exhaustible reports, or this is |
188 | // not safe. This optimisation should be called early enough that no |
189 | // internal reports have been added. |
190 | for (auto report_id : all_reports(g)) { |
191 | const Report &ir = rm.getReport(report_id); |
192 | |
193 | if (ir.ekey == INVALID_EKEY || ir.hasBounds() || |
194 | !isExternalReport(ir)) { |
195 | DEBUG_PRINTF("report %u is not external highlander with " |
196 | "no bounds\n" , report_id); |
197 | return; |
198 | } |
199 | } |
200 | |
201 | vector<NFAEdge> dead; |
202 | for (auto u : inv_adjacent_vertices_range(g.accept, g)) { |
203 | if (is_special(u, g)) { |
204 | continue; |
205 | } |
206 | |
207 | // We can prune any out-edges that aren't accepts |
208 | for (const auto &e : out_edges_range(u, g)) { |
209 | if (!is_any_accept(target(e, g), g)) { |
210 | dead.push_back(e); |
211 | } |
212 | } |
213 | } |
214 | |
215 | if (dead.empty()) { |
216 | return; |
217 | } |
218 | |
219 | DEBUG_PRINTF("found %zu removable edges due to single match\n" , dead.size()); |
220 | remove_edges(dead, g); |
221 | pruneUseless(g); |
222 | } |
223 | |
224 | static |
225 | bool isDominatedByReporter(const NGHolder &g, |
226 | const unordered_map<NFAVertex, NFAVertex> &dom, |
227 | NFAVertex v, ReportID report_id) { |
228 | for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) { |
229 | NFAVertex u = it->second; |
230 | // Note: reporters with edges only to acceptEod are not considered to |
231 | // dominate. |
232 | if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) { |
233 | DEBUG_PRINTF("%zu is dominated by %zu, and both report %u\n" , |
234 | g[v].index, g[u].index, report_id); |
235 | return true; |
236 | } |
237 | v = u; |
238 | } |
239 | return false; |
240 | } |
241 | |
242 | /** |
243 | * True if the vertex has (a) a self-loop, (b) only out-edges to accept and |
244 | * itself and (c) only simple exhaustible reports. |
245 | */ |
246 | static |
247 | bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g, |
248 | const ReportManager &rm, |
249 | NFAVertex v) { |
250 | if (!edge(v, v, g).second) { |
251 | return false; |
252 | } |
253 | |
254 | for (auto w : adjacent_vertices_range(v, g)) { |
255 | if (w != v && w != g.accept) { |
256 | return false; |
257 | } |
258 | } |
259 | |
260 | for (const auto &report_id : g[v].reports) { |
261 | if (!isSimpleExhaustible(rm.getReport(report_id))) { |
262 | return false; |
263 | } |
264 | } |
265 | |
266 | return true; |
267 | } |
268 | |
269 | void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) { |
270 | vector<NFAVertex> reporters; |
271 | for (auto v : inv_adjacent_vertices_range(g.accept, g)) { |
272 | for (const auto &report_id : g[v].reports) { |
273 | const Report &r = rm.getReport(report_id); |
274 | if (isSimpleExhaustible(r)) { |
275 | reporters.push_back(v); |
276 | break; |
277 | } |
278 | } |
279 | } |
280 | for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { |
281 | for (const auto &report_id : g[v].reports) { |
282 | const Report &r = rm.getReport(report_id); |
283 | if (isSimpleExhaustible(r)) { |
284 | reporters.push_back(v); |
285 | break; |
286 | } |
287 | } |
288 | } |
289 | |
290 | if (reporters.empty()) { |
291 | return; |
292 | } |
293 | |
294 | |
295 | sort(begin(reporters), end(reporters)); |
296 | reporters.erase(unique(begin(reporters), end(reporters)), end(reporters)); |
297 | |
298 | DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n" , |
299 | reporters.size()); |
300 | |
301 | const auto &dom = findDominators(g); |
302 | bool modified = false; |
303 | |
304 | // If a reporter vertex is dominated by another with the same report, we |
305 | // can remove that report; if all reports are removed, we can remove the |
306 | // vertex entirely. |
307 | for (const auto v : reporters) { |
308 | const auto reports = g[v].reports; // copy, as we're going to mutate |
309 | for (const auto &report_id : reports) { |
310 | if (!isSimpleExhaustible(rm.getReport(report_id))) { |
311 | continue; |
312 | } |
313 | if (isDominatedByReporter(g, dom, v, report_id)) { |
314 | DEBUG_PRINTF("removed dominated report %u from vertex %zu\n" , |
315 | report_id, g[v].index); |
316 | g[v].reports.erase(report_id); |
317 | } |
318 | } |
319 | |
320 | if (g[v].reports.empty()) { |
321 | DEBUG_PRINTF("removed edges to accepts from %zu, no reports left\n" , |
322 | g[v].index); |
323 | remove_edge(v, g.accept, g); |
324 | remove_edge(v, g.acceptEod, g); |
325 | modified = true; |
326 | } |
327 | } |
328 | |
329 | // If a reporter vertex has a self-loop, but otherwise only leads to accept |
330 | // (note: NOT acceptEod) and has simple exhaustible reports, we can delete |
331 | // the self-loop. |
332 | for (const auto v : reporters) { |
333 | if (hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) { |
334 | remove_edge(v, v, g); |
335 | modified = true; |
336 | DEBUG_PRINTF("removed self-loop on %zu\n" , g[v].index); |
337 | } |
338 | } |
339 | |
340 | if (!modified) { |
341 | return; |
342 | } |
343 | |
344 | pruneUseless(g); |
345 | |
346 | // We may have only removed self-loops, in which case pruneUseless wouldn't |
347 | // renumber, so we do edge renumbering explicitly here. |
348 | renumber_edges(g); |
349 | } |
350 | |
351 | /** Removes the given Report ID from vertices connected to accept, and then |
352 | * prunes useless vertices that have had their report sets reduced to empty. */ |
353 | void pruneReport(NGHolder &g, ReportID report) { |
354 | set<NFAEdge> dead; |
355 | |
356 | for (const auto &e : in_edges_range(g.accept, g)) { |
357 | NFAVertex u = source(e, g); |
358 | auto &reports = g[u].reports; |
359 | if (contains(reports, report)) { |
360 | reports.erase(report); |
361 | if (reports.empty()) { |
362 | dead.insert(e); |
363 | } |
364 | } |
365 | } |
366 | |
367 | for (const auto &e : in_edges_range(g.acceptEod, g)) { |
368 | NFAVertex u = source(e, g); |
369 | if (u == g.accept) { |
370 | continue; |
371 | } |
372 | auto &reports = g[u].reports; |
373 | if (contains(reports, report)) { |
374 | reports.erase(report); |
375 | if (reports.empty()) { |
376 | dead.insert(e); |
377 | } |
378 | } |
379 | } |
380 | |
381 | if (dead.empty()) { |
382 | return; |
383 | } |
384 | |
385 | remove_edges(dead, g); |
386 | pruneUnreachable(g); |
387 | renumber_vertices(g); |
388 | renumber_edges(g); |
389 | } |
390 | |
391 | /** Removes all Report IDs bar the given one from vertices connected to accept, |
392 | * and then prunes useless vertices that have had their report sets reduced to |
393 | * empty. */ |
394 | void pruneAllOtherReports(NGHolder &g, ReportID report) { |
395 | set<NFAEdge> dead; |
396 | |
397 | for (const auto &e : in_edges_range(g.accept, g)) { |
398 | NFAVertex u = source(e, g); |
399 | auto &reports = g[u].reports; |
400 | if (contains(reports, report)) { |
401 | reports.clear(); |
402 | reports.insert(report); |
403 | } else { |
404 | reports.clear(); |
405 | dead.insert(e); |
406 | } |
407 | } |
408 | |
409 | for (const auto &e : in_edges_range(g.acceptEod, g)) { |
410 | NFAVertex u = source(e, g); |
411 | if (u == g.accept) { |
412 | continue; |
413 | } |
414 | auto &reports = g[u].reports; |
415 | if (contains(reports, report)) { |
416 | reports.clear(); |
417 | reports.insert(report); |
418 | } else { |
419 | reports.clear(); |
420 | dead.insert(e); |
421 | } |
422 | } |
423 | |
424 | if (dead.empty()) { |
425 | return; |
426 | } |
427 | |
428 | remove_edges(dead, g); |
429 | pruneUnreachable(g); |
430 | renumber_vertices(g); |
431 | renumber_edges(g); |
432 | } |
433 | |
434 | } // namespace ue2 |
435 | |