1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
31 * leading to a cyclic repeat.
32 *
33 * This is a graph reduction pass intended to remove vertices that are
34 * redundant because they lead solely to a cyclic vertex with a superset of
35 * their character reachability. For example, in this pattern:
36 *
37 * /(abc|def|abcghi).*0123/s
38 *
39 * The vertices for 'ghi' can be removed due to the presence of the dot-star
40 * repeat.
41 *
42 * Algorithm:
43 *
44 * for each cyclic vertex V:
45 * for each proper predecessor U of V:
46 * let S be the set of successors of U that are successors of V
47 * (including V itself)
48 * for each successor W of U not in S:
49 * perform a DFS forward from W, stopping exploration when a vertex
50 * in S is encountered;
51 * if a vertex with reach not in reach(V) or an accept is encountered:
52 * fail and continue to the next W.
53 * else:
54 * remove (U, W)
55 *
56 * NOTE: the following code is templated not just for fun, but so that we can
57 * run this analysis both forward and in reverse over the graph.
58 */
59#include "ng_cyclic_redundancy.h"
60
61#include "ng_holder.h"
62#include "ng_prune.h"
63#include "ng_util.h"
64#include "util/container.h"
65#include "util/flat_containers.h"
66#include "util/graph_range.h"
67#include "util/graph_small_color_map.h"
68
69#include <algorithm>
70#include <boost/graph/depth_first_search.hpp>
71#include <boost/graph/reverse_graph.hpp>
72
73using namespace std;
74using boost::reverse_graph;
75
76namespace ue2 {
77
78namespace {
79
80// Terminator function for depth first traversal, tells us not to explore
81// beyond vertices in set S.
82template<class Vertex, class Graph>
83class VertexInSet {
84 public:
85 explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {}
86 bool operator()(const Vertex &v, const Graph&) const {
87 return contains(verts, v);
88 }
89
90 private:
91 const flat_set<Vertex> &verts;
92};
93
94struct SearchFailed {};
95
96// Visitor for depth first traversal, throws an error if we encounter a vertex
97// with bad reach or a report.
98class SearchVisitor : public boost::default_dfs_visitor {
99 public:
100 explicit SearchVisitor(const CharReach &r) : cr(r) {}
101
102 template<class Vertex, class Graph>
103 void discover_vertex(const Vertex &v, const Graph &g) const {
104 DEBUG_PRINTF("vertex %zu\n", g[v].index);
105 if (is_special(v, g)) {
106 DEBUG_PRINTF("start or accept\n");
107 throw SearchFailed();
108 }
109
110 if (g[v].assert_flags) {
111 DEBUG_PRINTF("assert flags\n");
112 throw SearchFailed();
113 }
114
115 const CharReach &vcr = g[v].char_reach;
116 if (vcr != (vcr & cr)) {
117 DEBUG_PRINTF("bad reach\n");
118 throw SearchFailed();
119 }
120 }
121
122 private:
123 const CharReach &cr;
124};
125
126} // namespace
127
128template<class Graph, class ColorMap>
129static
130bool searchForward(const Graph &g, const CharReach &reach,
131 ColorMap &colours,
132 const flat_set<typename Graph::vertex_descriptor> &s,
133 typename Graph::vertex_descriptor w) {
134 colours.fill(small_color::white);
135 try {
136 depth_first_visit(g, w, SearchVisitor(reach), colours,
137 VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
138 } catch (SearchFailed &) {
139 return false;
140 }
141
142 return true;
143}
144
145static
146NFAEdge to_raw(const NFAEdge &e, const NGHolder &) {
147 return e;
148}
149
150static
151NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e,
152 const reverse_graph<NGHolder, NGHolder &> &g) {
153 return get(boost::edge_underlying, g, e);
154}
155
156/* returns true if we did stuff */
157template<class Graph>
158static
159bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
160 NGHolder &raw) {
161 bool did_stuff = false;
162
163 const CharReach &reach = g[v].char_reach;
164
165 typedef typename Graph::vertex_descriptor vertex_descriptor;
166
167 // Colour map used for depth_first_visit().
168 auto colours = make_small_color_map(g);
169
170 // precalc successors of v.
171 flat_set<vertex_descriptor> succ_v;
172 insert(&succ_v, adjacent_vertices(v, g));
173
174 flat_set<vertex_descriptor> s;
175
176 for (const auto &e : in_edges_range(v, g)) {
177 vertex_descriptor u = source(e, g);
178 if (u == v) {
179 continue;
180 }
181 if (is_any_accept(u, g)) {
182 continue;
183 }
184
185 DEBUG_PRINTF("- checking u %zu\n", g[u].index);
186
187 // let s be intersection(succ(u), succ(v))
188 s.clear();
189 for (auto b : adjacent_vertices_range(u, g)) {
190 if (contains(succ_v, b)) {
191 s.insert(b);
192 }
193 }
194
195 for (const auto &e_u : make_vector_from(out_edges(u, g))) {
196 vertex_descriptor w = target(e_u, g);
197 if (is_special(w, g) || contains(s, w)) {
198 continue;
199 }
200
201 const CharReach &w_reach = g[w].char_reach;
202 if (!w_reach.isSubsetOf(reach)) {
203 continue;
204 }
205
206 DEBUG_PRINTF(" - checking w %zu\n", g[w].index);
207
208 if (!searchForward(g, reach, colours, s, w)) {
209 continue;
210 }
211
212 DEBUG_PRINTF("removing edge (%zu,%zu)\n", g[u].index, g[w].index);
213 /* we are currently iterating over the in-edges of v, so it
214 would be unwise to remove edges to v. However, */
215 assert(w != v); /* as v is in s */
216 remove_edge(to_raw(e_u, g), raw);
217 did_stuff = true;
218 }
219 }
220
221 return did_stuff;
222}
223
224template<class Graph>
225static
226bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
227 bool did_stuff = false;
228
229 for (auto v : vertices_range(g)) {
230 if (is_special(v, g) || !edge(v, v, g).second) {
231 continue;
232 }
233
234 DEBUG_PRINTF("examining cyclic vertex %zu\n", g[v].index);
235 did_stuff |= removeCyclicPathRedundancy(g, v, raw);
236 }
237
238 return did_stuff;
239}
240
241bool removeCyclicPathRedundancy(NGHolder &g) {
242 assert(hasCorrectlyNumberedVertices(g));
243
244 // Forward pass.
245 bool f_changed = cyclicPathRedundancyPass(g, g);
246 if (f_changed) {
247 DEBUG_PRINTF("edges removed by forward pass\n");
248 pruneUseless(g);
249 }
250
251 // Reverse pass.
252 DEBUG_PRINTF("REVERSE PASS\n");
253 typedef reverse_graph<NGHolder, NGHolder &> RevGraph;
254 RevGraph revg(g);
255 bool r_changed = cyclicPathRedundancyPass(revg, g);
256 if (r_changed) {
257 DEBUG_PRINTF("edges removed by reverse pass\n");
258 pruneUseless(g);
259 }
260
261 return f_changed || r_changed;
262}
263
264} // namespace ue2
265