| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths |
| 31 | * leading to a cyclic repeat. |
| 32 | * |
| 33 | * This is a graph reduction pass intended to remove vertices that are |
| 34 | * redundant because they lead solely to a cyclic vertex with a superset of |
| 35 | * their character reachability. For example, in this pattern: |
| 36 | * |
| 37 | * /(abc|def|abcghi).*0123/s |
| 38 | * |
| 39 | * The vertices for 'ghi' can be removed due to the presence of the dot-star |
| 40 | * repeat. |
| 41 | * |
| 42 | * Algorithm: |
| 43 | * |
| 44 | * for each cyclic vertex V: |
| 45 | * for each proper predecessor U of V: |
| 46 | * let S be the set of successors of U that are successors of V |
| 47 | * (including V itself) |
| 48 | * for each successor W of U not in S: |
| 49 | * perform a DFS forward from W, stopping exploration when a vertex |
| 50 | * in S is encountered; |
| 51 | * if a vertex with reach not in reach(V) or an accept is encountered: |
| 52 | * fail and continue to the next W. |
| 53 | * else: |
| 54 | * remove (U, W) |
| 55 | * |
| 56 | * NOTE: the following code is templated not just for fun, but so that we can |
| 57 | * run this analysis both forward and in reverse over the graph. |
| 58 | */ |
| 59 | #include "ng_cyclic_redundancy.h" |
| 60 | |
| 61 | #include "ng_holder.h" |
| 62 | #include "ng_prune.h" |
| 63 | #include "ng_util.h" |
| 64 | #include "util/container.h" |
| 65 | #include "util/flat_containers.h" |
| 66 | #include "util/graph_range.h" |
| 67 | #include "util/graph_small_color_map.h" |
| 68 | |
| 69 | #include <algorithm> |
| 70 | #include <boost/graph/depth_first_search.hpp> |
| 71 | #include <boost/graph/reverse_graph.hpp> |
| 72 | |
| 73 | using namespace std; |
| 74 | using boost::reverse_graph; |
| 75 | |
| 76 | namespace ue2 { |
| 77 | |
| 78 | namespace { |
| 79 | |
| 80 | // Terminator function for depth first traversal, tells us not to explore |
| 81 | // beyond vertices in set S. |
| 82 | template<class Vertex, class Graph> |
| 83 | class VertexInSet { |
| 84 | public: |
| 85 | explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {} |
| 86 | bool operator()(const Vertex &v, const Graph&) const { |
| 87 | return contains(verts, v); |
| 88 | } |
| 89 | |
| 90 | private: |
| 91 | const flat_set<Vertex> &verts; |
| 92 | }; |
| 93 | |
| 94 | struct SearchFailed {}; |
| 95 | |
| 96 | // Visitor for depth first traversal, throws an error if we encounter a vertex |
| 97 | // with bad reach or a report. |
| 98 | class SearchVisitor : public boost::default_dfs_visitor { |
| 99 | public: |
| 100 | explicit SearchVisitor(const CharReach &r) : cr(r) {} |
| 101 | |
| 102 | template<class Vertex, class Graph> |
| 103 | void discover_vertex(const Vertex &v, const Graph &g) const { |
| 104 | DEBUG_PRINTF("vertex %zu\n" , g[v].index); |
| 105 | if (is_special(v, g)) { |
| 106 | DEBUG_PRINTF("start or accept\n" ); |
| 107 | throw SearchFailed(); |
| 108 | } |
| 109 | |
| 110 | if (g[v].assert_flags) { |
| 111 | DEBUG_PRINTF("assert flags\n" ); |
| 112 | throw SearchFailed(); |
| 113 | } |
| 114 | |
| 115 | const CharReach &vcr = g[v].char_reach; |
| 116 | if (vcr != (vcr & cr)) { |
| 117 | DEBUG_PRINTF("bad reach\n" ); |
| 118 | throw SearchFailed(); |
| 119 | } |
| 120 | } |
| 121 | |
| 122 | private: |
| 123 | const CharReach &cr; |
| 124 | }; |
| 125 | |
| 126 | } // namespace |
| 127 | |
| 128 | template<class Graph, class ColorMap> |
| 129 | static |
| 130 | bool searchForward(const Graph &g, const CharReach &reach, |
| 131 | ColorMap &colours, |
| 132 | const flat_set<typename Graph::vertex_descriptor> &s, |
| 133 | typename Graph::vertex_descriptor w) { |
| 134 | colours.fill(small_color::white); |
| 135 | try { |
| 136 | depth_first_visit(g, w, SearchVisitor(reach), colours, |
| 137 | VertexInSet<typename Graph::vertex_descriptor, Graph>(s)); |
| 138 | } catch (SearchFailed &) { |
| 139 | return false; |
| 140 | } |
| 141 | |
| 142 | return true; |
| 143 | } |
| 144 | |
| 145 | static |
| 146 | NFAEdge to_raw(const NFAEdge &e, const NGHolder &) { |
| 147 | return e; |
| 148 | } |
| 149 | |
| 150 | static |
| 151 | NFAEdge to_raw(const reverse_graph<NGHolder, NGHolder &>::edge_descriptor &e, |
| 152 | const reverse_graph<NGHolder, NGHolder &> &g) { |
| 153 | return get(boost::edge_underlying, g, e); |
| 154 | } |
| 155 | |
| 156 | /* returns true if we did stuff */ |
| 157 | template<class Graph> |
| 158 | static |
| 159 | bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v, |
| 160 | NGHolder &raw) { |
| 161 | bool did_stuff = false; |
| 162 | |
| 163 | const CharReach &reach = g[v].char_reach; |
| 164 | |
| 165 | typedef typename Graph::vertex_descriptor vertex_descriptor; |
| 166 | |
| 167 | // Colour map used for depth_first_visit(). |
| 168 | auto colours = make_small_color_map(g); |
| 169 | |
| 170 | // precalc successors of v. |
| 171 | flat_set<vertex_descriptor> succ_v; |
| 172 | insert(&succ_v, adjacent_vertices(v, g)); |
| 173 | |
| 174 | flat_set<vertex_descriptor> s; |
| 175 | |
| 176 | for (const auto &e : in_edges_range(v, g)) { |
| 177 | vertex_descriptor u = source(e, g); |
| 178 | if (u == v) { |
| 179 | continue; |
| 180 | } |
| 181 | if (is_any_accept(u, g)) { |
| 182 | continue; |
| 183 | } |
| 184 | |
| 185 | DEBUG_PRINTF("- checking u %zu\n" , g[u].index); |
| 186 | |
| 187 | // let s be intersection(succ(u), succ(v)) |
| 188 | s.clear(); |
| 189 | for (auto b : adjacent_vertices_range(u, g)) { |
| 190 | if (contains(succ_v, b)) { |
| 191 | s.insert(b); |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | for (const auto &e_u : make_vector_from(out_edges(u, g))) { |
| 196 | vertex_descriptor w = target(e_u, g); |
| 197 | if (is_special(w, g) || contains(s, w)) { |
| 198 | continue; |
| 199 | } |
| 200 | |
| 201 | const CharReach &w_reach = g[w].char_reach; |
| 202 | if (!w_reach.isSubsetOf(reach)) { |
| 203 | continue; |
| 204 | } |
| 205 | |
| 206 | DEBUG_PRINTF(" - checking w %zu\n" , g[w].index); |
| 207 | |
| 208 | if (!searchForward(g, reach, colours, s, w)) { |
| 209 | continue; |
| 210 | } |
| 211 | |
| 212 | DEBUG_PRINTF("removing edge (%zu,%zu)\n" , g[u].index, g[w].index); |
| 213 | /* we are currently iterating over the in-edges of v, so it |
| 214 | would be unwise to remove edges to v. However, */ |
| 215 | assert(w != v); /* as v is in s */ |
| 216 | remove_edge(to_raw(e_u, g), raw); |
| 217 | did_stuff = true; |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | return did_stuff; |
| 222 | } |
| 223 | |
| 224 | template<class Graph> |
| 225 | static |
| 226 | bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) { |
| 227 | bool did_stuff = false; |
| 228 | |
| 229 | for (auto v : vertices_range(g)) { |
| 230 | if (is_special(v, g) || !edge(v, v, g).second) { |
| 231 | continue; |
| 232 | } |
| 233 | |
| 234 | DEBUG_PRINTF("examining cyclic vertex %zu\n" , g[v].index); |
| 235 | did_stuff |= removeCyclicPathRedundancy(g, v, raw); |
| 236 | } |
| 237 | |
| 238 | return did_stuff; |
| 239 | } |
| 240 | |
| 241 | bool removeCyclicPathRedundancy(NGHolder &g) { |
| 242 | assert(hasCorrectlyNumberedVertices(g)); |
| 243 | |
| 244 | // Forward pass. |
| 245 | bool f_changed = cyclicPathRedundancyPass(g, g); |
| 246 | if (f_changed) { |
| 247 | DEBUG_PRINTF("edges removed by forward pass\n" ); |
| 248 | pruneUseless(g); |
| 249 | } |
| 250 | |
| 251 | // Reverse pass. |
| 252 | DEBUG_PRINTF("REVERSE PASS\n" ); |
| 253 | typedef reverse_graph<NGHolder, NGHolder &> RevGraph; |
| 254 | RevGraph revg(g); |
| 255 | bool r_changed = cyclicPathRedundancyPass(revg, g); |
| 256 | if (r_changed) { |
| 257 | DEBUG_PRINTF("edges removed by reverse pass\n" ); |
| 258 | pruneUseless(g); |
| 259 | } |
| 260 | |
| 261 | return f_changed || r_changed; |
| 262 | } |
| 263 | |
| 264 | } // namespace ue2 |
| 265 | |