| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "rose_in_util.h" |
| 30 | |
| 31 | #include "rose_build_util.h" |
| 32 | #include "nfa/goughcompile.h" |
| 33 | #include "nfagraph/ng_depth.h" |
| 34 | #include "nfagraph/ng_util.h" |
| 35 | #include "nfagraph/ng_width.h" |
| 36 | #include "util/container.h" |
| 37 | #include "util/graph_range.h" |
| 38 | #include "util/make_unique.h" |
| 39 | |
| 40 | #include <vector> |
| 41 | |
| 42 | #include <boost/graph/copy.hpp> |
| 43 | #include <boost/graph/reverse_graph.hpp> |
| 44 | #include <boost/graph/topological_sort.hpp> |
| 45 | |
| 46 | using namespace std; |
| 47 | |
| 48 | namespace ue2 { |
| 49 | |
| 50 | /* Returns a topological ordering of the vertices in g. That is the starts are |
| 51 | * at the front and all the predecessors of a vertex occur earlier in the list |
| 52 | * than the vertex. */ |
| 53 | vector<RoseInVertex> topo_order(const RoseInGraph &g) { |
| 54 | assert(hasCorrectlyNumberedVertices(g)); |
| 55 | vector<RoseInVertex> v_order; |
| 56 | v_order.reserve(num_vertices(g)); |
| 57 | |
| 58 | boost::topological_sort(g, back_inserter(v_order)); |
| 59 | |
| 60 | reverse(v_order.begin(), v_order.end()); /* put starts at the front */ |
| 61 | |
| 62 | return v_order; |
| 63 | } |
| 64 | |
| 65 | namespace { |
| 66 | struct RoseEdgeCopier { |
| 67 | typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap; |
| 68 | typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap; |
| 69 | |
| 70 | RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2, |
| 71 | const GraphMap &graph_map_in, const HaigMap &haig_map_in) |
| 72 | : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {} |
| 73 | |
| 74 | void operator()(const RoseInEdge &e1, RoseInEdge &e2) { |
| 75 | // Clone all properties. |
| 76 | put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1)); |
| 77 | // Substitute in cloned graphs. |
| 78 | if (ig[e1].graph) { |
| 79 | out[e2].graph = graph_map.at(ig[e1].graph.get()); |
| 80 | } |
| 81 | if (ig[e1].haig) { |
| 82 | out[e2].haig = haig_map.at(ig[e1].haig.get()); |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | private: |
| 87 | const RoseInGraph &ig; |
| 88 | RoseInGraph &out; |
| 89 | const GraphMap &graph_map; |
| 90 | const HaigMap &haig_map; |
| 91 | }; |
| 92 | } |
| 93 | |
| 94 | unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) { |
| 95 | assert(hasCorrectlyNumberedVertices(ig)); |
| 96 | unique_ptr<RoseInGraph> out = make_unique<RoseInGraph>(); |
| 97 | |
| 98 | unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map; |
| 99 | unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map; |
| 100 | |
| 101 | for (const auto &e : edges_range(ig)) { |
| 102 | const RoseInEdgeProps &ep = ig[e]; |
| 103 | if (ep.graph && !contains(graph_map, ep.graph.get())) { |
| 104 | graph_map[ep.graph.get()] = cloneHolder(*ep.graph); |
| 105 | } |
| 106 | if (ep.haig && !contains(haig_map, ep.haig.get())) { |
| 107 | haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig); |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | copy_graph(ig, *out, |
| 112 | boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))); |
| 113 | return out; |
| 114 | } |
| 115 | |
| 116 | void calcVertexOffsets(RoseInGraph &g) { |
| 117 | vector<RoseInVertex> v_order = topo_order(g); |
| 118 | |
| 119 | for (RoseInVertex v : v_order) { |
| 120 | if (g[v].type == RIV_START) { |
| 121 | g[v].min_offset = 0; |
| 122 | g[v].max_offset = ROSE_BOUND_INF; |
| 123 | continue; |
| 124 | } else if (g[v].type == RIV_ANCHORED_START) { |
| 125 | g[v].min_offset = 0; |
| 126 | g[v].max_offset = 0; |
| 127 | continue; |
| 128 | } |
| 129 | |
| 130 | DEBUG_PRINTF("vertex '%s'\n" , dumpString(g[v].s).c_str()); |
| 131 | |
| 132 | // Min and max predecessor depths. |
| 133 | u32 min_d = ROSE_BOUND_INF; |
| 134 | u32 max_d = 0; |
| 135 | |
| 136 | for (const auto &e : in_edges_range(v, g)) { |
| 137 | RoseInVertex u = source(e, g); |
| 138 | u32 e_min = g[u].min_offset; |
| 139 | u32 e_max = g[u].max_offset; |
| 140 | |
| 141 | DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n" , e_min, e_max); |
| 142 | |
| 143 | if (g[e].graph) { |
| 144 | const NGHolder &h = *g[e].graph; |
| 145 | depth g_min_width = findMinWidth(h); |
| 146 | depth g_max_width = |
| 147 | isAnchored(h) ? findMaxWidth(h) : depth::infinity(); |
| 148 | u32 graph_lag = g[e].graph_lag; |
| 149 | |
| 150 | DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n" , |
| 151 | g_min_width.str().c_str(), |
| 152 | g_max_width.str().c_str(), graph_lag); |
| 153 | g_min_width += graph_lag; |
| 154 | g_max_width += graph_lag; |
| 155 | e_min = add_rose_depth(e_min, g_min_width); |
| 156 | if (g_max_width.is_finite()) { |
| 157 | e_max = add_rose_depth(e_max, g_max_width); |
| 158 | } else { |
| 159 | e_max = ROSE_BOUND_INF; |
| 160 | } |
| 161 | } else { |
| 162 | DEBUG_PRINTF("edge has bounds [%u,%u]\n" , g[e].minBound, |
| 163 | g[e].maxBound); |
| 164 | e_min = add_rose_depth(e_min, g[e].minBound); |
| 165 | e_max = add_rose_depth(e_max, g[e].maxBound); |
| 166 | if (g[v].type == RIV_LITERAL) { |
| 167 | u32 len = g[v].s.length(); |
| 168 | DEBUG_PRINTF("lit len %u\n" , len); |
| 169 | e_min = add_rose_depth(e_min, len); |
| 170 | e_max = add_rose_depth(e_max, len); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | min_d = min(min_d, e_min); |
| 175 | max_d = max(max_d, e_max); |
| 176 | } |
| 177 | |
| 178 | DEBUG_PRINTF("vertex depths [%u,%u]\n" , min_d, max_d); |
| 179 | |
| 180 | assert(max_d >= min_d); |
| 181 | g[v].min_offset = min_d; |
| 182 | g[v].max_offset = max_d; |
| 183 | } |
| 184 | |
| 185 | // It's possible that we may have literal delays assigned to vertices here |
| 186 | // as well. If so, these need to be added to the min/max offsets. |
| 187 | for (RoseInVertex v : v_order) { |
| 188 | const u32 delay = g[v].delay; |
| 189 | g[v].min_offset = add_rose_depth(g[v].min_offset, delay); |
| 190 | g[v].max_offset = add_rose_depth(g[v].max_offset, delay); |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) { |
| 195 | RoseInVertex u = source(e, in); |
| 196 | RoseInVertex v = target(e, in); |
| 197 | |
| 198 | bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START; |
| 199 | bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD; |
| 200 | |
| 201 | if (start && !end) { |
| 202 | return NFA_PREFIX; |
| 203 | } else if (!start && end) { |
| 204 | return NFA_SUFFIX; |
| 205 | } else if (!start && !end) { |
| 206 | return NFA_INFIX; |
| 207 | } else { |
| 208 | assert(in[v].type == RIV_ACCEPT_EOD); |
| 209 | return NFA_OUTFIX; |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | void pruneUseless(RoseInGraph &g) { |
| 214 | DEBUG_PRINTF("pruning useless vertices\n" ); |
| 215 | |
| 216 | set<RoseInVertex> dead; |
| 217 | RoseInVertex dummy_start |
| 218 | = add_vertex(RoseInVertexProps::makeStart(true), g); |
| 219 | RoseInVertex dummy_end |
| 220 | = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g); |
| 221 | dead.insert(dummy_start); |
| 222 | dead.insert(dummy_end); |
| 223 | for (auto v : vertices_range(g)) { |
| 224 | if (v == dummy_start || v == dummy_end) { |
| 225 | continue; |
| 226 | } |
| 227 | switch (g[v].type) { |
| 228 | case RIV_ANCHORED_START: |
| 229 | case RIV_START: |
| 230 | add_edge(dummy_start, v, g); |
| 231 | break; |
| 232 | case RIV_ACCEPT: |
| 233 | case RIV_ACCEPT_EOD: |
| 234 | add_edge(v, dummy_end, g); |
| 235 | break; |
| 236 | default: |
| 237 | break; |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead); |
| 242 | find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g), |
| 243 | vector<RoseInVertex>(1, dummy_end), &dead); |
| 244 | |
| 245 | for (auto v : dead) { |
| 246 | clear_vertex(v, g); |
| 247 | remove_vertex(v, g); |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | } |
| 252 | |