| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Miscellaneous NFA graph utilities. |
| 31 | */ |
| 32 | #include "ng_util.h" |
| 33 | |
| 34 | #include "grey.h" |
| 35 | #include "ng_dump.h" |
| 36 | #include "ng_prune.h" |
| 37 | #include "ue2common.h" |
| 38 | #include "nfa/limex_limits.h" // for NFA_MAX_TOP_MASKS. |
| 39 | #include "parser/position.h" |
| 40 | #include "util/graph_range.h" |
| 41 | #include "util/graph_small_color_map.h" |
| 42 | #include "util/make_unique.h" |
| 43 | #include "util/order_check.h" |
| 44 | #include "util/ue2string.h" |
| 45 | #include "util/report_manager.h" |
| 46 | |
| 47 | #include <limits> |
| 48 | #include <map> |
| 49 | #include <set> |
| 50 | #include <unordered_map> |
| 51 | #include <unordered_set> |
| 52 | |
| 53 | #include <boost/graph/filtered_graph.hpp> |
| 54 | #include <boost/graph/topological_sort.hpp> |
| 55 | #include <boost/range/adaptor/map.hpp> |
| 56 | |
| 57 | using namespace std; |
| 58 | using boost::make_filtered_graph; |
| 59 | using boost::make_assoc_property_map; |
| 60 | |
| 61 | namespace ue2 { |
| 62 | |
| 63 | NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex a) { |
| 64 | assert(a != NGHolder::null_vertex()); |
| 65 | |
| 66 | NGHolder::out_edge_iterator ii, iie; |
| 67 | tie(ii, iie) = out_edges(a, g); |
| 68 | if (ii == iie) { |
| 69 | return NGHolder::null_vertex(); |
| 70 | } |
| 71 | NFAVertex b = target(*ii, g); |
| 72 | if (a == b) { |
| 73 | ++ii; |
| 74 | if (ii == iie) { |
| 75 | return NGHolder::null_vertex(); |
| 76 | } |
| 77 | |
| 78 | b = target(*ii, g); |
| 79 | if (++ii != iie) { |
| 80 | return NGHolder::null_vertex(); |
| 81 | } |
| 82 | } else if (++ii != iie && (target(*ii, g) != a || ++ii != iie)) { |
| 83 | return NGHolder::null_vertex(); |
| 84 | } |
| 85 | |
| 86 | assert(a != b); |
| 87 | return b; |
| 88 | } |
| 89 | |
| 90 | NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex a) { |
| 91 | assert(a != NGHolder::null_vertex()); |
| 92 | |
| 93 | u32 idegree = in_degree(a, g); |
| 94 | if (idegree != 1 && !(idegree == 2 && hasSelfLoop(a, g))) { |
| 95 | return NGHolder::null_vertex(); |
| 96 | } |
| 97 | |
| 98 | NGHolder::in_edge_iterator ii, iie; |
| 99 | tie(ii, iie) = in_edges(a, g); |
| 100 | if (ii == iie) { |
| 101 | return NGHolder::null_vertex(); |
| 102 | } |
| 103 | NFAVertex b = source(*ii, g); |
| 104 | if (a == b) { |
| 105 | ++ii; |
| 106 | if (ii == iie) { |
| 107 | return NGHolder::null_vertex(); |
| 108 | } |
| 109 | |
| 110 | b = source(*ii, g); |
| 111 | } |
| 112 | |
| 113 | assert(a != b); |
| 114 | return b; |
| 115 | } |
| 116 | |
| 117 | NFAVertex clone_vertex(NGHolder &g, NFAVertex v) { |
| 118 | NFAVertex clone = add_vertex(g); |
| 119 | u32 idx = g[clone].index; |
| 120 | g[clone] = g[v]; |
| 121 | g[clone].index = idx; |
| 122 | |
| 123 | return clone; |
| 124 | } |
| 125 | |
| 126 | void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest) { |
| 127 | for (const auto &e : out_edges_range(source, g)) { |
| 128 | NFAVertex t = target(e, g); |
| 129 | if (edge(dest, t, g).second) { |
| 130 | continue; |
| 131 | } |
| 132 | NFAEdge clone = add_edge(dest, t, g); |
| 133 | u32 idx = g[clone].index; |
| 134 | g[clone] = g[e]; |
| 135 | g[clone].index = idx; |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | void clone_in_edges(NGHolder &g, NFAVertex s, NFAVertex dest) { |
| 140 | for (const auto &e : in_edges_range(s, g)) { |
| 141 | NFAVertex ss = source(e, g); |
| 142 | assert(!edge(ss, dest, g).second); |
| 143 | NFAEdge clone = add_edge(ss, dest, g); |
| 144 | u32 idx = g[clone].index; |
| 145 | g[clone] = g[e]; |
| 146 | g[clone].index = idx; |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | bool onlyOneTop(const NGHolder &g) { |
| 151 | return getTops(g).size() == 1; |
| 152 | } |
| 153 | |
| 154 | namespace { |
| 155 | struct CycleFound {}; |
| 156 | struct DetectCycles : public boost::default_dfs_visitor { |
| 157 | explicit DetectCycles(const NGHolder &g) : startDs(g.startDs) {} |
| 158 | void back_edge(const NFAEdge &e, const NGHolder &g) const { |
| 159 | NFAVertex u = source(e, g), v = target(e, g); |
| 160 | // We ignore the startDs self-loop. |
| 161 | if (u == startDs && v == startDs) { |
| 162 | return; |
| 163 | } |
| 164 | // Any other back-edge indicates a cycle. |
| 165 | DEBUG_PRINTF("back edge %zu->%zu found\n" , g[u].index, g[v].index); |
| 166 | throw CycleFound(); |
| 167 | } |
| 168 | private: |
| 169 | const NFAVertex startDs; |
| 170 | }; |
| 171 | } // namespace |
| 172 | |
| 173 | bool isVacuous(const NGHolder &h) { |
| 174 | return edge(h.start, h.accept, h).second |
| 175 | || edge(h.start, h.acceptEod, h).second |
| 176 | || edge(h.startDs, h.accept, h).second |
| 177 | || edge(h.startDs, h.acceptEod, h).second; |
| 178 | } |
| 179 | |
| 180 | bool isAnchored(const NGHolder &g) { |
| 181 | for (auto v : adjacent_vertices_range(g.startDs, g)) { |
| 182 | if (v != g.startDs) { |
| 183 | return false; |
| 184 | } |
| 185 | } |
| 186 | return true; |
| 187 | } |
| 188 | |
| 189 | bool isFloating(const NGHolder &g) { |
| 190 | for (auto v : adjacent_vertices_range(g.start, g)) { |
| 191 | if (v != g.startDs && !edge(g.startDs, v, g).second) { |
| 192 | return false; |
| 193 | } |
| 194 | } |
| 195 | return true; |
| 196 | } |
| 197 | |
| 198 | bool isAcyclic(const NGHolder &g) { |
| 199 | try { |
| 200 | boost::depth_first_search(g, DetectCycles(g), make_small_color_map(g), |
| 201 | g.start); |
| 202 | } catch (const CycleFound &) { |
| 203 | return false; |
| 204 | } |
| 205 | |
| 206 | return true; |
| 207 | } |
| 208 | |
| 209 | /** True if the graph has a cycle reachable from the given source vertex. */ |
| 210 | bool hasReachableCycle(const NGHolder &g, NFAVertex src) { |
| 211 | assert(hasCorrectlyNumberedVertices(g)); |
| 212 | |
| 213 | try { |
| 214 | // Use depth_first_visit, rather than depth_first_search, so that we |
| 215 | // only search from src. |
| 216 | boost::depth_first_visit(g, src, DetectCycles(g), |
| 217 | make_small_color_map(g)); |
| 218 | } catch (const CycleFound &) { |
| 219 | return true; |
| 220 | } |
| 221 | |
| 222 | return false; |
| 223 | } |
| 224 | |
| 225 | bool hasBigCycles(const NGHolder &g) { |
| 226 | assert(hasCorrectlyNumberedVertices(g)); |
| 227 | set<NFAEdge> dead; |
| 228 | BackEdges<set<NFAEdge>> backEdgeVisitor(dead); |
| 229 | boost::depth_first_search(g, backEdgeVisitor, make_small_color_map(g), |
| 230 | g.start); |
| 231 | |
| 232 | for (const auto &e : dead) { |
| 233 | if (source(e, g) != target(e, g)) { |
| 234 | return true; |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | return false; |
| 239 | } |
| 240 | |
| 241 | bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count) { |
| 242 | return any_of_in(vertices_range(g), [&](NFAVertex v) { |
| 243 | return !is_special(v, g) && g[v].char_reach.count() < max_reach_count; |
| 244 | }); |
| 245 | } |
| 246 | |
| 247 | bool can_never_match(const NGHolder &g) { |
| 248 | assert(edge(g.accept, g.acceptEod, g).second); |
| 249 | if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { |
| 250 | DEBUG_PRINTF("no paths into accept\n" ); |
| 251 | return true; |
| 252 | } |
| 253 | |
| 254 | return false; |
| 255 | } |
| 256 | |
| 257 | bool can_match_at_eod(const NGHolder &h) { |
| 258 | if (in_degree(h.acceptEod, h) > 1) { |
| 259 | DEBUG_PRINTF("more than one edge to acceptEod\n" ); |
| 260 | return true; |
| 261 | } |
| 262 | |
| 263 | for (auto e : in_edges_range(h.accept, h)) { |
| 264 | if (h[e].assert_flags) { |
| 265 | DEBUG_PRINTF("edge to accept has assert flags %d\n" , |
| 266 | h[e].assert_flags); |
| 267 | return true; |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | return false; |
| 272 | } |
| 273 | |
| 274 | bool can_only_match_at_eod(const NGHolder &g) { |
| 275 | NGHolder::in_edge_iterator ie, ee; |
| 276 | tie(ie, ee) = in_edges(g.accept, g); |
| 277 | |
| 278 | return ie == ee; |
| 279 | } |
| 280 | |
| 281 | bool matches_everywhere(const NGHolder &h) { |
| 282 | NFAEdge e = edge(h.startDs, h.accept, h); |
| 283 | |
| 284 | return e && !h[e].assert_flags; |
| 285 | } |
| 286 | |
| 287 | bool is_virtual_start(NFAVertex v, const NGHolder &g) { |
| 288 | return g[v].assert_flags & POS_FLAG_VIRTUAL_START; |
| 289 | } |
| 290 | |
| 291 | static |
| 292 | void reorderSpecials(const NGHolder &g, vector<NFAVertex> &topoOrder) { |
| 293 | // Start is last element of reverse topo ordering. |
| 294 | auto it = find(topoOrder.begin(), topoOrder.end(), g.start); |
| 295 | if (it != topoOrder.end() - 1) { |
| 296 | DEBUG_PRINTF("repositioning start\n" ); |
| 297 | assert(it != topoOrder.end()); |
| 298 | topoOrder.erase(it); |
| 299 | topoOrder.insert(topoOrder.end(), g.start); |
| 300 | } |
| 301 | |
| 302 | // StartDs is second-to-last element of reverse topo ordering. |
| 303 | it = find(topoOrder.begin(), topoOrder.end(), g.startDs); |
| 304 | if (it != topoOrder.end() - 2) { |
| 305 | DEBUG_PRINTF("repositioning start ds\n" ); |
| 306 | assert(it != topoOrder.end()); |
| 307 | topoOrder.erase(it); |
| 308 | topoOrder.insert(topoOrder.end() - 1, g.startDs); |
| 309 | } |
| 310 | |
| 311 | // AcceptEOD is first element of reverse topo ordering. |
| 312 | it = find(topoOrder.begin(), topoOrder.end(), g.acceptEod); |
| 313 | if (it != topoOrder.begin()) { |
| 314 | DEBUG_PRINTF("repositioning accept\n" ); |
| 315 | assert(it != topoOrder.end()); |
| 316 | topoOrder.erase(it); |
| 317 | topoOrder.insert(topoOrder.begin(), g.acceptEod); |
| 318 | } |
| 319 | |
| 320 | // Accept is second element of reverse topo ordering, if it's connected. |
| 321 | it = find(topoOrder.begin(), topoOrder.end(), g.accept); |
| 322 | if (it != topoOrder.begin() + 1) { |
| 323 | DEBUG_PRINTF("repositioning accept\n" ); |
| 324 | assert(it != topoOrder.end()); |
| 325 | topoOrder.erase(it); |
| 326 | if (in_degree(g.accept, g) != 0) { |
| 327 | topoOrder.insert(topoOrder.begin() + 1, g.accept); |
| 328 | } |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | vector<NFAVertex> getTopoOrdering(const NGHolder &g) { |
| 333 | assert(hasCorrectlyNumberedVertices(g)); |
| 334 | |
| 335 | // Use the same colour map for both DFS and topological_sort below: avoids |
| 336 | // having to reallocate it, etc. |
| 337 | auto colors = make_small_color_map(g); |
| 338 | |
| 339 | using EdgeSet = unordered_set<NFAEdge>; |
| 340 | EdgeSet backEdges; |
| 341 | BackEdges<EdgeSet> be(backEdges); |
| 342 | |
| 343 | depth_first_search(g, visitor(be).root_vertex(g.start).color_map(colors)); |
| 344 | |
| 345 | auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&backEdges)); |
| 346 | |
| 347 | vector<NFAVertex> ordering; |
| 348 | ordering.reserve(num_vertices(g)); |
| 349 | topological_sort(acyclic_g, back_inserter(ordering), color_map(colors)); |
| 350 | |
| 351 | reorderSpecials(g, ordering); |
| 352 | |
| 353 | return ordering; |
| 354 | } |
| 355 | |
| 356 | static |
| 357 | void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, |
| 358 | decltype(make_small_color_map(NGHolder())) &colors) { |
| 359 | set<NFAVertex> s; |
| 360 | insert(&s, adjacent_vertices(u, g)); |
| 361 | |
| 362 | set<NFAEdge> dead; // Edges leading to u or u's successors. |
| 363 | |
| 364 | for (auto v : inv_adjacent_vertices_range(u, g)) { |
| 365 | for (const auto &e : out_edges_range(v, g)) { |
| 366 | NFAVertex t = target(e, g); |
| 367 | if (t == u || contains(s, t)) { |
| 368 | dead.insert(e); |
| 369 | } |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | auto prefix = make_filtered_graph(g, make_bad_edge_filter(&dead)); |
| 374 | |
| 375 | depth_first_visit(prefix, g.start, make_dfs_visitor(boost::null_visitor()), |
| 376 | colors); |
| 377 | } |
| 378 | |
| 379 | bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g, |
| 380 | mbsb_cache &cache) { |
| 381 | assert(&cache.g == &g); |
| 382 | auto key = make_pair(g[u].index, g[v].index); |
| 383 | DEBUG_PRINTF("cache checking (%zu)\n" , cache.cache.size()); |
| 384 | if (contains(cache.cache, key)) { |
| 385 | DEBUG_PRINTF("cache hit\n" ); |
| 386 | return cache.cache[key]; |
| 387 | } |
| 388 | |
| 389 | auto colors = make_small_color_map(g); |
| 390 | mustBeSetBefore_int(u, g, colors); |
| 391 | |
| 392 | for (auto vi : vertices_range(g)) { |
| 393 | auto key2 = make_pair(g[u].index, g[vi].index); |
| 394 | DEBUG_PRINTF("adding %zu %zu\n" , key2.first, key2.second); |
| 395 | assert(!contains(cache.cache, key2)); |
| 396 | bool value = get(colors, vi) == small_color::white; |
| 397 | cache.cache[key2] = value; |
| 398 | assert(contains(cache.cache, key2)); |
| 399 | } |
| 400 | DEBUG_PRINTF("cache miss %zu %zu (%zu)\n" , key.first, key.second, |
| 401 | cache.cache.size()); |
| 402 | return cache.cache[key]; |
| 403 | } |
| 404 | |
| 405 | void appendLiteral(NGHolder &h, const ue2_literal &s) { |
| 406 | DEBUG_PRINTF("adding '%s' to graph\n" , dumpString(s).c_str()); |
| 407 | vector<NFAVertex> tail; |
| 408 | assert(in_degree(h.acceptEod, h) == 1); |
| 409 | for (auto v : inv_adjacent_vertices_range(h.accept, h)) { |
| 410 | tail.push_back(v); |
| 411 | } |
| 412 | assert(!tail.empty()); |
| 413 | |
| 414 | for (auto v : tail) { |
| 415 | remove_edge(v, h.accept, h); |
| 416 | } |
| 417 | |
| 418 | for (const auto &c : s) { |
| 419 | NFAVertex v = add_vertex(h); |
| 420 | h[v].char_reach = c; |
| 421 | for (auto u : tail) { |
| 422 | add_edge(u, v, h); |
| 423 | } |
| 424 | tail.clear(); |
| 425 | tail.push_back(v); |
| 426 | } |
| 427 | |
| 428 | for (auto v : tail) { |
| 429 | add_edge(v, h.accept, h); |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | flat_set<u32> getTops(const NGHolder &h) { |
| 434 | flat_set<u32> tops; |
| 435 | for (const auto &e : out_edges_range(h.start, h)) { |
| 436 | insert(&tops, h[e].tops); |
| 437 | } |
| 438 | return tops; |
| 439 | } |
| 440 | |
| 441 | void setTops(NGHolder &h, u32 top) { |
| 442 | for (const auto &e : out_edges_range(h.start, h)) { |
| 443 | assert(h[e].tops.empty()); |
| 444 | if (target(e, h) == h.startDs) { |
| 445 | continue; |
| 446 | } |
| 447 | h[e].tops.insert(top); |
| 448 | } |
| 449 | } |
| 450 | |
| 451 | void clearReports(NGHolder &g) { |
| 452 | DEBUG_PRINTF("clearing reports without an accept edge\n" ); |
| 453 | unordered_set<NFAVertex> allow; |
| 454 | insert(&allow, inv_adjacent_vertices(g.accept, g)); |
| 455 | insert(&allow, inv_adjacent_vertices(g.acceptEod, g)); |
| 456 | allow.erase(g.accept); // due to stylised edge. |
| 457 | |
| 458 | for (auto v : vertices_range(g)) { |
| 459 | if (contains(allow, v)) { |
| 460 | continue; |
| 461 | } |
| 462 | g[v].reports.clear(); |
| 463 | } |
| 464 | } |
| 465 | |
| 466 | void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new) { |
| 467 | for (auto v : vertices_range(g)) { |
| 468 | auto &reports = g[v].reports; |
| 469 | if (contains(reports, r_old)) { |
| 470 | reports.insert(r_new); |
| 471 | } |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | static |
| 476 | void fillHolderOutEdges(NGHolder &out, const NGHolder &in, |
| 477 | const unordered_map<NFAVertex, NFAVertex> &v_map, |
| 478 | NFAVertex u) { |
| 479 | NFAVertex u_new = v_map.at(u); |
| 480 | |
| 481 | for (auto e : out_edges_range(u, in)) { |
| 482 | NFAVertex v = target(e, in); |
| 483 | |
| 484 | if (is_special(u, in) && is_special(v, in)) { |
| 485 | continue; |
| 486 | } |
| 487 | |
| 488 | auto it = v_map.find(v); |
| 489 | if (it == v_map.end()) { |
| 490 | continue; |
| 491 | } |
| 492 | NFAVertex v_new = it->second; |
| 493 | assert(!edge(u_new, v_new, out).second); |
| 494 | add_edge(u_new, v_new, in[e], out); |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | void fillHolder(NGHolder *outp, const NGHolder &in, const deque<NFAVertex> &vv, |
| 499 | unordered_map<NFAVertex, NFAVertex> *v_map_out) { |
| 500 | NGHolder &out = *outp; |
| 501 | unordered_map<NFAVertex, NFAVertex> &v_map = *v_map_out; |
| 502 | |
| 503 | out.kind = in.kind; |
| 504 | |
| 505 | for (auto v : vv) { |
| 506 | if (is_special(v, in)) { |
| 507 | continue; |
| 508 | } |
| 509 | v_map[v] = add_vertex(in[v], out); |
| 510 | } |
| 511 | |
| 512 | for (u32 i = 0; i < N_SPECIALS; i++) { |
| 513 | v_map[in.getSpecialVertex(i)] = out.getSpecialVertex(i); |
| 514 | } |
| 515 | |
| 516 | DEBUG_PRINTF("copied %zu vertices to NG graph\n" , v_map.size()); |
| 517 | |
| 518 | fillHolderOutEdges(out, in, v_map, in.start); |
| 519 | fillHolderOutEdges(out, in, v_map, in.startDs); |
| 520 | |
| 521 | for (auto u : vv) { |
| 522 | if (is_special(u, in)) { |
| 523 | continue; |
| 524 | } |
| 525 | fillHolderOutEdges(out, in, v_map, u); |
| 526 | } |
| 527 | |
| 528 | renumber_edges(out); |
| 529 | renumber_vertices(out); |
| 530 | } |
| 531 | |
| 532 | void cloneHolder(NGHolder &out, const NGHolder &in) { |
| 533 | assert(hasCorrectlyNumberedVertices(in)); |
| 534 | assert(hasCorrectlyNumberedVertices(out)); |
| 535 | out.kind = in.kind; |
| 536 | |
| 537 | // Note: depending on the state of the input graph, some stylized edges |
| 538 | // (e.g. start->startDs) may not exist. This must be propagated to the |
| 539 | // output graph as well. |
| 540 | |
| 541 | /* remove the existing special edges */ |
| 542 | clear_vertex(out.startDs, out); |
| 543 | clear_vertex(out.accept, out); |
| 544 | renumber_edges(out); |
| 545 | |
| 546 | vector<NFAVertex> out_mapping(num_vertices(in)); |
| 547 | out_mapping[NODE_START] = out.start; |
| 548 | out_mapping[NODE_START_DOTSTAR] = out.startDs; |
| 549 | out_mapping[NODE_ACCEPT] = out.accept; |
| 550 | out_mapping[NODE_ACCEPT_EOD] = out.acceptEod; |
| 551 | |
| 552 | for (auto v : vertices_range(in)) { |
| 553 | u32 i = in[v].index; |
| 554 | |
| 555 | /* special vertices are already in the out graph */ |
| 556 | if (i >= N_SPECIALS) { |
| 557 | assert(!out_mapping[i]); |
| 558 | out_mapping[i] = add_vertex(in[v], out); |
| 559 | } |
| 560 | |
| 561 | out[out_mapping[i]] = in[v]; |
| 562 | } |
| 563 | |
| 564 | for (auto e : edges_range(in)) { |
| 565 | u32 si = in[source(e, in)].index; |
| 566 | u32 ti = in[target(e, in)].index; |
| 567 | |
| 568 | DEBUG_PRINTF("adding edge %u->%u\n" , si, ti); |
| 569 | |
| 570 | NFAVertex s = out_mapping[si]; |
| 571 | NFAVertex t = out_mapping[ti]; |
| 572 | NFAEdge e2 = add_edge(s, t, out); |
| 573 | out[e2] = in[e]; |
| 574 | } |
| 575 | |
| 576 | // Safety checks. |
| 577 | assert(num_vertices(in) == num_vertices(out)); |
| 578 | assert(num_edges(in) == num_edges(out)); |
| 579 | assert(hasCorrectlyNumberedVertices(out)); |
| 580 | } |
| 581 | |
| 582 | void cloneHolder(NGHolder &out, const NGHolder &in, |
| 583 | unordered_map<NFAVertex, NFAVertex> *mapping) { |
| 584 | cloneHolder(out, in); |
| 585 | vector<NFAVertex> out_verts(num_vertices(in)); |
| 586 | for (auto v : vertices_range(out)) { |
| 587 | out_verts[out[v].index] = v; |
| 588 | } |
| 589 | |
| 590 | mapping->clear(); |
| 591 | |
| 592 | for (auto v : vertices_range(in)) { |
| 593 | (*mapping)[v] = out_verts[in[v].index]; |
| 594 | assert((*mapping)[v]); |
| 595 | } |
| 596 | } |
| 597 | |
| 598 | unique_ptr<NGHolder> cloneHolder(const NGHolder &in) { |
| 599 | unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(); |
| 600 | cloneHolder(*h, in); |
| 601 | return h; |
| 602 | } |
| 603 | |
| 604 | void reverseHolder(const NGHolder &g_in, NGHolder &g) { |
| 605 | // Make the BGL do the grunt work. |
| 606 | unordered_map<NFAVertex, NFAVertex> vertexMap; |
| 607 | boost::transpose_graph(g_in, g, |
| 608 | orig_to_copy(boost::make_assoc_property_map(vertexMap))); |
| 609 | |
| 610 | // The transpose_graph operation will have created extra copies of our |
| 611 | // specials. We have to rewire their neighbours to the 'real' specials and |
| 612 | // delete them. |
| 613 | NFAVertex start = vertexMap[g_in.acceptEod]; |
| 614 | NFAVertex startDs = vertexMap[g_in.accept]; |
| 615 | NFAVertex accept = vertexMap[g_in.startDs]; |
| 616 | NFAVertex acceptEod = vertexMap[g_in.start]; |
| 617 | |
| 618 | // Successors of starts. |
| 619 | for (const auto &e : out_edges_range(start, g)) { |
| 620 | NFAVertex v = target(e, g); |
| 621 | add_edge(g.start, v, g[e], g); |
| 622 | } |
| 623 | for (const auto &e : out_edges_range(startDs, g)) { |
| 624 | NFAVertex v = target(e, g); |
| 625 | add_edge(g.startDs, v, g[e], g); |
| 626 | } |
| 627 | |
| 628 | // Predecessors of accepts. |
| 629 | for (const auto &e : in_edges_range(accept, g)) { |
| 630 | NFAVertex u = source(e, g); |
| 631 | add_edge(u, g.accept, g[e], g); |
| 632 | } |
| 633 | for (const auto &e : in_edges_range(acceptEod, g)) { |
| 634 | NFAVertex u = source(e, g); |
| 635 | add_edge(u, g.acceptEod, g[e], g); |
| 636 | } |
| 637 | |
| 638 | // Remove our impostors. |
| 639 | clear_vertex(start, g); |
| 640 | remove_vertex(start, g); |
| 641 | clear_vertex(startDs, g); |
| 642 | remove_vertex(startDs, g); |
| 643 | clear_vertex(accept, g); |
| 644 | remove_vertex(accept, g); |
| 645 | clear_vertex(acceptEod, g); |
| 646 | remove_vertex(acceptEod, g); |
| 647 | |
| 648 | // Renumber so that g's properties (number of vertices, edges) are |
| 649 | // accurate. |
| 650 | renumber_vertices(g); |
| 651 | renumber_edges(g); |
| 652 | |
| 653 | assert(num_vertices(g) == num_vertices(g_in)); |
| 654 | assert(num_edges(g) == num_edges(g_in)); |
| 655 | } |
| 656 | |
| 657 | u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, |
| 658 | u32 max_delay, bool overhang_ok) { |
| 659 | assert(isCorrectlyTopped(g)); |
| 660 | if (max_delay == numeric_limits<u32>::max()) { |
| 661 | max_delay--; |
| 662 | } |
| 663 | |
| 664 | DEBUG_PRINTF("killing off '%s'\n" , dumpString(lit).c_str()); |
| 665 | set<NFAVertex> curr, next; |
| 666 | curr.insert(g.accept); |
| 667 | |
| 668 | auto it = lit.rbegin(); |
| 669 | for (u32 delay = max_delay; delay > 0 && it != lit.rend(); delay--, ++it) { |
| 670 | next.clear(); |
| 671 | for (auto v : curr) { |
| 672 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
| 673 | if (u == g.start) { |
| 674 | if (overhang_ok) { |
| 675 | DEBUG_PRINTF("bail\n" ); |
| 676 | goto bail; /* things got complicated */ |
| 677 | } else { |
| 678 | continue; /* it is not possible for a lhs literal to |
| 679 | * overhang the start */ |
| 680 | } |
| 681 | } |
| 682 | |
| 683 | const CharReach &cr = g[u].char_reach; |
| 684 | if (!overlaps(*it, cr)) { |
| 685 | DEBUG_PRINTF("skip\n" ); |
| 686 | continue; |
| 687 | } |
| 688 | if (isSubsetOf(*it, cr)) { |
| 689 | next.insert(u); |
| 690 | } else { |
| 691 | DEBUG_PRINTF("bail\n" ); |
| 692 | goto bail; /* things got complicated */ |
| 693 | } |
| 694 | } |
| 695 | } |
| 696 | |
| 697 | curr.swap(next); |
| 698 | } |
| 699 | bail: |
| 700 | if (curr.empty()) { |
| 701 | /* This can happen when we have an edge representing a cross from two |
| 702 | * sides of an alternation. This whole edge needs to be marked as |
| 703 | * dead */ |
| 704 | assert(0); /* should have been picked up by can match */ |
| 705 | return numeric_limits<u32>::max(); |
| 706 | } |
| 707 | |
| 708 | u32 delay = distance(lit.rbegin(), it); |
| 709 | assert(delay <= max_delay); |
| 710 | assert(delay <= lit.length()); |
| 711 | DEBUG_PRINTF("managed delay %u (of max %u)\n" , delay, max_delay); |
| 712 | |
| 713 | set<NFAVertex> pred; |
| 714 | for (auto v : curr) { |
| 715 | insert(&pred, inv_adjacent_vertices_range(v, g)); |
| 716 | } |
| 717 | |
| 718 | clear_in_edges(g.accept, g); |
| 719 | clearReports(g); |
| 720 | |
| 721 | for (auto v : pred) { |
| 722 | NFAEdge e = add_edge(v, g.accept, g); |
| 723 | g[v].reports.insert(0); |
| 724 | if (is_triggered(g) && v == g.start) { |
| 725 | g[e].tops.insert(DEFAULT_TOP); |
| 726 | } |
| 727 | } |
| 728 | |
| 729 | pruneUseless(g); |
| 730 | assert(allMatchStatesHaveReports(g)); |
| 731 | assert(isCorrectlyTopped(g)); |
| 732 | |
| 733 | DEBUG_PRINTF("graph has %zu vertices left\n" , num_vertices(g)); |
| 734 | return delay; |
| 735 | } |
| 736 | |
| 737 | #ifndef NDEBUG |
| 738 | |
| 739 | bool allMatchStatesHaveReports(const NGHolder &g) { |
| 740 | unordered_set<NFAVertex> reporters; |
| 741 | for (auto v : inv_adjacent_vertices_range(g.accept, g)) { |
| 742 | if (g[v].reports.empty()) { |
| 743 | DEBUG_PRINTF("vertex %zu has no reports!\n" , g[v].index); |
| 744 | return false; |
| 745 | } |
| 746 | reporters.insert(v); |
| 747 | } |
| 748 | |
| 749 | for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { |
| 750 | if (v == g.accept) { |
| 751 | continue; // stylised edge |
| 752 | } |
| 753 | if (g[v].reports.empty()) { |
| 754 | DEBUG_PRINTF("vertex %zu has no reports!\n" , g[v].index); |
| 755 | return false; |
| 756 | } |
| 757 | reporters.insert(v); |
| 758 | } |
| 759 | |
| 760 | for (auto v : vertices_range(g)) { |
| 761 | if (!contains(reporters, v) && !g[v].reports.empty()) { |
| 762 | DEBUG_PRINTF("vertex %zu is not a match state, but has reports!\n" , |
| 763 | g[v].index); |
| 764 | return false; |
| 765 | } |
| 766 | } |
| 767 | |
| 768 | return true; |
| 769 | } |
| 770 | |
| 771 | bool isCorrectlyTopped(const NGHolder &g) { |
| 772 | if (is_triggered(g)) { |
| 773 | for (const auto &e : out_edges_range(g.start, g)) { |
| 774 | if (g[e].tops.empty() != (target(e, g) == g.startDs)) { |
| 775 | return false; |
| 776 | } |
| 777 | } |
| 778 | } else { |
| 779 | for (const auto &e : out_edges_range(g.start, g)) { |
| 780 | if (!g[e].tops.empty()) { |
| 781 | return false; |
| 782 | } |
| 783 | } |
| 784 | } |
| 785 | |
| 786 | return true; |
| 787 | } |
| 788 | |
| 789 | #endif // NDEBUG |
| 790 | |
| 791 | } // namespace ue2 |
| 792 | |