| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief NFA graph reductions. |
| 31 | * |
| 32 | * This code attempts to make the NFA graph smaller by performing a number of |
| 33 | * local transformations: |
| 34 | * |
| 35 | * ### (1) removal of redundant vertices: |
| 36 | * |
| 37 | * v is redundant wrt to u if succ(v) is a subset of succ(u) |
| 38 | * AND pred(v) is a subset of pred(u) |
| 39 | * AND cr(v) is a subset of cr(u) |
| 40 | * |
| 41 | * ### (2) 'diamond' transformation: |
| 42 | * |
| 43 | * given succ(v) == succ(u) and pred(v) == pred(u), |
| 44 | * v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v), |
| 45 | * and cr(w) = union(cr(v), cr(u)) |
| 46 | * |
| 47 | * ### (3) locally identifiable left equivalence: |
| 48 | * |
| 49 | * given pred(v) == pred(u) (**) and cr(v) == cr(u), |
| 50 | * v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v), |
| 51 | * and succ(w) = union(succ(v), succ(u)) |
| 52 | * |
| 53 | * ### (4) locally identifiable right equivalence: |
| 54 | * |
| 55 | * given succ(v) == succ(u) (**) and cr(v) == cr(u), |
| 56 | * v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v), |
| 57 | * and pred(w) = union(pred(v), pred(u)) |
| 58 | * |
| 59 | * NOTE (**): for left and right equivalence, we can also do the transform if |
| 60 | * set(u) contains u, set(v) contains v and the sets are otherwise equal. This |
| 61 | * enables equivalent vertices with self-loops to be merged. |
| 62 | * |
| 63 | * If v and u raise accepts, they can only be merged if they raise the same |
| 64 | * report IDs. |
| 65 | * |
| 66 | * Transformations are applied repeatedly until the graph stops changing. |
| 67 | * |
| 68 | * Note that the final graph may depend on the order in which these |
| 69 | * transformations are applied. In order to reduce the non-determinism the |
| 70 | * following order is imposed: (1); (2); (3) + (4). |
| 71 | */ |
| 72 | #include "ng_redundancy.h" |
| 73 | |
| 74 | #include "ng_holder.h" |
| 75 | #include "ng_calc_components.h" |
| 76 | #include "ng_dominators.h" |
| 77 | #include "ng_prune.h" |
| 78 | #include "ng_util.h" |
| 79 | #include "ue2common.h" |
| 80 | #include "util/container.h" |
| 81 | #include "util/flat_containers.h" |
| 82 | #include "util/graph_range.h" |
| 83 | |
| 84 | #include <algorithm> |
| 85 | #include <cassert> |
| 86 | #include <map> |
| 87 | #include <set> |
| 88 | #include <vector> |
| 89 | |
| 90 | #include <boost/graph/depth_first_search.hpp> |
| 91 | #include <boost/graph/reverse_graph.hpp> |
| 92 | |
| 93 | using namespace std; |
| 94 | |
| 95 | namespace ue2 { |
| 96 | |
| 97 | namespace { |
| 98 | |
| 99 | /** Precalculated (and maintained) information about a vertex. */ |
| 100 | class VertexInfo { |
| 101 | public: |
| 102 | flat_set<NFAVertex> pred; //!< predecessors of this vertex |
| 103 | flat_set<NFAVertex> succ; //!< successors of this vertex |
| 104 | bool isAccept = false; //!< does this vertex lead to accept? |
| 105 | bool isRemoved = false; //!< have we already removed this vertex? |
| 106 | |
| 107 | size_t inDegree() const { return pred.size(); } |
| 108 | size_t outDegree() const { return succ.size(); } |
| 109 | }; |
| 110 | |
| 111 | class VertexInfoMap { |
| 112 | public: |
| 113 | explicit VertexInfoMap(const NGHolder &gg) |
| 114 | : g(gg), infos(num_vertices(gg)) {} |
| 115 | VertexInfo &operator[](NFAVertex v) { |
| 116 | u32 i = g[v].index; |
| 117 | assert(i < infos.size()); |
| 118 | return infos[i]; |
| 119 | } |
| 120 | |
| 121 | const VertexInfo &operator[](NFAVertex v) const { |
| 122 | u32 i = g[v].index; |
| 123 | assert(i < infos.size()); |
| 124 | return infos[i]; |
| 125 | } |
| 126 | |
| 127 | private: |
| 128 | const NGHolder &g; |
| 129 | vector<VertexInfo> infos; |
| 130 | }; |
| 131 | |
| 132 | } // namespace |
| 133 | |
| 134 | /** Populates the info map with their predecessor and successor states, and |
| 135 | * whether they are accept states. */ |
| 136 | static |
| 137 | void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) { |
| 138 | for (auto v : vertices_range(g)) { |
| 139 | VertexInfo &info = infoMap[v]; |
| 140 | assert(info.pred.empty() && info.succ.empty()); |
| 141 | |
| 142 | // Build successor and predecessor sets |
| 143 | insert(&info.pred, inv_adjacent_vertices(v, g)); |
| 144 | insert(&info.succ, adjacent_vertices(v, g)); |
| 145 | |
| 146 | // Note whether the vertex is an accept state |
| 147 | if (!is_special(v, g)) { |
| 148 | if (contains(info.succ, g.accept) |
| 149 | || contains(info.succ, g.acceptEod)) { |
| 150 | info.isAccept = true; |
| 151 | } |
| 152 | } |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | /** Helper function to take the intersection of two sorted vertex sets |
| 157 | * in-place. */ |
| 158 | static |
| 159 | void inplaceIntersection(vector<NFAVertex> &vset1, |
| 160 | const flat_set<NFAVertex> &vset2) { |
| 161 | const NFAVertex GONE = NGHolder::null_vertex(); |
| 162 | |
| 163 | vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end(); |
| 164 | flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end(); |
| 165 | |
| 166 | while ((it != ite) && (jt != jte)) { |
| 167 | assert(*it != GONE); |
| 168 | |
| 169 | if (*it < *jt) { |
| 170 | // present in vset1 but not in vset2. Set to null, remove in a |
| 171 | // second pass. |
| 172 | *it = GONE; |
| 173 | ++it; |
| 174 | } else if (*jt < *it) { |
| 175 | // present in vset2 but not in vset1, skip. |
| 176 | ++jt; |
| 177 | } else { |
| 178 | // present in both sets. |
| 179 | ++it; ++jt; |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | // Left overs are only in that set. |
| 184 | vset1.erase(it, ite); |
| 185 | |
| 186 | // Remove nulls created above. |
| 187 | vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end()); |
| 188 | } |
| 189 | |
| 190 | /** Find the intersection of the successors of our predecessors. */ |
| 191 | static |
| 192 | void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet, |
| 193 | const VertexInfoMap &infoMap, |
| 194 | vector<NFAVertex> &intersection, |
| 195 | bool considerSelf = true /* follow self loops */) { |
| 196 | /* find a good seed for the intersection */ |
| 197 | const flat_set<NFAVertex> *best = nullptr; |
| 198 | for (auto u : predSet) { |
| 199 | if (!considerSelf && u == v) { |
| 200 | continue; |
| 201 | } |
| 202 | |
| 203 | const flat_set<NFAVertex> &succSet = infoMap[u].succ; |
| 204 | if (!best || succSet.size() <= best->size()) { |
| 205 | best = &succSet; |
| 206 | |
| 207 | // Break out if we've reduced our intersection to [v] |
| 208 | if (best->size() == 1) { |
| 209 | assert(*(best->begin()) == v); |
| 210 | intersection.push_back(v); |
| 211 | return; |
| 212 | } |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | if (best) { |
| 217 | insert(&intersection, intersection.end(), *best); |
| 218 | } |
| 219 | |
| 220 | for (auto u : predSet) { |
| 221 | if (!considerSelf && u == v) { |
| 222 | continue; |
| 223 | } |
| 224 | |
| 225 | inplaceIntersection(intersection, infoMap[u].succ); |
| 226 | |
| 227 | // Check: intersection should always be at least size 1 |
| 228 | assert(!intersection.empty()); |
| 229 | |
| 230 | // Break out if we've reduced our intersection to [v] |
| 231 | if (intersection.size() == 1) { |
| 232 | assert(*intersection.begin() == v); |
| 233 | return; |
| 234 | } |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | /** Find the intersection of the predecessors of our successors. */ |
| 239 | static |
| 240 | void predSuccIntersection(const NFAVertex v, |
| 241 | const flat_set<NFAVertex> &succSet, |
| 242 | const VertexInfoMap &infoMap, |
| 243 | vector<NFAVertex> &intersection, |
| 244 | bool considerSelf = true /* follow self loops */) { |
| 245 | /* find a good seed for the intersection */ |
| 246 | const flat_set<NFAVertex> *best = nullptr; |
| 247 | for (auto w : succSet) { |
| 248 | if (!considerSelf && w == v) { |
| 249 | continue; |
| 250 | } |
| 251 | |
| 252 | const flat_set<NFAVertex> &predSet = infoMap[w].pred; |
| 253 | if (!best || predSet.size() <= best->size()) { |
| 254 | best = &predSet; |
| 255 | |
| 256 | // Break out if we've reduced our intersection to [v] |
| 257 | if (best->size() == 1) { |
| 258 | assert(*(best->begin()) == v); |
| 259 | intersection.push_back(v); |
| 260 | return; |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | if (best) { |
| 266 | insert(&intersection, intersection.end(), *best); |
| 267 | } |
| 268 | |
| 269 | for (auto w : succSet) { |
| 270 | if (!considerSelf && w == v) { |
| 271 | continue; |
| 272 | } |
| 273 | |
| 274 | inplaceIntersection(intersection, infoMap[w].pred); |
| 275 | |
| 276 | // Check: intersection should always be at least size 1 |
| 277 | assert(!intersection.empty()); |
| 278 | |
| 279 | // Break out if we've reduced our intersection to [v] |
| 280 | if (intersection.size() == 1) { |
| 281 | assert(*intersection.begin() == v); |
| 282 | return; |
| 283 | } |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | /** Update containers to take into account the removal of vertex v. */ |
| 288 | static |
| 289 | void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap, |
| 290 | set<NFAVertex> &removable) { |
| 291 | VertexInfo &info = infoMap[v]; |
| 292 | assert(!info.isRemoved); |
| 293 | assert(!contains(removable, v)); |
| 294 | info.isRemoved = true; |
| 295 | removable.insert(v); |
| 296 | |
| 297 | // remove v from its predecessors' successors |
| 298 | for (auto u : info.pred) { |
| 299 | infoMap[u].succ.erase(v); |
| 300 | } |
| 301 | |
| 302 | // remove v from its successors' predecessors |
| 303 | for (auto w : info.succ) { |
| 304 | infoMap[w].pred.erase(v); |
| 305 | } |
| 306 | } |
| 307 | |
| 308 | static |
| 309 | bool hasInEdgeTops(const NGHolder &g, NFAVertex v) { |
| 310 | NFAEdge e = edge(g.start, v, g); |
| 311 | return e && !g[e].tops.empty(); |
| 312 | } |
| 313 | |
| 314 | /** Transform (1), removal of redundant vertices. */ |
| 315 | static |
| 316 | bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, |
| 317 | set<NFAVertex> &removable) { |
| 318 | /* useless merges can be done in any order, no need to take any care with |
| 319 | * ordering */ |
| 320 | |
| 321 | // Temporary vectors used for intersections below |
| 322 | vector<NFAVertex> succPredSet, predSuccSet, intersection; |
| 323 | |
| 324 | bool changed = false; |
| 325 | for (auto v : vertices_range(g)) { |
| 326 | VertexInfo &info = infoMap[v]; |
| 327 | |
| 328 | if (info.isRemoved) { |
| 329 | continue; |
| 330 | } |
| 331 | |
| 332 | assert(!contains(removable, v)); |
| 333 | |
| 334 | if (is_special(v, g)) { |
| 335 | continue; |
| 336 | } |
| 337 | |
| 338 | /* we do not need to check for out edge tops - as only specials (start) |
| 339 | * can have tops and they are already disqualified. */ |
| 340 | if (hasInEdgeTops(g, v)) { |
| 341 | continue; // Conservatively skip anything with nonzero tops. |
| 342 | } |
| 343 | |
| 344 | if (info.pred.empty() || info.succ.empty()) { |
| 345 | DEBUG_PRINTF("vertex %zu has empty pred/succ list\n" , g[v].index); |
| 346 | assert(0); // non-special states should always have succ/pred lists |
| 347 | continue; |
| 348 | } |
| 349 | |
| 350 | // The following cases are more complex and rely on the intersection of |
| 351 | // Succ(Pred(v)) and Pred(Succ(v)) |
| 352 | |
| 353 | // Compute intersections, operating on the smaller set first |
| 354 | // Note that we use vectors here, as set_intersection underneath |
| 355 | // guarantees sorted output, and vectors were quite a bit |
| 356 | // faster than sets or lists. |
| 357 | |
| 358 | succPredSet.clear(); |
| 359 | predSuccSet.clear(); |
| 360 | |
| 361 | if (info.pred.size() <= info.succ.size()) { |
| 362 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
| 363 | if (succPredSet.size() == 1) { |
| 364 | // nobody in here but us chickens |
| 365 | assert(*succPredSet.begin() == v); |
| 366 | continue; |
| 367 | } |
| 368 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
| 369 | if (predSuccSet.size() == 1) { |
| 370 | assert(*predSuccSet.begin() == v); |
| 371 | continue; |
| 372 | } |
| 373 | } else { |
| 374 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
| 375 | if (predSuccSet.size() == 1) { |
| 376 | assert(*predSuccSet.begin() == v); |
| 377 | continue; |
| 378 | } |
| 379 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
| 380 | if (succPredSet.size() == 1) { |
| 381 | assert(*succPredSet.begin() == v); |
| 382 | continue; |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) |
| 387 | intersection.clear(); |
| 388 | set_intersection(succPredSet.begin(), succPredSet.end(), |
| 389 | predSuccSet.begin(), predSuccSet.end(), |
| 390 | back_inserter(intersection)); |
| 391 | |
| 392 | /* Boring if it is just us in the intersection */ |
| 393 | if (intersection.size() < 2) { |
| 394 | continue; |
| 395 | } |
| 396 | |
| 397 | // Compare char_reach, mark v for removal if any members of |
| 398 | // the intersection have an equal or greater reach |
| 399 | const CharReach &currReach = g[v].char_reach; |
| 400 | const auto &currReports = g[v].reports; |
| 401 | for (auto t : intersection) { |
| 402 | const VertexInfo &info2 = infoMap[t]; |
| 403 | |
| 404 | /* start is never a succ of a state, so will never be in the |
| 405 | * predsucc/succpred intersection */ |
| 406 | assert(t != g.start); |
| 407 | |
| 408 | if (t == v || info2.isRemoved) { |
| 409 | continue; |
| 410 | } |
| 411 | |
| 412 | // For each candidate C to make V redundant, check: |
| 413 | // if V is an accept state, C must be an accept state for |
| 414 | // the same pattern |
| 415 | // pred(C) is a superset of pred(V) |
| 416 | // succ(C) is a superset of succ(V) |
| 417 | // reach(C) is a superset of reach(V) |
| 418 | // |
| 419 | // Note: pred/sec tests are covered by the intersections |
| 420 | // calculated above. |
| 421 | |
| 422 | /* note: links to accepts are also tracked in succs */ |
| 423 | if (info.isAccept && currReports != g[t].reports) { |
| 424 | continue; |
| 425 | } |
| 426 | |
| 427 | if (som) { |
| 428 | if (t == g.startDs) { |
| 429 | continue; |
| 430 | } |
| 431 | if (is_virtual_start(t, g) != is_virtual_start(v, g)) { |
| 432 | continue; |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | /* we do not need to check for out edge tops - as only start |
| 437 | * can have tops and it has already been ruled out. */ |
| 438 | if (hasInEdgeTops(g, t)) { |
| 439 | continue; // Conservatively skip anything with nonzero tops. |
| 440 | } |
| 441 | |
| 442 | CharReach &otherReach = g[t].char_reach; |
| 443 | if (currReach.isSubsetOf(otherReach)) { |
| 444 | DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n" , |
| 445 | g[v].index, g[t].index); |
| 446 | markForRemoval(v, infoMap, removable); |
| 447 | changed = true; |
| 448 | break; |
| 449 | } |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | return changed; |
| 454 | } |
| 455 | |
| 456 | /** Transform (2), diamond merge pass. */ |
| 457 | static |
| 458 | bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap, |
| 459 | set<NFAVertex> &removable) { |
| 460 | // Temporary vectors used for intersections below |
| 461 | vector<NFAVertex> succPredSet, predSuccSet, intersection; |
| 462 | |
| 463 | bool changed = false; |
| 464 | for (auto v : vertices_range(g)) { |
| 465 | VertexInfo &info = infoMap[v]; |
| 466 | |
| 467 | if (info.isRemoved) { |
| 468 | continue; |
| 469 | } |
| 470 | |
| 471 | assert(!contains(removable, v)); |
| 472 | |
| 473 | if (is_special(v, g)) { |
| 474 | continue; |
| 475 | } |
| 476 | |
| 477 | /* we do not need to check for out edge tops - as only specials (start) |
| 478 | * can have tops and they are already disqualified. */ |
| 479 | if (hasInEdgeTops(g, v)) { |
| 480 | continue; // Conservatively skip anything with nonzero tops. |
| 481 | } |
| 482 | |
| 483 | if (info.pred.empty() || info.succ.empty()) { |
| 484 | assert(0); // non-special states should always have succ/pred lists |
| 485 | continue; |
| 486 | } |
| 487 | |
| 488 | // The following cases are more complex and rely on the intersection of |
| 489 | // Succ(Pred(v)) and Pred(Succ(v)) |
| 490 | |
| 491 | // Compute intersections, operating on the smaller set first |
| 492 | // Note that we use vectors here, as set_intersection underneath |
| 493 | // guarantees sorted output, and vectors were quite a bit faster than |
| 494 | // sets or lists. |
| 495 | |
| 496 | succPredSet.clear(); |
| 497 | predSuccSet.clear(); |
| 498 | |
| 499 | if (info.pred.size() <= info.succ.size()) { |
| 500 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
| 501 | if (succPredSet.size() == 1) { |
| 502 | // nobody in here but us chickens |
| 503 | assert(*succPredSet.begin() == v); |
| 504 | continue; |
| 505 | } |
| 506 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
| 507 | if (predSuccSet.size() == 1) { |
| 508 | assert(*predSuccSet.begin() == v); |
| 509 | continue; |
| 510 | } |
| 511 | } else { |
| 512 | predSuccIntersection(v, info.succ, infoMap, predSuccSet); |
| 513 | if (predSuccSet.size() == 1) { |
| 514 | assert(*predSuccSet.begin() == v); |
| 515 | continue; |
| 516 | } |
| 517 | succPredIntersection(v, info.pred, infoMap, succPredSet); |
| 518 | if (succPredSet.size() == 1) { |
| 519 | assert(*succPredSet.begin() == v); |
| 520 | continue; |
| 521 | } |
| 522 | } |
| 523 | |
| 524 | // Find the intersection of Succ(Pred(v)) and Pred(Succ(v)) |
| 525 | intersection.clear(); |
| 526 | set_intersection(succPredSet.begin(), succPredSet.end(), |
| 527 | predSuccSet.begin(), predSuccSet.end(), |
| 528 | back_inserter(intersection)); |
| 529 | |
| 530 | /* Boring if it is just us in the intersection */ |
| 531 | if (intersection.size() < 2) { |
| 532 | continue; |
| 533 | } |
| 534 | |
| 535 | const CharReach &currReach = g[v].char_reach; |
| 536 | const auto &currReports = g[v].reports; |
| 537 | for (auto t : intersection) { |
| 538 | const VertexInfo &info2 = infoMap[t]; |
| 539 | |
| 540 | if (t == v || info2.isRemoved || is_special(t, g)) { |
| 541 | continue; |
| 542 | } |
| 543 | |
| 544 | /* note: links to accepts are also tracked in succs */ |
| 545 | if (info.isAccept && currReports != g[t].reports) { |
| 546 | continue; |
| 547 | } |
| 548 | |
| 549 | /* we do not need to check for out edge tops - as only specials |
| 550 | * (start) can have tops and they are already disqualified. */ |
| 551 | if (hasInEdgeTops(g, t)) { |
| 552 | continue; // Conservatively skip anything with nonzero tops. |
| 553 | } |
| 554 | |
| 555 | if (som) { |
| 556 | if (is_virtual_start(v, g) != is_virtual_start(t, g)) { |
| 557 | continue; // can only merge like with like. |
| 558 | } |
| 559 | } |
| 560 | |
| 561 | // If in-degree of v == in-degree of target |
| 562 | // and out-degree of v == out-degree of target |
| 563 | // (because pred and succ are supersets) |
| 564 | // then combine charreach of v into target and remove v |
| 565 | if (info.inDegree() == info2.inDegree() |
| 566 | && info.outDegree() == info2.outDegree()) { |
| 567 | // add character reachability of v into target |
| 568 | CharReach &otherReach = g[t].char_reach; |
| 569 | otherReach |= currReach; |
| 570 | // v can be removed |
| 571 | DEBUG_PRINTF("removing redundant vertex %zu and merging " |
| 572 | "reachability with vertex %zu\n" , |
| 573 | g[v].index, g[t].index); |
| 574 | markForRemoval(v, infoMap, removable); |
| 575 | changed = true; |
| 576 | break; |
| 577 | } |
| 578 | } |
| 579 | } |
| 580 | |
| 581 | return changed; |
| 582 | } |
| 583 | |
| 584 | namespace { |
| 585 | |
| 586 | struct ReachMismatch {}; |
| 587 | |
| 588 | class ReachSubsetVisitor : public boost::default_dfs_visitor { |
| 589 | public: |
| 590 | explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {} |
| 591 | |
| 592 | template <class Graph, class Vertex> |
| 593 | void discover_vertex(const Vertex &v, const Graph &g) const { |
| 594 | if (is_any_start(v, g)) { |
| 595 | return; // start vertices are OK |
| 596 | } else if (is_special(v, g)) { |
| 597 | assert(0); |
| 598 | throw ReachMismatch(); // other special nodes?? |
| 599 | } |
| 600 | |
| 601 | const CharReach &vcr = g[v].char_reach; |
| 602 | DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n" , vcr.count(), |
| 603 | cr.count()); |
| 604 | if (vcr != (vcr & cr)) { |
| 605 | throw ReachMismatch(); |
| 606 | } |
| 607 | } |
| 608 | |
| 609 | private: |
| 610 | const CharReach &cr; |
| 611 | }; |
| 612 | |
| 613 | /** Terminator function for DFS used in pathReachSubset. */ |
| 614 | template <class Graph, class Vertex> class VertexIs { |
| 615 | public: |
| 616 | explicit VertexIs(const Vertex &v) : vertex(v) {} |
| 617 | bool operator()(const Vertex &v, const Graph &) const { |
| 618 | return v == vertex; |
| 619 | } |
| 620 | |
| 621 | private: |
| 622 | Vertex vertex; |
| 623 | }; |
| 624 | |
| 625 | } // namespace |
| 626 | |
| 627 | /** Returns true if every vertex on paths leading to edge \p e has reachability |
| 628 | * which is a subset of the reachability of \p dom */ |
| 629 | static |
| 630 | bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom, |
| 631 | const NGHolder &g) { |
| 632 | const CharReach &domReach = g[dom].char_reach; |
| 633 | if (domReach.all()) { |
| 634 | return true; |
| 635 | } |
| 636 | |
| 637 | NFAVertex start = source(e, g); |
| 638 | using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>; |
| 639 | map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor; |
| 640 | |
| 641 | // Walk the graph backwards from v, examining each node. We fail (return |
| 642 | // false) if we encounter a node with reach NOT a subset of domReach, and |
| 643 | // we stop searching at dom. |
| 644 | try { |
| 645 | depth_first_visit(RevGraph(g), start, |
| 646 | ReachSubsetVisitor(domReach), |
| 647 | make_assoc_property_map(vertexColor), |
| 648 | VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom)); |
| 649 | } catch(ReachMismatch&) { |
| 650 | return false; |
| 651 | } |
| 652 | |
| 653 | return true; |
| 654 | } |
| 655 | |
| 656 | /** Returns true if every vertex on paths leading from edge \p e has |
| 657 | * reachability which is a subset of the reachability of \p dom */ |
| 658 | static |
| 659 | bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom, |
| 660 | const NGHolder &g) { |
| 661 | const CharReach &domReach = g[dom].char_reach; |
| 662 | if (domReach.all()) { |
| 663 | return true; |
| 664 | } |
| 665 | |
| 666 | NFAVertex start = target(e, g); |
| 667 | map<NFAVertex, boost::default_color_type> vertexColor; |
| 668 | |
| 669 | // Walk the graph forward from v, examining each node. We fail (return |
| 670 | // false) if we encounter a node with reach NOT a subset of domReach, and |
| 671 | // we stop searching at dom. |
| 672 | try { |
| 673 | depth_first_visit(g, start, ReachSubsetVisitor(domReach), |
| 674 | make_assoc_property_map(vertexColor), |
| 675 | VertexIs<NGHolder, NFAVertex>(dom)); |
| 676 | } catch(ReachMismatch&) { |
| 677 | return false; |
| 678 | } |
| 679 | |
| 680 | return true; |
| 681 | } |
| 682 | |
| 683 | static |
| 684 | bool allOutsSpecial(NFAVertex v, const NGHolder &g) { |
| 685 | for (auto w : adjacent_vertices_range(v, g)) { |
| 686 | if (!is_special(w, g)) { |
| 687 | return false; |
| 688 | } |
| 689 | } |
| 690 | return true; |
| 691 | } |
| 692 | |
| 693 | static |
| 694 | bool allInsSpecial(NFAVertex v, const NGHolder &g) { |
| 695 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
| 696 | if (!is_special(u, g)) { |
| 697 | return false; |
| 698 | } |
| 699 | } |
| 700 | return true; |
| 701 | } |
| 702 | |
| 703 | /** Cheaply check whether this graph can't be reduced at all, because it is |
| 704 | * just a chain of vertices with no other edges. */ |
| 705 | static |
| 706 | bool isIrreducible(const NGHolder &g) { |
| 707 | for (auto v : vertices_range(g)) { |
| 708 | // skip specials |
| 709 | if (is_special(v, g)) { |
| 710 | continue; |
| 711 | } |
| 712 | |
| 713 | if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) { |
| 714 | return false; |
| 715 | } |
| 716 | if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) { |
| 717 | return false; |
| 718 | } |
| 719 | } |
| 720 | |
| 721 | /* if calcComponents got sleepy and went home, the above checks don't hold |
| 722 | * as it assumes there is only one connected component. */ |
| 723 | if (isAlternationOfClasses(g)) { |
| 724 | return false; |
| 725 | } |
| 726 | |
| 727 | return true; |
| 728 | } |
| 729 | |
| 730 | static |
| 731 | u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) { |
| 732 | u32 count = 0; |
| 733 | |
| 734 | cyclic.resize(num_vertices(g)); |
| 735 | |
| 736 | for (auto v : vertices_range(g)) { |
| 737 | assert(g[v].index < cyclic.size()); |
| 738 | if (hasSelfLoop(v, g)) { |
| 739 | count++; |
| 740 | cyclic[g[v].index] = true; |
| 741 | } |
| 742 | } |
| 743 | |
| 744 | return count; |
| 745 | } |
| 746 | |
| 747 | static |
| 748 | void findCyclicDom(NGHolder &g, vector<bool> &cyclic, |
| 749 | set<NFAEdge> &dead, som_type som) { |
| 750 | auto dominators = findDominators(g); |
| 751 | |
| 752 | for (auto v : vertices_range(g)) { |
| 753 | if (is_special(v, g)) { |
| 754 | continue; |
| 755 | } |
| 756 | |
| 757 | // Path in through a dominator (e.g. '.+a?foobar') |
| 758 | NFAVertex dom = dominators[v]; |
| 759 | if (dom && cyclic[g[dom].index] |
| 760 | && edge(dom, v, g).second) { |
| 761 | |
| 762 | if (som && dom == g.startDs) { |
| 763 | continue; |
| 764 | } |
| 765 | |
| 766 | DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic " |
| 767 | "vertex %zu\n" , g[v].index, g[dom].index); |
| 768 | |
| 769 | // iff all paths through in-edge e of v involve vertices whose |
| 770 | // reachability is a subset of reach(dom), we can delete edge e. |
| 771 | for (const auto &e : in_edges_range(v, g)) { |
| 772 | if (source(e, g) == dom) { |
| 773 | continue; |
| 774 | } |
| 775 | |
| 776 | if (reversePathReachSubset(e, dom, g)) { |
| 777 | DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading " |
| 778 | "paths share dom reach\n" , |
| 779 | g[source(e, g)].index, g[target(e, g)].index); |
| 780 | dead.insert(e); |
| 781 | if (source(e, g) == v) { |
| 782 | cyclic[g[v].index] = false; |
| 783 | } |
| 784 | continue; |
| 785 | } |
| 786 | } |
| 787 | } |
| 788 | } |
| 789 | } |
| 790 | |
| 791 | static |
| 792 | void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic, |
| 793 | set<NFAEdge> &dead) { |
| 794 | auto postdominators = findPostDominators(g); |
| 795 | |
| 796 | for (auto v : vertices_range(g)) { |
| 797 | if (is_special(v, g)) { |
| 798 | continue; |
| 799 | } |
| 800 | |
| 801 | // Path out through a post-dominator (e.g. a?.+foobar') |
| 802 | NFAVertex postdom = postdominators[v]; |
| 803 | if (postdom && cyclic[g[postdom].index] && edge(v, postdom, g).second) { |
| 804 | DEBUG_PRINTF("vertex %zu is postdominated by directly-connected " |
| 805 | "cyclic vertex %zu\n" , g[v].index, g[postdom].index); |
| 806 | |
| 807 | // iff all paths through in-edge e of v involve vertices whose |
| 808 | // reachability is a subset of reach(dom), we can delete edge e. |
| 809 | for (const auto &e : out_edges_range(v, g)) { |
| 810 | if (target(e, g) == postdom) { |
| 811 | continue; |
| 812 | } |
| 813 | |
| 814 | if (forwardPathReachSubset(e, postdom, g)) { |
| 815 | DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing " |
| 816 | "paths share postdom reach\n" , |
| 817 | g[source(e, g)].index, g[target(e, g)].index); |
| 818 | if (target(e, g) == v) { |
| 819 | cyclic[g[v].index] = false; |
| 820 | } |
| 821 | dead.insert(e); |
| 822 | continue; |
| 823 | } |
| 824 | } |
| 825 | } |
| 826 | } |
| 827 | } |
| 828 | |
| 829 | bool removeRedundancy(NGHolder &g, som_type som) { |
| 830 | DEBUG_PRINTF("rr som = %d\n" , (int)som); |
| 831 | renumber_vertices(g); |
| 832 | |
| 833 | // Cheap check: if all the non-special vertices have in-degree one and |
| 834 | // out-degree one, there's no redundancy in this here graph and we can |
| 835 | // vamoose. |
| 836 | if (isIrreducible(g)) { |
| 837 | return false; |
| 838 | } |
| 839 | |
| 840 | VertexInfoMap infoMap(g); |
| 841 | |
| 842 | // Populate maps of successors and predecessors, and accept status |
| 843 | populateContainers(g, infoMap); |
| 844 | |
| 845 | /* Run multiple passes: terminate when a full pass doesn't remove |
| 846 | * any vertices */ |
| 847 | bool doUseless = true; |
| 848 | bool doDiamond = true; |
| 849 | set<NFAVertex> removable; |
| 850 | while (doUseless || doDiamond) { |
| 851 | if (doUseless |
| 852 | && doUselessMergePass(g, som, infoMap, removable)) { |
| 853 | doDiamond = true; |
| 854 | } |
| 855 | doUseless = false; |
| 856 | |
| 857 | if (doDiamond |
| 858 | && doDiamondMergePass(g, som, infoMap, removable)) { |
| 859 | doUseless = true; |
| 860 | } |
| 861 | doDiamond = false; |
| 862 | } |
| 863 | DEBUG_PRINTF("found %zu removable vertices overall.\n" , removable.size()); |
| 864 | remove_vertices(removable, g); |
| 865 | |
| 866 | return !removable.empty(); |
| 867 | } |
| 868 | |
| 869 | /** UE-524: remove edges into nodes that are dominated by cyclic nodes with |
| 870 | * reachability that is a superset of all paths feeding into that edge. */ |
| 871 | bool removeCyclicDominated(NGHolder &g, som_type som) { |
| 872 | set<NFAEdge> dead; |
| 873 | vector<bool> cyclic; |
| 874 | bool changed = false; |
| 875 | |
| 876 | findCyclic(g, cyclic); |
| 877 | |
| 878 | findCyclicDom(g, cyclic, dead, som); |
| 879 | if (!dead.empty()) { |
| 880 | remove_edges(dead, g); |
| 881 | pruneUseless(g); |
| 882 | dead.clear(); |
| 883 | cyclic.clear(); // need to recalculate cyclic as ids have changed |
| 884 | findCyclic(g, cyclic); |
| 885 | changed = true; |
| 886 | } |
| 887 | |
| 888 | findCyclicPostDom(g, cyclic, dead); |
| 889 | if (!dead.empty()) { |
| 890 | remove_edges(dead, g); |
| 891 | pruneUseless(g); |
| 892 | dead.clear(); |
| 893 | changed = true; |
| 894 | } |
| 895 | |
| 896 | return changed; |
| 897 | } |
| 898 | |
| 899 | } // namespace ue2 |
| 900 | |