| 1 | /* |
| 2 | * Copyright (c) 2015-2018, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "rose_build_add_internal.h" |
| 30 | #include "rose_build_impl.h" |
| 31 | |
| 32 | #include "ue2common.h" |
| 33 | #include "grey.h" |
| 34 | #include "rose_build_anchored.h" |
| 35 | #include "rose_in_util.h" |
| 36 | #include "hwlm/hwlm_literal.h" |
| 37 | #include "nfa/goughcompile.h" |
| 38 | #include "nfa/nfa_api_queue.h" |
| 39 | #include "nfagraph/ng_depth.h" |
| 40 | #include "nfagraph/ng_dump.h" |
| 41 | #include "nfagraph/ng_holder.h" |
| 42 | #include "nfagraph/ng_limex.h" |
| 43 | #include "nfagraph/ng_mcclellan.h" |
| 44 | #include "nfagraph/ng_prefilter.h" |
| 45 | #include "nfagraph/ng_prune.h" |
| 46 | #include "nfagraph/ng_region.h" |
| 47 | #include "nfagraph/ng_repeat.h" |
| 48 | #include "nfagraph/ng_reports.h" |
| 49 | #include "nfagraph/ng_util.h" |
| 50 | #include "nfagraph/ng_width.h" |
| 51 | #include "util/charreach.h" |
| 52 | #include "util/charreach_util.h" |
| 53 | #include "util/compare.h" |
| 54 | #include "util/compile_context.h" |
| 55 | #include "util/container.h" |
| 56 | #include "util/dump_charclass.h" |
| 57 | #include "util/graph_range.h" |
| 58 | #include "util/insertion_ordered.h" |
| 59 | #include "util/make_unique.h" |
| 60 | #include "util/noncopyable.h" |
| 61 | #include "util/order_check.h" |
| 62 | #include "util/report_manager.h" |
| 63 | #include "util/ue2string.h" |
| 64 | #include "util/verify_types.h" |
| 65 | |
| 66 | #include <algorithm> |
| 67 | #include <map> |
| 68 | #include <set> |
| 69 | #include <string> |
| 70 | #include <vector> |
| 71 | #include <utility> |
| 72 | |
| 73 | using namespace std; |
| 74 | |
| 75 | namespace ue2 { |
| 76 | |
| 77 | /** |
| 78 | * \brief Data used by most of the construction code in this file. |
| 79 | */ |
| 80 | struct RoseBuildData : noncopyable { |
| 81 | RoseBuildData(const RoseInGraph &ig_in, bool som_in) |
| 82 | : ig(ig_in), som(som_in) {} |
| 83 | |
| 84 | /** Input rose graph. */ |
| 85 | const RoseInGraph &ig; |
| 86 | |
| 87 | /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which |
| 88 | * require ANCH history to prevent overlap. */ |
| 89 | unordered_set<RoseInEdge> anch_history_edges; |
| 90 | |
| 91 | /** True if we're tracking Start of Match. */ |
| 92 | bool som; |
| 93 | }; |
| 94 | |
| 95 | static |
| 96 | ReportID findReportId(const NGHolder &g) { |
| 97 | /* prefix/infix always have an edge to accept and only 1 reportid initially |
| 98 | */ |
| 99 | assert(in_degree(g.accept, g)); |
| 100 | const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports; |
| 101 | assert(!rep.empty()); |
| 102 | return *rep.begin(); |
| 103 | } |
| 104 | |
| 105 | static |
| 106 | RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset, |
| 107 | u32 max_offset) { |
| 108 | RoseGraph &g = build->g; |
| 109 | // add to tree |
| 110 | RoseVertex v = add_vertex(g); |
| 111 | g[v].min_offset = min_offset; |
| 112 | g[v].max_offset = max_offset; |
| 113 | |
| 114 | DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n" , g[v].index, |
| 115 | literalId); |
| 116 | g[v].literals.insert(literalId); |
| 117 | build->literal_info[literalId].vertices.insert(v); |
| 118 | |
| 119 | return v; |
| 120 | } |
| 121 | |
| 122 | RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, |
| 123 | u32 minBound, u32 maxBound, u32 literalId, |
| 124 | size_t literalLength, |
| 125 | const flat_set<ReportID> &reports) { |
| 126 | assert(parent != RoseGraph::null_vertex()); |
| 127 | |
| 128 | RoseGraph &g = build->g; |
| 129 | // add to tree (offsets set latter) |
| 130 | RoseVertex v = createVertex(build, literalId, 0U, 0U); |
| 131 | |
| 132 | /* fill in report information */ |
| 133 | g[v].reports.insert(reports.begin(), reports.end()); |
| 134 | |
| 135 | RoseEdge e = add_edge(parent, v, g); |
| 136 | DEBUG_PRINTF("adding edge (%u, %u) to parent\n" , minBound, maxBound); |
| 137 | |
| 138 | g[e].minBound = minBound; |
| 139 | g[e].maxBound = maxBound; |
| 140 | g[e].rose_top = 0; |
| 141 | |
| 142 | u32 min_offset = add_rose_depth(g[parent].min_offset, minBound); |
| 143 | u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound); |
| 144 | |
| 145 | /* take literal length into account for offsets */ |
| 146 | const u32 lit_len = verify_u32(literalLength); |
| 147 | min_offset = add_rose_depth(min_offset, lit_len); |
| 148 | max_offset = add_rose_depth(max_offset, lit_len); |
| 149 | |
| 150 | g[v].min_offset = min_offset; |
| 151 | g[v].max_offset = max_offset; |
| 152 | |
| 153 | return v; |
| 154 | } |
| 155 | |
| 156 | static |
| 157 | RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId, |
| 158 | u32 min_offset, u32 max_offset) { |
| 159 | RoseGraph &g = build->g; |
| 160 | RoseVertex v = createVertex(build, literalId, min_offset, max_offset); |
| 161 | |
| 162 | DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n" , g[v].index, |
| 163 | literalId); |
| 164 | |
| 165 | RoseEdge e = add_edge(build->anchored_root, v, g); |
| 166 | g[e].minBound = min_offset; |
| 167 | g[e].maxBound = max_offset; |
| 168 | |
| 169 | return v; |
| 170 | } |
| 171 | |
| 172 | static |
| 173 | RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { |
| 174 | RoseGraph &g = build->g; |
| 175 | RoseVertex w = add_vertex(g[v], g); |
| 176 | DEBUG_PRINTF("added vertex %zu\n" , g[w].index); |
| 177 | |
| 178 | for (auto lit_id : g[w].literals) { |
| 179 | build->literal_info[lit_id].vertices.insert(w); |
| 180 | } |
| 181 | |
| 182 | for (const auto &e : in_edges_range(v, g)) { |
| 183 | RoseVertex s = source(e, g); |
| 184 | add_edge(s, w, g[e], g); |
| 185 | DEBUG_PRINTF("added edge (%zu,%zu)\n" , g[s].index, g[w].index); |
| 186 | } |
| 187 | |
| 188 | return w; |
| 189 | } |
| 190 | |
| 191 | namespace { |
| 192 | struct created_key { |
| 193 | explicit created_key(const RoseInEdgeProps &trep) |
| 194 | : prefix(trep.graph.get()), lag(trep.graph_lag) { |
| 195 | } |
| 196 | bool operator<(const created_key &b) const { |
| 197 | const created_key &a = *this; |
| 198 | ORDER_CHECK(prefix); |
| 199 | ORDER_CHECK(lag); |
| 200 | return false; |
| 201 | } |
| 202 | NGHolder *prefix; |
| 203 | u32 lag; |
| 204 | }; |
| 205 | } |
| 206 | |
| 207 | static |
| 208 | bool isPureAnchored(const NGHolder &h) { |
| 209 | return !proper_out_degree(h.startDs, h); |
| 210 | } |
| 211 | |
| 212 | static |
| 213 | RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd, |
| 214 | const RoseInEdge &rose_edge, const RoseEdge &e) { |
| 215 | const RoseGraph &g = tbi.g; |
| 216 | const RoseVertex u = source(e, g), v = target(e, g); |
| 217 | const bool fixed_offset_src = g[u].fixedOffset(); |
| 218 | const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF); |
| 219 | |
| 220 | DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n" , |
| 221 | g[u].index, g[v].index, g[e].minBound, g[e].maxBound, |
| 222 | (int)g[u].fixedOffset(), (int)g[v].left); |
| 223 | |
| 224 | if (g[v].left) { |
| 225 | // Roles with prefix engines have their history handled by that prefix. |
| 226 | assert(!contains(bd.anch_history_edges, rose_edge)); |
| 227 | return ROSE_ROLE_HISTORY_NONE; |
| 228 | } |
| 229 | |
| 230 | if (contains(bd.anch_history_edges, rose_edge)) { |
| 231 | DEBUG_PRINTF("needs anch history\n" ); |
| 232 | return ROSE_ROLE_HISTORY_ANCH; |
| 233 | } |
| 234 | |
| 235 | if (fixed_offset_src && has_bounds) { |
| 236 | DEBUG_PRINTF("needs anch history\n" ); |
| 237 | return ROSE_ROLE_HISTORY_ANCH; |
| 238 | } |
| 239 | |
| 240 | return ROSE_ROLE_HISTORY_NONE; |
| 241 | } |
| 242 | |
| 243 | static |
| 244 | bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) { |
| 245 | for (auto v : adjacent_vertices_range(iv, ig)) { |
| 246 | if (ig[v].type != RIV_ACCEPT) { |
| 247 | return true; |
| 248 | } |
| 249 | } |
| 250 | return false; |
| 251 | } |
| 252 | |
| 253 | static |
| 254 | void createVertices(RoseBuildImpl *tbi, |
| 255 | map<RoseInVertex, vector<RoseVertex> > &vertex_map, |
| 256 | const vector<pair<RoseVertex, RoseInEdge> > &parents, |
| 257 | RoseInVertex iv, u32 min_offset, u32 max_offset, |
| 258 | u32 literalId, u32 delay, const RoseBuildData &bd) { |
| 259 | RoseGraph &g = tbi->g; |
| 260 | |
| 261 | DEBUG_PRINTF("vertex has %zu parents\n" , parents.size()); |
| 262 | |
| 263 | map<created_key, RoseVertex> created; |
| 264 | |
| 265 | for (const auto &pv : parents) { |
| 266 | RoseVertex w; |
| 267 | const RoseInEdgeProps &edge_props = bd.ig[pv.second]; |
| 268 | shared_ptr<NGHolder> prefix_graph = edge_props.graph; |
| 269 | u32 prefix_lag = edge_props.graph_lag; |
| 270 | |
| 271 | created_key key(edge_props); |
| 272 | |
| 273 | if (!contains(created, key)) { |
| 274 | assert(prefix_graph || !edge_props.haig); |
| 275 | w = createVertex(tbi, literalId, min_offset, max_offset); |
| 276 | created[key] = w; |
| 277 | |
| 278 | if (prefix_graph) { |
| 279 | g[w].left.graph = prefix_graph; |
| 280 | if (edge_props.dfa) { |
| 281 | g[w].left.dfa = edge_props.dfa; |
| 282 | } |
| 283 | g[w].left.haig = edge_props.haig; |
| 284 | g[w].left.lag = prefix_lag; |
| 285 | |
| 286 | // The graph already has its report id allocated - find it. |
| 287 | g[w].left.leftfix_report = findReportId(*prefix_graph); |
| 288 | |
| 289 | if (g[w].left.dfa || g[w].left.haig) { |
| 290 | assert(prefix_graph); |
| 291 | g[w].left.dfa_min_width = findMinWidth(*prefix_graph); |
| 292 | g[w].left.dfa_max_width = findMaxWidth(*prefix_graph); |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | if (bd.som && !g[w].left.haig) { |
| 297 | /* no prefix - som based on literal start */ |
| 298 | assert(!prefix_graph); |
| 299 | g[w].som_adjust = tbi->literals.at(literalId).elength(); |
| 300 | DEBUG_PRINTF("set som_adjust to %u\n" , g[w].som_adjust); |
| 301 | } |
| 302 | |
| 303 | DEBUG_PRINTF(" adding new vertex index=%zu\n" , tbi->g[w].index); |
| 304 | vertex_map[iv].push_back(w); |
| 305 | } else { |
| 306 | w = created[key]; |
| 307 | } |
| 308 | |
| 309 | RoseVertex p = pv.first; |
| 310 | |
| 311 | RoseEdge e = add_edge(p, w, g); |
| 312 | DEBUG_PRINTF("adding edge (%u,%u) to parent\n" , edge_props.minBound, |
| 313 | edge_props.maxBound); |
| 314 | g[e].minBound = edge_props.minBound; |
| 315 | if (p != tbi->root && g[w].left.graph |
| 316 | && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) { |
| 317 | depth mw = findMaxWidth(*g[w].left.graph); |
| 318 | if (mw.is_infinite()) { |
| 319 | g[e].maxBound = ROSE_BOUND_INF; |
| 320 | } else { |
| 321 | DEBUG_PRINTF("setting max to %s + %u\n" , mw.str().c_str(), |
| 322 | prefix_lag); |
| 323 | g[e].maxBound = prefix_lag + mw; |
| 324 | } |
| 325 | } else { |
| 326 | g[e].maxBound = edge_props.maxBound; |
| 327 | } |
| 328 | g[e].rose_top = 0; |
| 329 | g[e].history = selectHistory(*tbi, bd, pv.second, e); |
| 330 | } |
| 331 | |
| 332 | if (delay && hasSuccessorLiterals(iv, bd.ig)) { |
| 333 | // Add an undelayed "ghost" vertex for this literal. |
| 334 | u32 ghostId = tbi->literal_info[literalId].undelayed_id; |
| 335 | DEBUG_PRINTF("creating delay ghost vertex, id=%u\n" , ghostId); |
| 336 | assert(ghostId != literalId); |
| 337 | assert(tbi->literals.at(ghostId).delay == 0); |
| 338 | |
| 339 | // Adjust offsets, removing delay. |
| 340 | u32 ghost_min = min_offset, ghost_max = max_offset; |
| 341 | assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay); |
| 342 | ghost_min -= delay; |
| 343 | ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay; |
| 344 | |
| 345 | RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max); |
| 346 | |
| 347 | for (const auto &pv : parents) { |
| 348 | const RoseInEdgeProps &edge_props = bd.ig[pv.second]; |
| 349 | RoseEdge e = add_edge(pv.first, g_v, tbi->g); |
| 350 | g[e].minBound = edge_props.minBound; |
| 351 | g[e].maxBound = edge_props.maxBound; |
| 352 | g[e].history = selectHistory(*tbi, bd, pv.second, e); |
| 353 | DEBUG_PRINTF("parent edge has bounds [%u,%u]\n" , |
| 354 | edge_props.minBound, edge_props.maxBound); |
| 355 | } |
| 356 | |
| 357 | for (auto &m : created) { |
| 358 | tbi->ghost[m.second] = g_v; |
| 359 | } |
| 360 | } |
| 361 | } |
| 362 | |
| 363 | /* ensure the holder does not accept any paths which do not end with lit */ |
| 364 | static |
| 365 | void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { |
| 366 | DEBUG_PRINTF("strip '%s'\n" , dumpString(lit).c_str()); |
| 367 | set<NFAVertex> curr, next; |
| 368 | curr.insert(g.accept); |
| 369 | curr.insert(g.acceptEod); |
| 370 | |
| 371 | for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { |
| 372 | next.clear(); |
| 373 | for (auto curr_v : curr) { |
| 374 | DEBUG_PRINTF("handling %zu\n" , g[curr_v].index); |
| 375 | vector<NFAVertex> next_cand; |
| 376 | insert(&next_cand, next_cand.end(), |
| 377 | inv_adjacent_vertices(curr_v, g)); |
| 378 | clear_in_edges(curr_v, g); |
| 379 | if (curr_v == g.acceptEod) { |
| 380 | add_edge(g.accept, g.acceptEod, g); |
| 381 | } |
| 382 | |
| 383 | for (auto v : next_cand) { |
| 384 | assert(v != g.startDs); |
| 385 | if (v == g.start || v == g.startDs || v == g.accept) { |
| 386 | continue; |
| 387 | } |
| 388 | |
| 389 | const CharReach &cr = g[v].char_reach; |
| 390 | |
| 391 | if (!overlaps(*it, cr)) { |
| 392 | DEBUG_PRINTF("false edge %zu\n" , g[v].index); |
| 393 | continue; |
| 394 | } |
| 395 | |
| 396 | NFAVertex v2 = clone_vertex(g, v); |
| 397 | clone_in_edges(g, v, v2); |
| 398 | add_edge(v2, curr_v, g); |
| 399 | g[v2].char_reach &= *it; |
| 400 | DEBUG_PRINTF("next <- %zu\n" , g[v2].index); |
| 401 | next.insert(v2); |
| 402 | } |
| 403 | } |
| 404 | |
| 405 | curr.swap(next); |
| 406 | } |
| 407 | |
| 408 | pruneUseless(g); |
| 409 | clearReports(g); |
| 410 | assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1); |
| 411 | assert(allMatchStatesHaveReports(g)); |
| 412 | |
| 413 | DEBUG_PRINTF("graph has %zu vertices left\n" , num_vertices(g)); |
| 414 | } |
| 415 | |
| 416 | static |
| 417 | RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi, |
| 418 | const RoseInVertexProps &iv_info, |
| 419 | const RoseInEdgeProps &ep) { |
| 420 | if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) { |
| 421 | DEBUG_PRINTF("bad lag %u != %zu\n" , ep.graph_lag, iv_info.s.length()); |
| 422 | return RoseGraph::null_vertex(); /* TODO: better */ |
| 423 | } |
| 424 | |
| 425 | const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion); |
| 426 | depth min_width(0), max_width(0); |
| 427 | |
| 428 | if (ep.graph.get()) { |
| 429 | const depth graph_lag(ep.graph_lag); |
| 430 | max_width = findMaxWidth(*ep.graph) + graph_lag; |
| 431 | min_width = findMinWidth(*ep.graph) + graph_lag; |
| 432 | if (proper_out_degree(ep.graph->startDs, *ep.graph)) { |
| 433 | max_width = depth::infinity(); |
| 434 | } |
| 435 | } |
| 436 | |
| 437 | DEBUG_PRINTF("mw = %s; lag = %u\n" , max_width.str().c_str(), ep.graph_lag); |
| 438 | |
| 439 | NGHolder h; |
| 440 | |
| 441 | if (ep.graph.get() && max_width <= anchored_max_depth) { |
| 442 | cloneHolder(h, *ep.graph); |
| 443 | |
| 444 | /* add literal/dots */ |
| 445 | if (ep.graph_lag) { |
| 446 | assert(ep.graph_lag == iv_info.s.length()); |
| 447 | appendLiteral(h, iv_info.s); |
| 448 | } else { |
| 449 | removeFalsePaths(h, iv_info.s); |
| 450 | } |
| 451 | } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF |
| 452 | && iv_info.s.length() + ep.maxBound |
| 453 | <= tbi->cc.grey.maxAnchoredRegion) { |
| 454 | if (ep.maxBound || ep.minBound) { |
| 455 | /* TODO: handle, however these cases are not generated currently by |
| 456 | ng_violet */ |
| 457 | return RoseGraph::null_vertex(); |
| 458 | } |
| 459 | max_width = depth(ep.maxBound + iv_info.s.length()); |
| 460 | min_width = depth(ep.minBound + iv_info.s.length()); |
| 461 | add_edge(h.start, h.accept, h); |
| 462 | appendLiteral(h, iv_info.s); |
| 463 | } else { |
| 464 | return RoseGraph::null_vertex(); |
| 465 | } |
| 466 | |
| 467 | u32 anchored_exit_id = tbi->getNewLiteralId(); |
| 468 | u32 remap_id = 0; |
| 469 | DEBUG_PRINTF(" trying to add dfa stuff\n" ); |
| 470 | int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id); |
| 471 | |
| 472 | if (rv == ANCHORED_FAIL) { |
| 473 | return RoseGraph::null_vertex(); |
| 474 | } else if (rv == ANCHORED_REMAP) { |
| 475 | anchored_exit_id = remap_id; |
| 476 | } else { |
| 477 | assert(rv == ANCHORED_SUCCESS); |
| 478 | } |
| 479 | |
| 480 | // Store the literal itself in a side structure so that we can use it for |
| 481 | // overlap calculations later. This may be obsolete when the old Rose |
| 482 | // construction path (and its history selection code) goes away. |
| 483 | rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0); |
| 484 | tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit)); |
| 485 | |
| 486 | assert(min_width <= anchored_max_depth); |
| 487 | assert(max_width <= anchored_max_depth); |
| 488 | assert(min_width <= max_width); |
| 489 | |
| 490 | /* Note: bounds are end-to-end as anchored lits are considered |
| 491 | * to have 0 length. */ |
| 492 | RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width, |
| 493 | max_width); |
| 494 | return v; |
| 495 | } |
| 496 | |
| 497 | static |
| 498 | u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep, |
| 499 | const RoseInVertexProps &succ_vp) { |
| 500 | /* we need to ensure there is enough history to find the successor literal |
| 501 | * when we enable its group. |
| 502 | */ |
| 503 | |
| 504 | if (!ep.graph.get()) { |
| 505 | return 0; /* non overlapping */ |
| 506 | } |
| 507 | depth graph_min_width = findMinWidth(*ep.graph); |
| 508 | u32 min_width = ep.graph_lag + graph_min_width; |
| 509 | u32 s_len = succ_vp.s.length(); |
| 510 | |
| 511 | if (s_len <= min_width) { |
| 512 | return 0; /* no overlap */ |
| 513 | } |
| 514 | |
| 515 | u32 overlap = s_len - min_width; |
| 516 | DEBUG_PRINTF("found overlap of %u\n" , overlap); |
| 517 | return overlap; |
| 518 | } |
| 519 | |
| 520 | static |
| 521 | void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk, |
| 522 | vector<u8> &cmp) { |
| 523 | if (lag >= HWLM_MASKLEN) { |
| 524 | msk.clear(); cmp.clear(); |
| 525 | return; |
| 526 | } |
| 527 | |
| 528 | assert(in_degree(h.acceptEod, h) == 1); // no eod reports |
| 529 | |
| 530 | // Start with the set of reporter vertices for this rose. |
| 531 | set<NFAVertex> curr, next; |
| 532 | insert(&curr, inv_adjacent_vertices(h.accept, h)); |
| 533 | assert(!curr.empty()); |
| 534 | |
| 535 | msk.assign(HWLM_MASKLEN, 0); |
| 536 | cmp.assign(HWLM_MASKLEN, 0); |
| 537 | size_t i = HWLM_MASKLEN - lag - 1; |
| 538 | do { |
| 539 | if (curr.empty() || contains(curr, h.start) || |
| 540 | contains(curr, h.startDs)) { |
| 541 | DEBUG_PRINTF("end of the road\n" ); |
| 542 | break; |
| 543 | } |
| 544 | |
| 545 | next.clear(); |
| 546 | CharReach cr; |
| 547 | for (auto v : curr) { |
| 548 | DEBUG_PRINTF("vertex %zu, reach %s\n" , h[v].index, |
| 549 | describeClass(h[v].char_reach).c_str()); |
| 550 | cr |= h[v].char_reach; |
| 551 | insert(&next, inv_adjacent_vertices(v, h)); |
| 552 | } |
| 553 | make_and_cmp_mask(cr, &msk[i], &cmp[i]); |
| 554 | DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n" , i, |
| 555 | describeClass(cr).c_str(), msk.at(i), cmp.at(i)); |
| 556 | curr.swap(next); |
| 557 | } while (i-- > 0); |
| 558 | } |
| 559 | |
| 560 | static |
| 561 | void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table, |
| 562 | map<RoseInVertex, vector<RoseVertex> > &vertex_map, |
| 563 | const vector<pair<RoseVertex, RoseInEdge> > &parents, |
| 564 | RoseInVertex iv, const RoseBuildData &bd) { |
| 565 | const RoseInGraph &ig = bd.ig; |
| 566 | const RoseInVertexProps &iv_info = ig[iv]; |
| 567 | assert(iv_info.type == RIV_LITERAL); |
| 568 | assert(!parents.empty()); /* start vertices should not be here */ |
| 569 | |
| 570 | // ng_violet should have ensured that mixed-sensitivity literals are no |
| 571 | // longer than the benefits max width. |
| 572 | assert(iv_info.s.length() <= MAX_MASK2_WIDTH || |
| 573 | !mixed_sensitivity(iv_info.s)); |
| 574 | |
| 575 | // Rose graph construction process should have given us a min_offset. |
| 576 | assert(iv_info.min_offset > 0); |
| 577 | |
| 578 | if (use_eod_table) { |
| 579 | goto floating; |
| 580 | } |
| 581 | |
| 582 | DEBUG_PRINTF("rose find vertex\n" ); |
| 583 | if (parents.size() == 1) { |
| 584 | const RoseVertex u = parents.front().first; |
| 585 | const RoseInEdgeProps &ep = ig[parents.front().second]; |
| 586 | |
| 587 | if (!tbi->isAnyStart(u)) { |
| 588 | goto floating; |
| 589 | } |
| 590 | |
| 591 | if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) { |
| 592 | goto floating; |
| 593 | } |
| 594 | if (ep.graph && !isAnchored(*ep.graph)) { |
| 595 | goto floating; |
| 596 | } |
| 597 | |
| 598 | DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n" , ep.maxBound, |
| 599 | ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3); |
| 600 | |
| 601 | /* need to check if putting iv into the anchored table would create |
| 602 | * any bad_overlap relationships with its successor literals */ |
| 603 | for (const auto &e : out_edges_range(iv, ig)) { |
| 604 | RoseInVertex t = target(e, ig); |
| 605 | u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]); |
| 606 | DEBUG_PRINTF("found overlap of %u\n" , overlap); |
| 607 | if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) { |
| 608 | goto floating; |
| 609 | } |
| 610 | } |
| 611 | |
| 612 | RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep); |
| 613 | if (v != RoseGraph::null_vertex()) { |
| 614 | DEBUG_PRINTF("add anchored literal vertex\n" ); |
| 615 | vertex_map[iv].push_back(v); |
| 616 | return; |
| 617 | } |
| 618 | } |
| 619 | |
| 620 | floating: |
| 621 | vector<u8> msk, cmp; |
| 622 | if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) { |
| 623 | RoseInEdge e = *in_edges(iv, ig).first; |
| 624 | if (ig[e].graph) { |
| 625 | findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp); |
| 626 | } |
| 627 | } |
| 628 | |
| 629 | u32 delay = iv_info.delay; |
| 630 | rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING; |
| 631 | |
| 632 | u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table); |
| 633 | |
| 634 | DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n" , |
| 635 | literalId, iv_info.s.length(), delay, iv_info.min_offset, |
| 636 | iv_info.max_offset, dumpString(iv_info.s).c_str()); |
| 637 | |
| 638 | createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset, |
| 639 | iv_info.max_offset, literalId, delay, bd); |
| 640 | } |
| 641 | |
| 642 | static |
| 643 | unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build, |
| 644 | map<flat_set<ReportID>, ReportID> &remap) { |
| 645 | assert(generates_callbacks(h)); |
| 646 | assert(!in_degree(h.accept, h)); |
| 647 | auto gg = cloneHolder(h); |
| 648 | NGHolder &g = *gg; |
| 649 | g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX; |
| 650 | |
| 651 | // Move acceptEod edges over to accept. |
| 652 | vector<NFAEdge> dead; |
| 653 | for (const auto &e : in_edges_range(g.acceptEod, g)) { |
| 654 | NFAVertex u = source(e, g); |
| 655 | if (u == g.accept) { |
| 656 | continue; |
| 657 | } |
| 658 | add_edge_if_not_present(u, g.accept, g); |
| 659 | dead.push_back(e); |
| 660 | |
| 661 | if (!contains(remap, g[u].reports)) { |
| 662 | remap[g[u].reports] = build.getNewNfaReport(); |
| 663 | } |
| 664 | |
| 665 | g[u].reports = { remap[g[u].reports] }; |
| 666 | } |
| 667 | |
| 668 | remove_edges(dead, g); |
| 669 | return gg; |
| 670 | } |
| 671 | |
| 672 | static |
| 673 | u32 getEodEventID(RoseBuildImpl &build) { |
| 674 | // Allocate the EOD event if it hasn't been already. |
| 675 | if (build.eod_event_literal_id == MO_INVALID_IDX) { |
| 676 | build.eod_event_literal_id = build.getLiteralId({}, 0, ROSE_EVENT); |
| 677 | } |
| 678 | |
| 679 | return build.eod_event_literal_id; |
| 680 | } |
| 681 | |
| 682 | static |
| 683 | void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, |
| 684 | const NGHolder &h) { |
| 685 | assert(!build.isInETable(u)); |
| 686 | |
| 687 | RoseGraph &g = build.g; |
| 688 | map<flat_set<ReportID>, ReportID> report_remap; |
| 689 | shared_ptr<NGHolder> eod_leftfix |
| 690 | = makeRoseEodPrefix(h, build, report_remap); |
| 691 | |
| 692 | u32 eod_event = getEodEventID(build); |
| 693 | |
| 694 | for (const auto &report_mapping : report_remap) { |
| 695 | RoseVertex v = add_vertex(g); |
| 696 | g[v].literals.insert(eod_event); |
| 697 | build.literal_info[eod_event].vertices.insert(v); |
| 698 | |
| 699 | g[v].left.graph = eod_leftfix; |
| 700 | g[v].left.leftfix_report = report_mapping.second; |
| 701 | g[v].left.lag = 0; |
| 702 | RoseEdge e1 = add_edge(u, v, g); |
| 703 | g[e1].minBound = 0; |
| 704 | g[e1].maxBound = ROSE_BOUND_INF; |
| 705 | g[v].min_offset = add_rose_depth(g[u].min_offset, |
| 706 | findMinWidth(*g[v].left.graph)); |
| 707 | g[v].max_offset = ROSE_BOUND_INF; |
| 708 | |
| 709 | depth max_width = findMaxWidth(*g[v].left.graph); |
| 710 | if (u != build.root && max_width.is_finite() |
| 711 | && (!build.isAnyStart(u) || isPureAnchored(*g[v].left.graph))) { |
| 712 | g[e1].maxBound = max_width; |
| 713 | g[v].max_offset = add_rose_depth(g[u].max_offset, max_width); |
| 714 | } |
| 715 | |
| 716 | g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix |
| 717 | RoseVertex w = add_vertex(g); |
| 718 | g[w].eod_accept = true; |
| 719 | g[w].reports = report_mapping.first; |
| 720 | g[w].min_offset = g[v].min_offset; |
| 721 | g[w].max_offset = g[v].max_offset; |
| 722 | RoseEdge e = add_edge(v, w, g); |
| 723 | g[e].minBound = 0; |
| 724 | g[e].maxBound = 0; |
| 725 | /* No need to set history as the event is only delivered at the last |
| 726 | * byte anyway - no need to invalidate stale entries. */ |
| 727 | g[e].history = ROSE_ROLE_HISTORY_NONE; |
| 728 | DEBUG_PRINTF("accept eod vertex (index=%zu)\n" , g[w].index); |
| 729 | } |
| 730 | } |
| 731 | |
| 732 | static |
| 733 | void doRoseAcceptVertex(RoseBuildImpl *tbi, |
| 734 | const vector<pair<RoseVertex, RoseInEdge> > &parents, |
| 735 | RoseInVertex iv, const RoseBuildData &bd) { |
| 736 | const RoseInGraph &ig = bd.ig; |
| 737 | assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD); |
| 738 | |
| 739 | RoseGraph &g = tbi->g; |
| 740 | |
| 741 | for (const auto &pv : parents) { |
| 742 | RoseVertex u = pv.first; |
| 743 | const RoseInEdgeProps &edge_props = bd.ig[pv.second]; |
| 744 | |
| 745 | /* We need to duplicate the parent vertices if: |
| 746 | * |
| 747 | * 1) It already has a suffix, etc as we are going to add the specified |
| 748 | * suffix, etc to the parents and we do not want to overwrite the |
| 749 | * existing information. |
| 750 | * |
| 751 | * 2) We are making the an EOD accept and the vertex already has other |
| 752 | * out-edges - The LAST_BYTE history used for EOD accepts is |
| 753 | * incompatible with normal successors. As accepts are processed last we |
| 754 | * do not need to worry about other normal successors being added later. |
| 755 | */ |
| 756 | if (g[u].suffix || !g[u].reports.empty() |
| 757 | || (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g) |
| 758 | && !edge_props.graph) |
| 759 | || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) { |
| 760 | DEBUG_PRINTF("duplicating for parent %zu\n" , g[u].index); |
| 761 | assert(!tbi->isAnyStart(u)); |
| 762 | u = duplicate(tbi, u); |
| 763 | g[u].suffix.reset(); |
| 764 | g[u].eod_accept = false; |
| 765 | } |
| 766 | |
| 767 | assert(!g[u].suffix); |
| 768 | if (ig[iv].type == RIV_ACCEPT) { |
| 769 | assert(!tbi->isAnyStart(u)); |
| 770 | if (edge_props.dfa) { |
| 771 | DEBUG_PRINTF("adding early dfa suffix to i%zu\n" , g[u].index); |
| 772 | g[u].suffix.rdfa = edge_props.dfa; |
| 773 | g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph); |
| 774 | g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph); |
| 775 | } else if (edge_props.graph) { |
| 776 | DEBUG_PRINTF("adding suffix to i%zu\n" , g[u].index); |
| 777 | g[u].suffix.graph = edge_props.graph; |
| 778 | assert(g[u].suffix.graph->kind == NFA_SUFFIX); |
| 779 | /* TODO: set dfa_(min|max)_width */ |
| 780 | } else if (edge_props.haig) { |
| 781 | DEBUG_PRINTF("adding suffaig to i%zu\n" , g[u].index); |
| 782 | g[u].suffix.haig = edge_props.haig; |
| 783 | } else { |
| 784 | DEBUG_PRINTF("adding boring accept to i%zu\n" , g[u].index); |
| 785 | assert(!g[u].eod_accept); |
| 786 | g[u].reports = ig[iv].reports; |
| 787 | } |
| 788 | } else { |
| 789 | assert(ig[iv].type == RIV_ACCEPT_EOD); |
| 790 | assert(!edge_props.haig); |
| 791 | |
| 792 | if (!edge_props.graph) { |
| 793 | RoseVertex w = add_vertex(g); |
| 794 | g[w].eod_accept = true; |
| 795 | g[w].reports = ig[iv].reports; |
| 796 | g[w].min_offset = g[u].min_offset; |
| 797 | g[w].max_offset = g[u].max_offset; |
| 798 | RoseEdge e = add_edge(u, w, g); |
| 799 | g[e].minBound = 0; |
| 800 | g[e].maxBound = 0; |
| 801 | g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; |
| 802 | DEBUG_PRINTF("accept eod vertex (index=%zu)\n" , g[w].index); |
| 803 | continue; |
| 804 | } |
| 805 | |
| 806 | const NGHolder &h = *edge_props.graph; |
| 807 | assert(!in_degree(h.accept, h)); |
| 808 | assert(generates_callbacks(h)); |
| 809 | |
| 810 | if (tbi->isInETable(u)) { |
| 811 | assert(h.kind == NFA_SUFFIX); |
| 812 | assert(!tbi->isAnyStart(u)); |
| 813 | /* etable can't/shouldn't use eod event */ |
| 814 | DEBUG_PRINTF("adding suffix to i%zu\n" , g[u].index); |
| 815 | g[u].suffix.graph = edge_props.graph; |
| 816 | continue; |
| 817 | } |
| 818 | |
| 819 | makeEodEventLeftfix(*tbi, u, h); |
| 820 | } |
| 821 | } |
| 822 | } |
| 823 | |
| 824 | static |
| 825 | bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo, |
| 826 | u32 *max_len, const CompileContext &cc) { |
| 827 | map<RoseInVertex, u32> max_depth_from_eod; |
| 828 | *max_len = 0; |
| 829 | |
| 830 | reverse(topo.begin(), topo.end()); /* we want to start at accept end */ |
| 831 | |
| 832 | for (auto v : topo) { |
| 833 | u32 v_depth = 0; |
| 834 | |
| 835 | if (ig[v].type == RIV_ACCEPT) { |
| 836 | DEBUG_PRINTF("[ACCEPT]\n" ); |
| 837 | for (const auto &e : in_edges_range(v, ig)) { |
| 838 | if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) { |
| 839 | DEBUG_PRINTF("floating accept\n" ); |
| 840 | return false; |
| 841 | } |
| 842 | } |
| 843 | } |
| 844 | |
| 845 | switch (ig[v].type) { |
| 846 | case RIV_LITERAL: |
| 847 | DEBUG_PRINTF("[LITERAL]\n" ); |
| 848 | break; |
| 849 | case RIV_START: |
| 850 | DEBUG_PRINTF("[START]\n" ); |
| 851 | break; |
| 852 | case RIV_ANCHORED_START: |
| 853 | DEBUG_PRINTF("[ANCHOR]\n" ); |
| 854 | break; |
| 855 | case RIV_ACCEPT: |
| 856 | break; |
| 857 | case RIV_ACCEPT_EOD: |
| 858 | DEBUG_PRINTF("[EOD]\n" ); |
| 859 | break; |
| 860 | default: |
| 861 | assert(0); |
| 862 | DEBUG_PRINTF("????\n" ); |
| 863 | return false; |
| 864 | } |
| 865 | |
| 866 | for (const auto &e : out_edges_range(v, ig)) { |
| 867 | RoseInVertex t = target(e, ig); |
| 868 | |
| 869 | assert(contains(max_depth_from_eod, t)); |
| 870 | u64a max_width; |
| 871 | |
| 872 | if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) { |
| 873 | /* start itself doesn't need to be in history buffer |
| 874 | * just need to make sure all succ literals are ok */ |
| 875 | if (ig[t].type == RIV_LITERAL) { |
| 876 | max_width = ig[t].s.length(); |
| 877 | } else { |
| 878 | max_width = 0; |
| 879 | } |
| 880 | if (ig[e].graph) { |
| 881 | depth graph_max_width = findMaxWidth(*ig[e].graph); |
| 882 | DEBUG_PRINTF("graph max width %s, lag %u\n" , |
| 883 | graph_max_width.str().c_str(), |
| 884 | ig[e].graph_lag); |
| 885 | if (!graph_max_width.is_finite()) { |
| 886 | DEBUG_PRINTF("fail due to graph with inf max width\n" ); |
| 887 | return false; |
| 888 | } |
| 889 | max_width += graph_max_width; |
| 890 | } |
| 891 | } else if (ig[e].haig) { |
| 892 | DEBUG_PRINTF("fail due to haig\n" ); |
| 893 | return false; |
| 894 | } else if (ig[e].graph) { |
| 895 | depth graph_max_width = findMaxWidth(*ig[e].graph); |
| 896 | DEBUG_PRINTF("graph max width %s, lag %u\n" , |
| 897 | graph_max_width.str().c_str(), ig[e].graph_lag); |
| 898 | if (!graph_max_width.is_finite()) { |
| 899 | DEBUG_PRINTF("fail due to graph with inf max width\n" ); |
| 900 | return false; |
| 901 | } |
| 902 | max_width = ig[e].graph_lag + graph_max_width; |
| 903 | } else { |
| 904 | max_width = ig[e].maxBound; |
| 905 | if (ig[t].type == RIV_LITERAL) { |
| 906 | max_width += ig[t].s.length(); |
| 907 | } |
| 908 | } |
| 909 | |
| 910 | max_width += max_depth_from_eod[t]; |
| 911 | if (max_width > ROSE_BOUND_INF) { |
| 912 | max_width = ROSE_BOUND_INF; |
| 913 | } |
| 914 | |
| 915 | DEBUG_PRINTF("max_width=%llu\n" , max_width); |
| 916 | |
| 917 | ENSURE_AT_LEAST(&v_depth, (u32)max_width); |
| 918 | } |
| 919 | |
| 920 | if (v_depth == ROSE_BOUND_INF |
| 921 | || v_depth > cc.grey.maxHistoryAvailable) { |
| 922 | DEBUG_PRINTF("not suitable for eod table %u\n" , v_depth); |
| 923 | return false; |
| 924 | } |
| 925 | |
| 926 | max_depth_from_eod[v] = v_depth; |
| 927 | ENSURE_AT_LEAST(max_len, v_depth); |
| 928 | } |
| 929 | |
| 930 | DEBUG_PRINTF("to the eod table and beyond\n" ); |
| 931 | return true; |
| 932 | } |
| 933 | |
| 934 | static |
| 935 | void shift_accepts_to_end(const RoseInGraph &ig, |
| 936 | vector<RoseInVertex> &topo_order) { |
| 937 | stable_partition(begin(topo_order), end(topo_order), |
| 938 | [&](RoseInVertex v){ return !is_any_accept(v, ig); }); |
| 939 | } |
| 940 | |
| 941 | static |
| 942 | void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { |
| 943 | const RoseInGraph &ig = bd.ig; |
| 944 | |
| 945 | /* add the pattern in to the main rose graph */ |
| 946 | DEBUG_PRINTF("%srose pop\n" , bd.som ? "som " : "" ); |
| 947 | |
| 948 | /* Note: an input vertex may need to create several rose vertices. This is |
| 949 | * primarily because a RoseVertex can only have 1 one leftfix */ |
| 950 | map<RoseInVertex, vector<RoseVertex> > vertex_map; |
| 951 | |
| 952 | vector<RoseInVertex> v_order = topo_order(ig); |
| 953 | shift_accepts_to_end(ig, v_order); |
| 954 | |
| 955 | u32 eod_space_required; |
| 956 | bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required, |
| 957 | tbi->cc); |
| 958 | if (use_eod_table) { |
| 959 | ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required); |
| 960 | } |
| 961 | |
| 962 | assert(ig[v_order.front()].type == RIV_START |
| 963 | || ig[v_order.front()].type == RIV_ANCHORED_START); |
| 964 | |
| 965 | for (RoseInVertex iv : v_order) { |
| 966 | DEBUG_PRINTF("vertex %zu\n" , ig[iv].index); |
| 967 | |
| 968 | if (ig[iv].type == RIV_START) { |
| 969 | DEBUG_PRINTF("is root\n" ); |
| 970 | vertex_map[iv].push_back(tbi->root); |
| 971 | continue; |
| 972 | } else if (ig[iv].type == RIV_ANCHORED_START) { |
| 973 | DEBUG_PRINTF("is anchored root\n" ); |
| 974 | vertex_map[iv].push_back(tbi->anchored_root); |
| 975 | continue; |
| 976 | } |
| 977 | |
| 978 | vector<pair<RoseVertex, RoseInEdge> > parents; |
| 979 | for (const auto &e : in_edges_range(iv, ig)) { |
| 980 | RoseInVertex u = source(e, ig); |
| 981 | assert(contains(vertex_map, u)); |
| 982 | const vector<RoseVertex> &images = vertex_map[u]; |
| 983 | |
| 984 | // We should have no dupes. |
| 985 | assert(set<RoseVertex>(images.begin(), images.end()).size() |
| 986 | == images.size()); |
| 987 | |
| 988 | for (auto v_image : images) { |
| 989 | // v_image should NOT already be in our parents list. |
| 990 | assert(find_if(parents.begin(), parents.end(), |
| 991 | [&v_image](const pair<RoseVertex, RoseInEdge> &p) { |
| 992 | return p.first == v_image; |
| 993 | }) == parents.end()); |
| 994 | |
| 995 | parents.emplace_back(v_image, e); |
| 996 | |
| 997 | if (tbi->isAnchored(v_image)) { |
| 998 | assert(!use_eod_table); |
| 999 | u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]); |
| 1000 | assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1); |
| 1001 | ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap, |
| 1002 | overlap); |
| 1003 | } |
| 1004 | } |
| 1005 | } |
| 1006 | |
| 1007 | if (ig[iv].type == RIV_LITERAL) { |
| 1008 | DEBUG_PRINTF("LITERAL '%s'\n" , dumpString(ig[iv].s).c_str()); |
| 1009 | assert(!isLeafNode(iv, ig)); |
| 1010 | doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv, |
| 1011 | bd); |
| 1012 | } else { |
| 1013 | if (ig[iv].type == RIV_ACCEPT) { |
| 1014 | DEBUG_PRINTF("ACCEPT\n" ); |
| 1015 | } else { |
| 1016 | assert(ig[iv].type == RIV_ACCEPT_EOD); |
| 1017 | DEBUG_PRINTF("ACCEPT_EOD\n" ); |
| 1018 | } |
| 1019 | assert(isLeafNode(iv, ig)); /* accepts are final */ |
| 1020 | doRoseAcceptVertex(tbi, parents, iv, bd); |
| 1021 | } |
| 1022 | } |
| 1023 | DEBUG_PRINTF("done\n" ); |
| 1024 | } |
| 1025 | |
| 1026 | template<typename GraphT> |
| 1027 | static |
| 1028 | bool empty(const GraphT &g) { |
| 1029 | typename GraphT::vertex_iterator vi, ve; |
| 1030 | tie(vi, ve) = vertices(g); |
| 1031 | return vi == ve; |
| 1032 | } |
| 1033 | |
| 1034 | static |
| 1035 | bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm, |
| 1036 | const CompileContext &cc) { |
| 1037 | if (isImplementableNFA(h, &rm, cc)) { |
| 1038 | return true; |
| 1039 | } |
| 1040 | |
| 1041 | if (prefilter && cc.grey.prefilterReductions) { |
| 1042 | // If we're prefiltering, we can have another go with a reduced graph. |
| 1043 | UNUSED size_t numBefore = num_vertices(h); |
| 1044 | prefilterReductions(h, cc); |
| 1045 | UNUSED size_t numAfter = num_vertices(h); |
| 1046 | DEBUG_PRINTF("reduced from %zu to %zu vertices\n" , numBefore, numAfter); |
| 1047 | |
| 1048 | if (isImplementableNFA(h, &rm, cc)) { |
| 1049 | return true; |
| 1050 | } |
| 1051 | } |
| 1052 | |
| 1053 | DEBUG_PRINTF("unable to build engine\n" ); |
| 1054 | return false; |
| 1055 | } |
| 1056 | |
| 1057 | static |
| 1058 | bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) { |
| 1059 | assert(in_degree(v, ig)); |
| 1060 | |
| 1061 | for (const auto &e : in_edges_range(v, ig)) { |
| 1062 | if (ig[e].graph || ig[e].haig) { |
| 1063 | DEBUG_PRINTF("edge graph\n" ); |
| 1064 | return true; |
| 1065 | } |
| 1066 | if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) { |
| 1067 | DEBUG_PRINTF("edge bounds\n" ); |
| 1068 | return true; |
| 1069 | } |
| 1070 | |
| 1071 | RoseInVertex u = source(e, ig); |
| 1072 | if (ig[u].type == RIV_START) { |
| 1073 | continue; |
| 1074 | } |
| 1075 | if (ig[u].type != RIV_LITERAL) { |
| 1076 | DEBUG_PRINTF("unsafe pred vertex\n" ); |
| 1077 | return true; |
| 1078 | } |
| 1079 | if (ig[u].delay) { |
| 1080 | DEBUG_PRINTF("pred has delay\n" ); |
| 1081 | return true; |
| 1082 | } |
| 1083 | } |
| 1084 | |
| 1085 | return false; |
| 1086 | } |
| 1087 | |
| 1088 | static |
| 1089 | u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) { |
| 1090 | /* overly conservative if only part of the string is nocase */ |
| 1091 | string pred = pred_key.get_string(); |
| 1092 | string lit = lit_key.get_string(); |
| 1093 | |
| 1094 | if (pred_key.any_nocase() || lit_key.any_nocase()) { |
| 1095 | upperString(pred); |
| 1096 | upperString(lit); |
| 1097 | } |
| 1098 | |
| 1099 | string::size_type last = pred.rfind(lit); |
| 1100 | if (last == string::npos) { |
| 1101 | return MAX_DELAY; |
| 1102 | } |
| 1103 | |
| 1104 | u32 raw = pred.size() - last - 1; |
| 1105 | return MIN(raw, MAX_DELAY); |
| 1106 | } |
| 1107 | |
| 1108 | static |
| 1109 | u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) { |
| 1110 | // First, check the overlap constraints on (u,v). |
| 1111 | size_t max_delay; |
| 1112 | if (ig[v].type == RIV_LITERAL) { |
| 1113 | DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n" , |
| 1114 | escapeString(ig[u].s).c_str(), |
| 1115 | escapeString(ig[v].s).c_str()); |
| 1116 | max_delay = maxAvailableDelay(ig[u].s, ig[v].s); |
| 1117 | } else if (ig[v].type == RIV_ACCEPT) { |
| 1118 | DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n" , |
| 1119 | escapeString(ig[u].s).c_str()); |
| 1120 | max_delay = MAX_DELAY; |
| 1121 | } else { |
| 1122 | assert(0); |
| 1123 | return 0; |
| 1124 | } |
| 1125 | |
| 1126 | DEBUG_PRINTF("max safe delay for this edge: %zu\n" , max_delay); |
| 1127 | |
| 1128 | // Now consider the predecessors of u. |
| 1129 | for (const auto &e : in_edges_range(u, ig)) { |
| 1130 | RoseInVertex w = source(e, ig); |
| 1131 | if (ig[w].type == RIV_START) { |
| 1132 | continue; |
| 1133 | } |
| 1134 | assert(ig[w].type == RIV_LITERAL); |
| 1135 | assert(ig[w].delay == 0); |
| 1136 | |
| 1137 | DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n" , |
| 1138 | escapeString(ig[w].s).c_str(), |
| 1139 | escapeString(ig[u].s).c_str()); |
| 1140 | |
| 1141 | // We cannot delay the literal on u so much that a predecessor literal |
| 1142 | // could occur in the delayed region. For example, consider |
| 1143 | // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then |
| 1144 | // 'barman' could occur in the input string and race with 'foobar', as |
| 1145 | // in 'foobarman'. |
| 1146 | |
| 1147 | const size_t pred_len = ig[w].s.length(); |
| 1148 | size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0); |
| 1149 | DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n" , pred_len, overlap); |
| 1150 | assert(overlap <= pred_len); |
| 1151 | size_t max_lit_delay = pred_len - min(overlap + 1, pred_len); |
| 1152 | DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n" , overlap, |
| 1153 | max_lit_delay); |
| 1154 | max_delay = min(max_delay, max_lit_delay); |
| 1155 | } |
| 1156 | |
| 1157 | DEBUG_PRINTF("max_delay=%zu\n" , max_delay); |
| 1158 | assert(max_delay <= MAX_DELAY); |
| 1159 | return max_delay; |
| 1160 | } |
| 1161 | |
| 1162 | static |
| 1163 | bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e, |
| 1164 | const CompileContext &cc, u32 *delay_out) { |
| 1165 | const u32 max_history = |
| 1166 | cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF; |
| 1167 | |
| 1168 | const RoseInVertex u = source(e, ig), v = target(e, ig); |
| 1169 | const u32 graph_lag = ig[e].graph_lag; |
| 1170 | |
| 1171 | // Clone a copy of the graph, as we need to be able to roll back this |
| 1172 | // operation. |
| 1173 | NGHolder h; |
| 1174 | cloneHolder(h, *ig[e].graph); |
| 1175 | |
| 1176 | DEBUG_PRINTF("target literal: %s\n" , dumpString(ig[v].s).c_str()); |
| 1177 | DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n" , num_vertices(h), |
| 1178 | graph_lag); |
| 1179 | |
| 1180 | assert(graph_lag <= ig[v].s.length()); |
| 1181 | if (graph_lag < ig[v].s.length()) { |
| 1182 | size_t len = ig[v].s.length() - graph_lag; |
| 1183 | ue2_literal lit(ig[v].s.substr(0, len)); |
| 1184 | DEBUG_PRINTF("lit2=%s\n" , dumpString(lit).c_str()); |
| 1185 | u32 delay2 = removeTrailingLiteralStates(h, lit, max_history); |
| 1186 | if (delay2 == MO_INVALID_IDX) { |
| 1187 | DEBUG_PRINTF("couldn't remove trailing literal\n" ); |
| 1188 | return false; |
| 1189 | } |
| 1190 | if (delay2 != len) { |
| 1191 | DEBUG_PRINTF("couldn't remove entire trailing literal\n" ); |
| 1192 | return false; |
| 1193 | } |
| 1194 | } |
| 1195 | |
| 1196 | PureRepeat repeat; |
| 1197 | if (!isPureRepeat(h, repeat)) { |
| 1198 | DEBUG_PRINTF("graph is not repeat\n" ); |
| 1199 | return false; |
| 1200 | } |
| 1201 | DEBUG_PRINTF("graph is %s repeat\n" , repeat.bounds.str().c_str()); |
| 1202 | if (!repeat.bounds.max.is_infinite()) { |
| 1203 | DEBUG_PRINTF("not inf\n" ); |
| 1204 | return false; |
| 1205 | } |
| 1206 | |
| 1207 | if (!repeat.reach.all()) { |
| 1208 | DEBUG_PRINTF("non-dot reach\n" ); |
| 1209 | return false; |
| 1210 | } |
| 1211 | |
| 1212 | u32 delay = ig[v].s.length() + repeat.bounds.min; |
| 1213 | if (delay > MAX_DELAY) { |
| 1214 | DEBUG_PRINTF("delay %u > MAX_DELAY\n" , delay); |
| 1215 | return false; |
| 1216 | } |
| 1217 | |
| 1218 | if (delay + ig[u].s.length() - 1 > max_history) { |
| 1219 | DEBUG_PRINTF("delay too large for history\n" ); |
| 1220 | return false; |
| 1221 | } |
| 1222 | |
| 1223 | *delay_out = delay; |
| 1224 | return true; |
| 1225 | } |
| 1226 | |
| 1227 | static |
| 1228 | void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) { |
| 1229 | if (!cc.grey.roseTransformDelay) { |
| 1230 | return; |
| 1231 | } |
| 1232 | |
| 1233 | for (auto u : vertices_range(ig)) { |
| 1234 | if (ig[u].type != RIV_LITERAL) { |
| 1235 | continue; |
| 1236 | } |
| 1237 | if (out_degree(u, ig) != 1) { |
| 1238 | continue; |
| 1239 | } |
| 1240 | |
| 1241 | RoseInEdge e = *out_edges(u, ig).first; |
| 1242 | RoseInVertex v = target(e, ig); |
| 1243 | if (ig[v].type != RIV_LITERAL) { |
| 1244 | continue; |
| 1245 | } |
| 1246 | if (ig[e].haig) { |
| 1247 | continue; |
| 1248 | } |
| 1249 | if (!ig[e].graph) { |
| 1250 | continue; |
| 1251 | } |
| 1252 | |
| 1253 | if (predsAreDelaySensitive(ig, u)) { |
| 1254 | DEBUG_PRINTF("preds are delay sensitive\n" ); |
| 1255 | continue; |
| 1256 | } |
| 1257 | |
| 1258 | u32 max_delay = findMaxSafeDelay(ig, u, v); |
| 1259 | |
| 1260 | DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n" , |
| 1261 | escapeString(ig[u].s).c_str(), |
| 1262 | escapeString(ig[v].s).c_str()); |
| 1263 | |
| 1264 | u32 delay = 0; |
| 1265 | if (!transformInfixToDelay(ig, e, cc, &delay)) { |
| 1266 | continue; |
| 1267 | } |
| 1268 | |
| 1269 | if (delay > max_delay) { |
| 1270 | DEBUG_PRINTF("delay=%u > max_delay=%u\n" , delay, max_delay); |
| 1271 | continue; |
| 1272 | } |
| 1273 | |
| 1274 | DEBUG_PRINTF("setting lit delay to %u and deleting graph\n" , delay); |
| 1275 | ig[u].delay = delay; |
| 1276 | ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay); |
| 1277 | ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay); |
| 1278 | ig[e].graph_lag = 0; |
| 1279 | ig[e].graph.reset(); |
| 1280 | ig[e].minBound = 0; |
| 1281 | ig[e].maxBound = ROSE_BOUND_INF; |
| 1282 | } |
| 1283 | } |
| 1284 | |
| 1285 | static |
| 1286 | bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e, |
| 1287 | const CompileContext &cc, DepthMinMax *bounds) { |
| 1288 | const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable |
| 1289 | : ROSE_BOUND_INF; |
| 1290 | |
| 1291 | const RoseInVertex v = target(e, ig); |
| 1292 | const u32 graph_lag = ig[e].graph_lag; |
| 1293 | |
| 1294 | // Clone a copy of the graph, as we need to be able to roll back this |
| 1295 | // operation. |
| 1296 | NGHolder h; |
| 1297 | cloneHolder(h, *ig[e].graph); |
| 1298 | |
| 1299 | DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n" , num_vertices(h), |
| 1300 | graph_lag); |
| 1301 | |
| 1302 | assert(graph_lag <= ig[v].s.length()); |
| 1303 | if (graph_lag < ig[v].s.length()) { |
| 1304 | size_t len = ig[v].s.length() - graph_lag; |
| 1305 | ue2_literal lit(ig[v].s.substr(0, len)); |
| 1306 | DEBUG_PRINTF("lit2=%s\n" , dumpString(lit).c_str()); |
| 1307 | u32 delay2 = removeTrailingLiteralStates(h, lit, max_history); |
| 1308 | if (delay2 == MO_INVALID_IDX) { |
| 1309 | DEBUG_PRINTF("couldn't remove trailing literal\n" ); |
| 1310 | return false; |
| 1311 | } |
| 1312 | if (delay2 != len) { |
| 1313 | DEBUG_PRINTF("couldn't remove entire trailing literal\n" ); |
| 1314 | return false; |
| 1315 | } |
| 1316 | } |
| 1317 | |
| 1318 | PureRepeat repeat; |
| 1319 | if (!isPureRepeat(h, repeat)) { |
| 1320 | DEBUG_PRINTF("graph is not repeat\n" ); |
| 1321 | return false; |
| 1322 | } |
| 1323 | DEBUG_PRINTF("graph is %s repeat\n" , repeat.bounds.str().c_str()); |
| 1324 | if (!repeat.bounds.max.is_infinite()) { |
| 1325 | DEBUG_PRINTF("not inf\n" ); |
| 1326 | return false; |
| 1327 | } |
| 1328 | |
| 1329 | if (!repeat.reach.all()) { |
| 1330 | DEBUG_PRINTF("non-dot reach\n" ); |
| 1331 | return false; |
| 1332 | } |
| 1333 | |
| 1334 | *bounds = repeat.bounds; |
| 1335 | return true; |
| 1336 | } |
| 1337 | |
| 1338 | static |
| 1339 | void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd, |
| 1340 | const CompileContext &cc) { |
| 1341 | if (!cc.grey.roseTransformDelay) { |
| 1342 | return; |
| 1343 | } |
| 1344 | |
| 1345 | for (const auto &e : edges_range(ig)) { |
| 1346 | const RoseInVertex u = source(e, ig); |
| 1347 | const RoseInVertex v = target(e, ig); |
| 1348 | |
| 1349 | if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) { |
| 1350 | continue; |
| 1351 | } |
| 1352 | if (ig[e].haig || !ig[e].graph) { |
| 1353 | continue; |
| 1354 | } |
| 1355 | |
| 1356 | if (ig[u].min_offset != ig[u].max_offset) { |
| 1357 | DEBUG_PRINTF("u not fixed depth\n" ); |
| 1358 | continue; |
| 1359 | } |
| 1360 | |
| 1361 | DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n" , |
| 1362 | escapeString(ig[u].s).c_str(), |
| 1363 | escapeString(ig[v].s).c_str()); |
| 1364 | |
| 1365 | DepthMinMax bounds; |
| 1366 | if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) { |
| 1367 | continue; |
| 1368 | } |
| 1369 | |
| 1370 | DEBUG_PRINTF("setting bounds to %s and deleting graph\n" , |
| 1371 | bounds.str().c_str()); |
| 1372 | ig[e].graph_lag = 0; |
| 1373 | ig[e].graph.reset(); |
| 1374 | ig[e].minBound = bounds.min; |
| 1375 | ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max |
| 1376 | : ROSE_BOUND_INF; |
| 1377 | bd.anch_history_edges.insert(e); |
| 1378 | } |
| 1379 | } |
| 1380 | |
| 1381 | /** |
| 1382 | * \brief Transform small trailing dot repeat suffixes into delay on the last |
| 1383 | * literal. |
| 1384 | * |
| 1385 | * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1 |
| 1386 | * rather than having a suffix to handle the dot. |
| 1387 | * |
| 1388 | * This transformation looks for literal->accept edges and transforms them if |
| 1389 | * appropriate. It doesn't handle complex cases where the literal has more than |
| 1390 | * one successor. |
| 1391 | */ |
| 1392 | static |
| 1393 | void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) { |
| 1394 | if (!cc.grey.roseTransformDelay) { |
| 1395 | return; |
| 1396 | } |
| 1397 | |
| 1398 | const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable |
| 1399 | : ROSE_BOUND_INF; |
| 1400 | |
| 1401 | set<RoseInVertex> modified_accepts; // may be dead after transform |
| 1402 | |
| 1403 | for (auto u : vertices_range(ig)) { |
| 1404 | if (ig[u].type != RIV_LITERAL) { |
| 1405 | continue; |
| 1406 | } |
| 1407 | if (out_degree(u, ig) != 1) { |
| 1408 | continue; |
| 1409 | } |
| 1410 | |
| 1411 | RoseInEdge e = *out_edges(u, ig).first; |
| 1412 | RoseInVertex v = target(e, ig); |
| 1413 | if (ig[v].type != RIV_ACCEPT) { |
| 1414 | continue; |
| 1415 | } |
| 1416 | if (ig[e].haig) { |
| 1417 | continue; |
| 1418 | } |
| 1419 | if (!ig[e].graph) { |
| 1420 | continue; |
| 1421 | } |
| 1422 | |
| 1423 | if (predsAreDelaySensitive(ig, u)) { |
| 1424 | DEBUG_PRINTF("preds are delay sensitive\n" ); |
| 1425 | continue; |
| 1426 | } |
| 1427 | |
| 1428 | DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n" , |
| 1429 | escapeString(ig[u].s).c_str()); |
| 1430 | |
| 1431 | const NGHolder &h = *ig[e].graph; |
| 1432 | const set<ReportID> reports = all_reports(h); |
| 1433 | if (reports.size() != 1) { |
| 1434 | DEBUG_PRINTF("too many reports\n" ); |
| 1435 | continue; |
| 1436 | } |
| 1437 | |
| 1438 | PureRepeat repeat; |
| 1439 | if (!isPureRepeat(h, repeat)) { |
| 1440 | DEBUG_PRINTF("suffix graph is not repeat\n" ); |
| 1441 | continue; |
| 1442 | } |
| 1443 | DEBUG_PRINTF("suffix graph is %s repeat\n" , |
| 1444 | repeat.bounds.str().c_str()); |
| 1445 | |
| 1446 | if (!repeat.reach.all()) { |
| 1447 | DEBUG_PRINTF("non-dot reach\n" ); |
| 1448 | continue; |
| 1449 | } |
| 1450 | |
| 1451 | if (repeat.bounds.min != repeat.bounds.max || |
| 1452 | repeat.bounds.min > depth(MAX_DELAY)) { |
| 1453 | DEBUG_PRINTF("repeat is variable or too large\n" ); |
| 1454 | continue; |
| 1455 | } |
| 1456 | |
| 1457 | u32 max_delay = findMaxSafeDelay(ig, u, v); |
| 1458 | |
| 1459 | u32 delay = repeat.bounds.min; |
| 1460 | if (delay > max_delay) { |
| 1461 | DEBUG_PRINTF("delay=%u > max_delay=%u\n" , delay, max_delay); |
| 1462 | continue; |
| 1463 | } |
| 1464 | |
| 1465 | if (delay + ig[u].s.length() - 1 > max_history) { |
| 1466 | DEBUG_PRINTF("delay too large for history\n" ); |
| 1467 | continue; |
| 1468 | } |
| 1469 | |
| 1470 | DEBUG_PRINTF("setting lit delay to %u and removing suffix\n" , delay); |
| 1471 | ig[u].delay = delay; |
| 1472 | ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay); |
| 1473 | ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay); |
| 1474 | |
| 1475 | // Construct a new accept vertex for this report and remove edge e. |
| 1476 | // (This allows us to cope if v has more than one in-edge). |
| 1477 | RoseInVertex v2 = |
| 1478 | add_vertex(RoseInVertexProps::makeAccept(reports), ig); |
| 1479 | add_edge(u, v2, ig); |
| 1480 | remove_edge(e, ig); |
| 1481 | modified_accepts.insert(v); |
| 1482 | } |
| 1483 | |
| 1484 | DEBUG_PRINTF("%zu modified accepts\n" , modified_accepts.size()); |
| 1485 | |
| 1486 | for (auto v : modified_accepts) { |
| 1487 | if (in_degree(v, ig) == 0) { |
| 1488 | DEBUG_PRINTF("removing accept vertex with no preds\n" ); |
| 1489 | remove_vertex(v, ig); |
| 1490 | } |
| 1491 | } |
| 1492 | } |
| 1493 | |
| 1494 | #ifndef NDEBUG |
| 1495 | static |
| 1496 | bool validateKinds(const RoseInGraph &g) { |
| 1497 | for (const auto &e : edges_range(g)) { |
| 1498 | if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) { |
| 1499 | return false; |
| 1500 | } |
| 1501 | } |
| 1502 | |
| 1503 | return true; |
| 1504 | } |
| 1505 | #endif |
| 1506 | |
| 1507 | bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { |
| 1508 | DEBUG_PRINTF("trying to rose\n" ); |
| 1509 | assert(validateKinds(ig)); |
| 1510 | assert(hasCorrectlyNumberedVertices(ig)); |
| 1511 | |
| 1512 | if (::ue2::empty(ig)) { |
| 1513 | assert(0); |
| 1514 | return false; |
| 1515 | } |
| 1516 | |
| 1517 | const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig); |
| 1518 | RoseInGraph &in = *in_ptr; |
| 1519 | |
| 1520 | RoseBuildData bd(in, false); |
| 1521 | |
| 1522 | transformLiteralDelay(in, cc); |
| 1523 | transformAnchoredLiteralOverlap(in, bd, cc); |
| 1524 | transformSuffixDelay(in, cc); |
| 1525 | |
| 1526 | renumber_vertices(in); |
| 1527 | assert(validateKinds(in)); |
| 1528 | |
| 1529 | insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs; |
| 1530 | |
| 1531 | for (const auto &e : edges_range(in)) { |
| 1532 | if (!in[e].graph) { |
| 1533 | assert(!in[e].dfa); |
| 1534 | assert(!in[e].haig); |
| 1535 | continue; // no graph |
| 1536 | } |
| 1537 | |
| 1538 | if (in[e].haig || in[e].dfa) { |
| 1539 | /* Early DFAs/Haigs are always implementable (we've already built |
| 1540 | * the raw DFA). */ |
| 1541 | continue; |
| 1542 | } |
| 1543 | |
| 1544 | NGHolder *h = in[e].graph.get(); |
| 1545 | |
| 1546 | assert(isCorrectlyTopped(*h)); |
| 1547 | graphs[h].push_back(e); |
| 1548 | } |
| 1549 | |
| 1550 | vector<RoseInEdge> graph_edges; |
| 1551 | |
| 1552 | for (const auto &m : graphs) { |
| 1553 | NGHolder *h = m.first; |
| 1554 | if (!canImplementGraph(*h, prefilter, rm, cc)) { |
| 1555 | return false; |
| 1556 | } |
| 1557 | insert(&graph_edges, graph_edges.end(), m.second); |
| 1558 | } |
| 1559 | |
| 1560 | /* we are now past the point of no return. We can start making irreversible |
| 1561 | changes to the rose graph, etc */ |
| 1562 | |
| 1563 | for (const auto &e : graph_edges) { |
| 1564 | assert(in[e].graph); |
| 1565 | assert(!in[e].haig); |
| 1566 | NGHolder &h = *in[e].graph; |
| 1567 | DEBUG_PRINTF("handling %p\n" , &h); |
| 1568 | assert(allMatchStatesHaveReports(h)); |
| 1569 | |
| 1570 | if (!generates_callbacks(whatRoseIsThis(in, e)) |
| 1571 | && in[target(e, in)].type != RIV_ACCEPT_EOD) { |
| 1572 | set_report(h, getNewNfaReport()); |
| 1573 | } |
| 1574 | } |
| 1575 | |
| 1576 | populateRoseGraph(this, bd); |
| 1577 | |
| 1578 | return true; |
| 1579 | } |
| 1580 | |
| 1581 | bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) { |
| 1582 | DEBUG_PRINTF("rose is trying to consume a sombe\n" ); |
| 1583 | assert(validateKinds(ig)); |
| 1584 | |
| 1585 | if (::ue2::empty(ig)) { |
| 1586 | assert(0); |
| 1587 | return false; |
| 1588 | } |
| 1589 | |
| 1590 | RoseBuildData bd(ig, true); |
| 1591 | |
| 1592 | for (const auto &e : edges_range(ig)) { |
| 1593 | if (!ig[e].graph) { |
| 1594 | continue; // no graph |
| 1595 | } |
| 1596 | DEBUG_PRINTF("handling %p\n" , ig[e].graph.get()); |
| 1597 | assert(allMatchStatesHaveReports(*ig[e].graph)); |
| 1598 | assert(ig[e].haig); |
| 1599 | } |
| 1600 | |
| 1601 | populateRoseGraph(this, bd); |
| 1602 | |
| 1603 | return true; |
| 1604 | } |
| 1605 | |
| 1606 | bool roseCheckRose(const RoseInGraph &ig, bool prefilter, |
| 1607 | const ReportManager &rm, const CompileContext &cc) { |
| 1608 | assert(validateKinds(ig)); |
| 1609 | |
| 1610 | if (::ue2::empty(ig)) { |
| 1611 | assert(0); |
| 1612 | return false; |
| 1613 | } |
| 1614 | |
| 1615 | vector<NGHolder *> graphs; |
| 1616 | |
| 1617 | for (const auto &e : edges_range(ig)) { |
| 1618 | if (!ig[e].graph) { |
| 1619 | continue; // no graph |
| 1620 | } |
| 1621 | |
| 1622 | if (ig[e].haig) { |
| 1623 | // Haigs are always implementable (we've already built the raw DFA). |
| 1624 | continue; |
| 1625 | } |
| 1626 | |
| 1627 | graphs.push_back(ig[e].graph.get()); |
| 1628 | } |
| 1629 | |
| 1630 | for (const auto &g : graphs) { |
| 1631 | if (!canImplementGraph(*g, prefilter, rm, cc)) { |
| 1632 | return false; |
| 1633 | } |
| 1634 | } |
| 1635 | |
| 1636 | return true; |
| 1637 | } |
| 1638 | |
| 1639 | void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit, |
| 1640 | const flat_set<ReportID> &reports) { |
| 1641 | assert(!reports.empty()); |
| 1642 | |
| 1643 | if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) && |
| 1644 | lit.length() > 3) { |
| 1645 | DEBUG_PRINTF("adding as puffette\n" ); |
| 1646 | const CharReach &cr = *lit.begin(); |
| 1647 | for (const auto &report : reports) { |
| 1648 | addOutfix(raw_puff(lit.length(), true, report, cr, true)); |
| 1649 | } |
| 1650 | |
| 1651 | return; |
| 1652 | } |
| 1653 | |
| 1654 | RoseInGraph ig; |
| 1655 | RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig); |
| 1656 | RoseInVertex accept = add_vertex( |
| 1657 | eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>()) |
| 1658 | : RoseInVertexProps::makeAccept(set<ReportID>()), ig); |
| 1659 | RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); |
| 1660 | |
| 1661 | add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig); |
| 1662 | add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig); |
| 1663 | |
| 1664 | calcVertexOffsets(ig); |
| 1665 | |
| 1666 | ig[accept].reports.insert(reports.begin(), reports.end()); |
| 1667 | |
| 1668 | addRose(ig, false); |
| 1669 | } |
| 1670 | |
| 1671 | static |
| 1672 | u32 findMaxBAWidth(const NGHolder &h) { |
| 1673 | // Must be bi-anchored: no out-edges from startDs (other than its |
| 1674 | // self-loop), no in-edges to accept. |
| 1675 | if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) { |
| 1676 | return ROSE_BOUND_INF; |
| 1677 | } |
| 1678 | depth d = findMaxWidth(h); |
| 1679 | assert(d.is_reachable()); |
| 1680 | |
| 1681 | if (!d.is_finite()) { |
| 1682 | return ROSE_BOUND_INF; |
| 1683 | } |
| 1684 | return d; |
| 1685 | } |
| 1686 | |
| 1687 | static |
| 1688 | void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h, |
| 1689 | const RoseBuildImpl &tbi) { |
| 1690 | outfix.maxBAWidth = findMaxBAWidth(h); |
| 1691 | outfix.minWidth = findMinWidth(h); |
| 1692 | outfix.maxWidth = findMaxWidth(h); |
| 1693 | outfix.maxOffset = findMaxOffset(h, tbi.rm); |
| 1694 | populateReverseAccelerationInfo(outfix.rev_info, h); |
| 1695 | } |
| 1696 | |
| 1697 | static |
| 1698 | bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) { |
| 1699 | map<flat_set<ReportID>, ReportID> report_remap; |
| 1700 | shared_ptr<NGHolder> eod_leftfix |
| 1701 | = makeRoseEodPrefix(h, build, report_remap); |
| 1702 | |
| 1703 | bool nfa_ok = isImplementableNFA(h, &build.rm, build.cc); |
| 1704 | |
| 1705 | /* TODO: check if early dfa is possible */ |
| 1706 | |
| 1707 | if (!nfa_ok) { |
| 1708 | DEBUG_PRINTF("could not build as NFA\n" ); |
| 1709 | return false; |
| 1710 | } |
| 1711 | |
| 1712 | u32 eod_event = getEodEventID(build); |
| 1713 | |
| 1714 | auto &g = build.g; |
| 1715 | for (const auto &report_mapping : report_remap) { |
| 1716 | RoseVertex v = add_vertex(g); |
| 1717 | g[v].literals.insert(eod_event); |
| 1718 | build.literal_info[eod_event].vertices.insert(v); |
| 1719 | |
| 1720 | g[v].left.graph = eod_leftfix; |
| 1721 | g[v].left.leftfix_report = report_mapping.second; |
| 1722 | g[v].left.lag = 0; |
| 1723 | RoseEdge e1 = add_edge(build.anchored_root, v, g); |
| 1724 | g[e1].minBound = 0; |
| 1725 | g[e1].maxBound = ROSE_BOUND_INF; |
| 1726 | g[v].min_offset = findMinWidth(*eod_leftfix); |
| 1727 | g[v].max_offset = ROSE_BOUND_INF; |
| 1728 | |
| 1729 | depth max_width = findMaxWidth(*g[v].left.graph); |
| 1730 | if (max_width.is_finite() && isPureAnchored(*eod_leftfix)) { |
| 1731 | g[e1].maxBound = max_width; |
| 1732 | g[v].max_offset = max_width; |
| 1733 | } |
| 1734 | |
| 1735 | g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix |
| 1736 | RoseVertex w = add_vertex(g); |
| 1737 | g[w].eod_accept = true; |
| 1738 | g[w].reports = report_mapping.first; |
| 1739 | g[w].min_offset = g[v].min_offset; |
| 1740 | g[w].max_offset = g[v].max_offset; |
| 1741 | RoseEdge e = add_edge(v, w, g); |
| 1742 | g[e].minBound = 0; |
| 1743 | g[e].maxBound = 0; |
| 1744 | g[e].history = ROSE_ROLE_HISTORY_NONE; |
| 1745 | DEBUG_PRINTF("accept eod vertex (index=%zu)\n" , g[w].index); |
| 1746 | } |
| 1747 | |
| 1748 | return true; |
| 1749 | } |
| 1750 | |
| 1751 | bool RoseBuildImpl::addOutfix(const NGHolder &h) { |
| 1752 | DEBUG_PRINTF("%zu vertices, %zu edges\n" , num_vertices(h), num_edges(h)); |
| 1753 | |
| 1754 | /* TODO: handle more than one report */ |
| 1755 | if (!in_degree(h.accept, h) |
| 1756 | && all_reports(h).size() == 1 |
| 1757 | && addEodOutfix(*this, h)) { |
| 1758 | return true; |
| 1759 | } |
| 1760 | |
| 1761 | const u32 nfa_states = isImplementableNFA(h, &rm, cc); |
| 1762 | if (nfa_states) { |
| 1763 | DEBUG_PRINTF("implementable as an NFA in %u states\n" , nfa_states); |
| 1764 | } else { |
| 1765 | DEBUG_PRINTF("not implementable as an NFA\n" ); |
| 1766 | } |
| 1767 | |
| 1768 | bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */ |
| 1769 | || can_exhaust(h, rm); /* can be pruned */ |
| 1770 | |
| 1771 | unique_ptr<raw_dfa> rdfa; |
| 1772 | |
| 1773 | if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 || |
| 1774 | (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) { |
| 1775 | rdfa = buildMcClellan(h, &rm, cc.grey); |
| 1776 | } |
| 1777 | |
| 1778 | if (!nfa_states && !rdfa) { |
| 1779 | DEBUG_PRINTF("could not build as either an NFA or a DFA\n" ); |
| 1780 | return false; |
| 1781 | } |
| 1782 | |
| 1783 | if (rdfa) { |
| 1784 | outfixes.push_back(OutfixInfo(move(rdfa))); |
| 1785 | } else { |
| 1786 | outfixes.push_back(OutfixInfo(cloneHolder(h))); |
| 1787 | } |
| 1788 | |
| 1789 | populateOutfixInfo(outfixes.back(), h, *this); |
| 1790 | |
| 1791 | return true; |
| 1792 | } |
| 1793 | |
| 1794 | bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) { |
| 1795 | DEBUG_PRINTF("haig with %zu states\n" , haig.states.size()); |
| 1796 | |
| 1797 | outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig))); |
| 1798 | populateOutfixInfo(outfixes.back(), h, *this); |
| 1799 | |
| 1800 | return true; /* failure is not yet an option */ |
| 1801 | } |
| 1802 | |
| 1803 | bool RoseBuildImpl::addOutfix(const raw_puff &rp) { |
| 1804 | if (!mpv_outfix) { |
| 1805 | mpv_outfix = make_unique<OutfixInfo>(MpvProto()); |
| 1806 | } |
| 1807 | |
| 1808 | auto *mpv = mpv_outfix->mpv(); |
| 1809 | assert(mpv); |
| 1810 | mpv->puffettes.push_back(rp); |
| 1811 | |
| 1812 | mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ |
| 1813 | mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); |
| 1814 | mpv_outfix->maxWidth = rp.unbounded |
| 1815 | ? depth::infinity() |
| 1816 | : max(mpv_outfix->maxWidth, depth(rp.repeats)); |
| 1817 | |
| 1818 | if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) { |
| 1819 | mpv_outfix->maxOffset = ROSE_BOUND_INF; |
| 1820 | } else { |
| 1821 | mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats); |
| 1822 | } |
| 1823 | |
| 1824 | return true; /* failure is not yet an option */ |
| 1825 | } |
| 1826 | |
| 1827 | bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, |
| 1828 | u32 *event_out) { |
| 1829 | if (!mpv_outfix) { |
| 1830 | mpv_outfix = make_unique<OutfixInfo>(MpvProto()); |
| 1831 | } |
| 1832 | |
| 1833 | auto *mpv = mpv_outfix->mpv(); |
| 1834 | assert(mpv); |
| 1835 | mpv->triggered_puffettes.push_back(rp); |
| 1836 | |
| 1837 | mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ |
| 1838 | mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); |
| 1839 | mpv_outfix->maxWidth = rp.unbounded |
| 1840 | ? depth::infinity() |
| 1841 | : max(mpv_outfix->maxWidth, depth(rp.repeats)); |
| 1842 | |
| 1843 | mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from |
| 1844 | * the caller */ |
| 1845 | |
| 1846 | *queue_out = mpv_outfix->get_queue(qif); |
| 1847 | *event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1; |
| 1848 | |
| 1849 | return true; /* failure is not yet an option */ |
| 1850 | } |
| 1851 | |
| 1852 | static |
| 1853 | bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w, |
| 1854 | NFAVertex u, |
| 1855 | const vector<DepthMinMax> &vertexDepths, |
| 1856 | map<u32, DepthMinMax> &depthMap, |
| 1857 | map<NFAVertex, set<u32>> &reportMap, |
| 1858 | map<ReportID, u32> &allocated_reports, |
| 1859 | flat_set<u32> &added_lit_ids) { |
| 1860 | const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion); |
| 1861 | const size_t index = w[u].index; |
| 1862 | assert(index < vertexDepths.size()); |
| 1863 | const DepthMinMax &d = vertexDepths.at(index); |
| 1864 | |
| 1865 | for (const auto &int_report : w[u].reports) { |
| 1866 | assert(int_report != MO_INVALID_IDX); |
| 1867 | |
| 1868 | u32 lit_id; |
| 1869 | if (!contains(allocated_reports, int_report)) { |
| 1870 | lit_id = tbi.getNewLiteralId(); |
| 1871 | added_lit_ids.insert(lit_id); |
| 1872 | allocated_reports[int_report] = lit_id; |
| 1873 | } else { |
| 1874 | lit_id = allocated_reports[int_report]; |
| 1875 | } |
| 1876 | |
| 1877 | reportMap[u].insert(lit_id); |
| 1878 | |
| 1879 | if (!contains(depthMap, lit_id)) { |
| 1880 | depthMap[lit_id] = d; |
| 1881 | } else { |
| 1882 | depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d); |
| 1883 | } |
| 1884 | |
| 1885 | if (depthMap[lit_id].max > max_anchored_depth) { |
| 1886 | DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n" , |
| 1887 | depthMap[lit_id].max.str().c_str(), |
| 1888 | tbi.cc.grey.maxAnchoredRegion); |
| 1889 | return false; |
| 1890 | } |
| 1891 | } |
| 1892 | |
| 1893 | return true; |
| 1894 | } |
| 1895 | |
| 1896 | // Failure path for addAnchoredAcyclic: removes the literal IDs that have been |
| 1897 | // added to support anchored NFAs. Assumes that they are a contiguous range at |
| 1898 | // the end of the RoseBuildImpl::literal_info vector. |
| 1899 | static |
| 1900 | void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) { |
| 1901 | if (lit_ids.empty()) { |
| 1902 | return; |
| 1903 | } |
| 1904 | |
| 1905 | DEBUG_PRINTF("remove last %zu literals\n" , lit_ids.size()); |
| 1906 | |
| 1907 | // lit_ids should be a contiguous range. |
| 1908 | assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1); |
| 1909 | assert(*lit_ids.rbegin() == tbi.literals.size() - 1); |
| 1910 | |
| 1911 | assert(all_of_in(lit_ids, [&](u32 lit_id) { |
| 1912 | return lit_id < tbi.literal_info.size() && |
| 1913 | tbi.literals.at(lit_id).table == ROSE_ANCHORED && |
| 1914 | tbi.literal_info[lit_id].vertices.empty(); |
| 1915 | })); |
| 1916 | |
| 1917 | tbi.literals.erase_back(lit_ids.size()); |
| 1918 | assert(tbi.literals.size() == *lit_ids.begin()); |
| 1919 | |
| 1920 | // lit_ids should be at the end of tbi.literal_info. |
| 1921 | assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1); |
| 1922 | tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids |
| 1923 | } |
| 1924 | |
| 1925 | bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) { |
| 1926 | auto vertexDepths = calcDepthsFrom(h, h.start); |
| 1927 | |
| 1928 | map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */ |
| 1929 | map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */ |
| 1930 | map<ReportID, u32> allocated_reports; /* report -> literal id */ |
| 1931 | flat_set<u32> added_lit_ids; /* literal ids added for this NFA */ |
| 1932 | |
| 1933 | for (auto v : inv_adjacent_vertices_range(h.accept, h)) { |
| 1934 | if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap, |
| 1935 | reportMap, allocated_reports, |
| 1936 | added_lit_ids)) { |
| 1937 | removeAddedLiterals(*this, added_lit_ids); |
| 1938 | return false; |
| 1939 | } |
| 1940 | } |
| 1941 | |
| 1942 | map<ReportID, u32> allocated_reports_eod; /* report -> literal id */ |
| 1943 | |
| 1944 | for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { |
| 1945 | if (v == h.accept) { |
| 1946 | continue; |
| 1947 | } |
| 1948 | if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap, |
| 1949 | reportMap, allocated_reports_eod, |
| 1950 | added_lit_ids)) { |
| 1951 | removeAddedLiterals(*this, added_lit_ids); |
| 1952 | return false; |
| 1953 | } |
| 1954 | } |
| 1955 | |
| 1956 | assert(!reportMap.empty()); |
| 1957 | |
| 1958 | int rv = addAnchoredNFA(*this, h, reportMap); |
| 1959 | if (rv != ANCHORED_FAIL) { |
| 1960 | assert(rv != ANCHORED_REMAP); |
| 1961 | DEBUG_PRINTF("added anchored nfa\n" ); |
| 1962 | /* add edges to the rose graph to bubble the match up */ |
| 1963 | for (const auto &m : allocated_reports) { |
| 1964 | const ReportID &report = m.first; |
| 1965 | const u32 &lit_id = m.second; |
| 1966 | assert(depthMap[lit_id].max.is_finite()); |
| 1967 | u32 minBound = depthMap[lit_id].min; |
| 1968 | u32 maxBound = depthMap[lit_id].max; |
| 1969 | RoseVertex v |
| 1970 | = createAnchoredVertex(this, lit_id, minBound, maxBound); |
| 1971 | g[v].reports.insert(report); |
| 1972 | } |
| 1973 | |
| 1974 | for (const auto &m : allocated_reports_eod) { |
| 1975 | const ReportID &report = m.first; |
| 1976 | const u32 &lit_id = m.second; |
| 1977 | assert(depthMap[lit_id].max.is_finite()); |
| 1978 | u32 minBound = depthMap[lit_id].min; |
| 1979 | u32 maxBound = depthMap[lit_id].max; |
| 1980 | RoseVertex v |
| 1981 | = createAnchoredVertex(this, lit_id, minBound, maxBound); |
| 1982 | RoseVertex eod = add_vertex(g); |
| 1983 | g[eod].eod_accept = true; |
| 1984 | g[eod].reports.insert(report); |
| 1985 | g[eod].min_offset = g[v].min_offset; |
| 1986 | g[eod].max_offset = g[v].max_offset; |
| 1987 | add_edge(v, eod, g); |
| 1988 | } |
| 1989 | |
| 1990 | return true; |
| 1991 | } else { |
| 1992 | DEBUG_PRINTF("failed to add anchored nfa\n" ); |
| 1993 | removeAddedLiterals(*this, added_lit_ids); |
| 1994 | return false; |
| 1995 | } |
| 1996 | } |
| 1997 | |
| 1998 | } // namespace ue2 |
| 1999 | |