| 1 | /* | 
| 2 |  * Copyright (c) 2015-2017, Intel Corporation | 
| 3 |  * | 
| 4 |  * Redistribution and use in source and binary forms, with or without | 
| 5 |  * modification, are permitted provided that the following conditions are met: | 
| 6 |  * | 
| 7 |  *  * Redistributions of source code must retain the above copyright notice, | 
| 8 |  *    this list of conditions and the following disclaimer. | 
| 9 |  *  * Redistributions in binary form must reproduce the above copyright | 
| 10 |  *    notice, this list of conditions and the following disclaimer in the | 
| 11 |  *    documentation and/or other materials provided with the distribution. | 
| 12 |  *  * Neither the name of Intel Corporation nor the names of its contributors | 
| 13 |  *    may be used to endorse or promote products derived from this software | 
| 14 |  *    without specific prior written permission. | 
| 15 |  * | 
| 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
| 17 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 18 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
| 19 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
| 20 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
| 21 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
| 22 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
| 23 |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
| 24 |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
| 25 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
| 26 |  * POSSIBILITY OF SUCH DAMAGE. | 
| 27 |  */ | 
| 28 |  | 
| 29 | /** | 
| 30 |  * \file | 
| 31 |  * \brief SOM ("Start of Match") analysis. | 
| 32 |  */ | 
| 33 |  | 
| 34 | #include "ng_som.h" | 
| 35 |  | 
| 36 | #include "ng.h" | 
| 37 | #include "ng_dump.h" | 
| 38 | #include "ng_equivalence.h" | 
| 39 | #include "ng_execute.h" | 
| 40 | #include "ng_haig.h" | 
| 41 | #include "ng_limex.h" | 
| 42 | #include "ng_literal_analysis.h" | 
| 43 | #include "ng_prune.h" | 
| 44 | #include "ng_redundancy.h" | 
| 45 | #include "ng_region.h" | 
| 46 | #include "ng_reports.h" | 
| 47 | #include "ng_som_add_redundancy.h" | 
| 48 | #include "ng_som_util.h" | 
| 49 | #include "ng_split.h" | 
| 50 | #include "ng_util.h" | 
| 51 | #include "ng_violet.h" | 
| 52 | #include "ng_width.h" | 
| 53 | #include "grey.h" | 
| 54 | #include "ue2common.h" | 
| 55 | #include "compiler/compiler.h" | 
| 56 | #include "nfa/goughcompile.h" | 
| 57 | #include "nfa/nfa_internal.h" // for MO_INVALID_IDX | 
| 58 | #include "parser/position.h" | 
| 59 | #include "som/som.h" | 
| 60 | #include "rose/rose_build.h" | 
| 61 | #include "rose/rose_in_util.h" | 
| 62 | #include "util/alloc.h" | 
| 63 | #include "util/compare.h" | 
| 64 | #include "util/compile_error.h" | 
| 65 | #include "util/container.h" | 
| 66 | #include "util/dump_charclass.h" | 
| 67 | #include "util/graph_range.h" | 
| 68 | #include "util/make_unique.h" | 
| 69 |  | 
| 70 | #include <algorithm> | 
| 71 | #include <map> | 
| 72 | #include <unordered_map> | 
| 73 | #include <unordered_set> | 
| 74 | #include <vector> | 
| 75 |  | 
| 76 | using namespace std; | 
| 77 |  | 
| 78 | namespace ue2 { | 
| 79 |  | 
| 80 | static const size_t MAX_SOM_PLANS = 10; | 
| 81 | static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000; | 
| 82 |  | 
| 83 | #define MAX_REV_NFA_PREFIX 80 | 
| 84 |  | 
| 85 | namespace { | 
| 86 | struct som_plan { | 
| 87 |     som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i, | 
| 88 |              u32 parent_in) : prefix(p), escapes(e), is_reset(i), | 
| 89 |                               no_implement(false), parent(parent_in) { } | 
| 90 |     shared_ptr<NGHolder> prefix; | 
| 91 |     CharReach escapes; | 
| 92 |     bool is_reset; | 
| 93 |     bool no_implement; | 
| 94 |     u32 parent; // index of parent plan in the vector. | 
| 95 |  | 
| 96 |     // Reporters: a list of vertices in the graph that must be have their | 
| 97 |     // reports updated at implementation time to report this plan's | 
| 98 |     // som_loc_out. | 
| 99 |     vector<NFAVertex> reporters; | 
| 100 |  | 
| 101 |     // Similar, but these report the som_loc_in. | 
| 102 |     vector<NFAVertex> reporters_in; | 
| 103 | }; | 
| 104 | } | 
| 105 |  | 
| 106 | static | 
| 107 | bool regionCanEstablishSom(const NGHolder &g, | 
| 108 |                            const unordered_map<NFAVertex, u32> ®ions, | 
| 109 |                            const u32 region, const vector<NFAVertex> &r_exits, | 
| 110 |                            const vector<DepthMinMax> &depths) { | 
| 111 |     if (region == regions.at(g.accept) || | 
| 112 |         region == regions.at(g.acceptEod)) { | 
| 113 |         DEBUG_PRINTF("accept in region\n" ); | 
| 114 |         return false; | 
| 115 |     } | 
| 116 |  | 
| 117 |     DEBUG_PRINTF("region %u\n" , region); | 
| 118 |     for (UNUSED auto v : r_exits) { | 
| 119 |         DEBUG_PRINTF("    exit %zu\n" , g[v].index); | 
| 120 |     } | 
| 121 |  | 
| 122 |     /* simple if each region exit is at fixed distance from SOM. Note SOM does | 
| 123 |        not include virtual starts */ | 
| 124 |     for (auto v : r_exits) { | 
| 125 |         assert(regions.at(v) == region); | 
| 126 |         const DepthMinMax &d = depths.at(g[v].index); | 
| 127 |         if (d.min != d.max) { | 
| 128 |             DEBUG_PRINTF("failing %zu as %s != %s\n" , g[v].index, | 
| 129 |                          d.min.str().c_str(), d.max.str().c_str()); | 
| 130 |             return false; | 
| 131 |         } | 
| 132 |     } | 
| 133 |     DEBUG_PRINTF("region %u/%zu is good\n" , regions.at(r_exits[0]), | 
| 134 |                  g[r_exits[0]].index); | 
| 135 |  | 
| 136 |     return true; | 
| 137 | } | 
| 138 |  | 
| 139 | namespace { | 
| 140 |  | 
| 141 | struct region_info { | 
| 142 |     region_info() : optional(false), dag(false) {} | 
| 143 |     vector<NFAVertex> enters; | 
| 144 |     vector<NFAVertex> exits; | 
| 145 |     vector<NFAVertex> full; | 
| 146 |     bool optional; /* skip edges around region */ | 
| 147 |     bool dag; /* completely acyclic */ | 
| 148 | }; | 
| 149 |  | 
| 150 | } | 
| 151 |  | 
| 152 | static | 
| 153 | void buildRegionMapping(const NGHolder &g, | 
| 154 |                         const unordered_map<NFAVertex, u32> ®ions, | 
| 155 |                         map<u32, region_info> &info, | 
| 156 |                         bool include_region_0 = false) { | 
| 157 |     for (auto v : vertices_range(g)) { | 
| 158 |         u32 region = regions.at(v); | 
| 159 |         if (!include_region_0 && (is_any_start(v, g) || region == 0)) { | 
| 160 |             continue; | 
| 161 |         } | 
| 162 |         assert(!region || !is_any_start(v, g)); | 
| 163 |  | 
| 164 |         if (is_any_accept(v, g)) { | 
| 165 |             continue; | 
| 166 |         } | 
| 167 |  | 
| 168 |         if (isRegionEntry(g, v, regions)) { | 
| 169 |             info[region].enters.push_back(v); | 
| 170 |         } | 
| 171 |         if (isRegionExit(g, v, regions)) { | 
| 172 |             info[region].exits.push_back(v); | 
| 173 |         } | 
| 174 |         info[region].full.push_back(v); | 
| 175 |     } | 
| 176 |  | 
| 177 |     for (auto &m : info) { | 
| 178 |         if (!m.second.enters.empty() | 
| 179 |             && isOptionalRegion(g, m.second.enters.front(), regions)) { | 
| 180 |             m.second.optional = true; | 
| 181 |         } | 
| 182 |         m.second.dag = true; /* will be cleared for cyclic regions later */ | 
| 183 |     } | 
| 184 |  | 
| 185 |     set<NFAEdge> be; | 
| 186 |     BackEdges<set<NFAEdge> > backEdgeVisitor(be); | 
| 187 |     boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start)); | 
| 188 |  | 
| 189 |     for (const auto &e : be) { | 
| 190 |         NFAVertex u = source(e, g); | 
| 191 |         NFAVertex v = target(e, g); | 
| 192 |         if (is_special(u, g) || is_special(v, g)) { | 
| 193 |             assert(is_special(u, g) && is_special(v, g)); | 
| 194 |             continue; | 
| 195 |         } | 
| 196 |         u32 r = regions.at(v); | 
| 197 |         assert(regions.at(u) == r); | 
| 198 |         info[r].dag = false; | 
| 199 |     } | 
| 200 |  | 
| 201 |     if (include_region_0) { | 
| 202 |         info[0].dag = false; | 
| 203 |     } | 
| 204 |  | 
| 205 |     #ifdef DEBUG | 
| 206 |     for (const auto &m : info) { | 
| 207 |         u32 r = m.first; | 
| 208 |         const region_info &r_i = m.second; | 
| 209 |         DEBUG_PRINTF("region %u:%s%s\n" , r, | 
| 210 |                      r_i.dag ? " (dag)"  : "" , | 
| 211 |                      r_i.optional ? " (optional)"  : "" ); | 
| 212 |         DEBUG_PRINTF("  enters:" ); | 
| 213 |         for (u32 i = 0; i < r_i.enters.size(); i++) { | 
| 214 |             printf(" %zu" , g[r_i.enters[i]].index); | 
| 215 |         } | 
| 216 |         printf("\n" ); | 
| 217 |         DEBUG_PRINTF("  exits:" ); | 
| 218 |         for (u32 i = 0; i < r_i.exits.size(); i++) { | 
| 219 |             printf(" %zu" , g[r_i.exits[i]].index); | 
| 220 |         } | 
| 221 |         printf("\n" ); | 
| 222 |         DEBUG_PRINTF("  all:" ); | 
| 223 |         for (u32 i = 0; i < r_i.full.size(); i++) { | 
| 224 |             printf(" %zu" , g[r_i.full[i]].index); | 
| 225 |         } | 
| 226 |         printf("\n" ); | 
| 227 |     } | 
| 228 |     #endif | 
| 229 | } | 
| 230 |  | 
| 231 | static | 
| 232 | bool validateXSL(const NGHolder &g, | 
| 233 |                  const unordered_map<NFAVertex, u32> ®ions, | 
| 234 |                  const u32 region, const CharReach &escapes, u32 *bad_region) { | 
| 235 |     /* need to check that the escapes escape all of the graph past region */ | 
| 236 |     u32 first_bad_region = ~0U; | 
| 237 |     for (auto v : vertices_range(g)) { | 
| 238 |         u32 v_region = regions.at(v); | 
| 239 |         if (!is_special(v, g) && v_region > region && | 
| 240 |             (escapes & g[v].char_reach).any()) { | 
| 241 |             DEBUG_PRINTF("problem with escapes for %zu\n" , g[v].index); | 
| 242 |             first_bad_region = MIN(first_bad_region, v_region); | 
| 243 |         } | 
| 244 |     } | 
| 245 |  | 
| 246 |     if (first_bad_region != ~0U) { | 
| 247 |         *bad_region = first_bad_region; | 
| 248 |         return false; | 
| 249 |     } | 
| 250 |  | 
| 251 |     return true; | 
| 252 | } | 
| 253 |  | 
| 254 | static | 
| 255 | bool validateEXSL(const NGHolder &g, | 
| 256 |                   const unordered_map<NFAVertex, u32> ®ions, | 
| 257 |                   const u32 region, const CharReach &escapes, | 
| 258 |                   const NGHolder &prefix, u32 *bad_region) { | 
| 259 |     /* EXSL: To be a valid EXSL with escapes e, we require that all states | 
| 260 |      * go dead after /[e][^e]*{subsequent prefix match}/. | 
| 261 |      */ | 
| 262 |  | 
| 263 |     /* TODO: this is overly conservative as it allow partial matches from the | 
| 264 |      * prefix to be considered even when the tail has processed some [^e] */ | 
| 265 |  | 
| 266 |     u32 first_bad_region = ~0U; | 
| 267 |     const vector<CharReach> escapes_vec(1, escapes); | 
| 268 |     const vector<CharReach> notescapes_vec(1, ~escapes); | 
| 269 |  | 
| 270 |     flat_set<NFAVertex> states; | 
| 271 |     /* turn on all states past the prefix */ | 
| 272 |     DEBUG_PRINTF("region %u is cutover\n" , region); | 
| 273 |     for (auto v : vertices_range(g)) { | 
| 274 |         if (!is_special(v, g) && regions.at(v) > region) { | 
| 275 |             states.insert(v); | 
| 276 |         } | 
| 277 |     } | 
| 278 |  | 
| 279 |     /* process the escapes */ | 
| 280 |     states = execute_graph(g, escapes_vec, states); | 
| 281 |  | 
| 282 |     /* flood with any number of not escapes */ | 
| 283 |     flat_set<NFAVertex> prev_states; | 
| 284 |     while (prev_states != states) { | 
| 285 |         prev_states = states; | 
| 286 |         states = execute_graph(g, notescapes_vec, states); | 
| 287 |         insert(&states, prev_states); | 
| 288 |     } | 
| 289 |  | 
| 290 |     /* find input starts to use for when we are running the prefix through as | 
| 291 |      * when the escape character arrives we may be in matching the prefix | 
| 292 |      * already */ | 
| 293 |     flat_set<NFAVertex> prefix_start_states; | 
| 294 |     for (auto v : vertices_range(prefix)) { | 
| 295 |         if (v != prefix.accept && v != prefix.acceptEod | 
| 296 |             /* and as we have already made it past the prefix once */ | 
| 297 |             && v != prefix.start) { | 
| 298 |             prefix_start_states.insert(v); | 
| 299 |         } | 
| 300 |     } | 
| 301 |  | 
| 302 |     prefix_start_states = | 
| 303 |         execute_graph(prefix, escapes_vec, prefix_start_states); | 
| 304 |  | 
| 305 |     assert(contains(prefix_start_states, prefix.startDs)); | 
| 306 |     /* see what happens after we feed it the prefix */ | 
| 307 |     states = execute_graph(g, prefix, prefix_start_states, states); | 
| 308 |  | 
| 309 |     for (auto v : states) { | 
| 310 |         assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be | 
| 311 |                                                     * on */ | 
| 312 |         DEBUG_PRINTF("state still active\n" ); | 
| 313 |         first_bad_region = MIN(first_bad_region, regions.at(v)); | 
| 314 |     } | 
| 315 |  | 
| 316 |     if (first_bad_region != ~0U) { | 
| 317 |         *bad_region = first_bad_region; | 
| 318 |         return false; | 
| 319 |     } | 
| 320 |  | 
| 321 |     return true; | 
| 322 | } | 
| 323 |  | 
| 324 | static | 
| 325 | bool isPossibleLock(const NGHolder &g, | 
| 326 |                     map<u32, region_info>::const_iterator region, | 
| 327 |                     const map<u32, region_info> &info, | 
| 328 |                     CharReach *escapes_out) { | 
| 329 |     /* TODO: we could also check for self-loops on curr region */ | 
| 330 |  | 
| 331 |     /* TODO: some straw-walking logic. lowish priority has we know there can | 
| 332 |      * only be optional regions between us and the cyclic */ | 
| 333 |  | 
| 334 |     assert(region != info.end()); | 
| 335 |     map<u32, region_info>::const_iterator next_region = region; | 
| 336 |     ++next_region; | 
| 337 |     if (next_region == info.end()) { | 
| 338 |         assert(0); /* odd */ | 
| 339 |         return false; | 
| 340 |     } | 
| 341 |  | 
| 342 |     const region_info &next_info = next_region->second; | 
| 343 |     if (next_info.enters.empty()) { | 
| 344 |         assert(0); /* odd */ | 
| 345 |         return false; | 
| 346 |     } | 
| 347 |  | 
| 348 |     if (next_info.full.size() == 1 && !next_info.dag) { | 
| 349 |        *escapes_out = ~g[next_info.full.front()].char_reach; | 
| 350 |        return true; | 
| 351 |     } | 
| 352 |  | 
| 353 |     return false; | 
| 354 | } | 
| 355 |  | 
| 356 | static | 
| 357 | unique_ptr<NGHolder> | 
| 358 | makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> ®ions, | 
| 359 |            const region_info &curr, const region_info &next, | 
| 360 |            bool renumber = true) { | 
| 361 |     const vector<NFAVertex> &curr_exits = curr.exits; | 
| 362 |     const vector<NFAVertex> &next_enters = next.enters; | 
| 363 |  | 
| 364 |     assert(!next_enters.empty()); | 
| 365 |     assert(!curr_exits.empty()); | 
| 366 |  | 
| 367 |     unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>(); | 
| 368 |     NGHolder &prefix = *prefix_ptr; | 
| 369 |  | 
| 370 |     deque<NFAVertex> lhs_verts; | 
| 371 |     insert(&lhs_verts, lhs_verts.end(), vertices(g)); | 
| 372 |  | 
| 373 |     unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix | 
| 374 |     fillHolder(&prefix, g, lhs_verts, &lhs_map); | 
| 375 |     prefix.kind = NFA_OUTFIX; | 
| 376 |  | 
| 377 |     // We need a reverse mapping to track regions. | 
| 378 |     unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g | 
| 379 |     for (const auto &e : lhs_map) { | 
| 380 |         rev_map.emplace(e.second, e.first); | 
| 381 |     } | 
| 382 |  | 
| 383 |     clear_in_edges(prefix.accept, prefix); | 
| 384 |     clear_in_edges(prefix.acceptEod, prefix); | 
| 385 |     add_edge(prefix.accept, prefix.acceptEod, prefix); | 
| 386 |  | 
| 387 |     assert(!next_enters.empty()); | 
| 388 |     assert(next_enters.front() != NGHolder::null_vertex()); | 
| 389 |     u32 dead_region = regions.at(next_enters.front()); | 
| 390 |     DEBUG_PRINTF("curr_region %u, dead_region %u\n" , | 
| 391 |                  regions.at(curr_exits.front()), dead_region); | 
| 392 |     for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) { | 
| 393 |         if (regions.at(v) >= dead_region) { | 
| 394 |             continue; | 
| 395 |         } | 
| 396 |         /* add edge to new accepts */ | 
| 397 |         NFAVertex p_v = lhs_map[v]; | 
| 398 |         add_edge(p_v, prefix.accept, prefix); | 
| 399 |     } | 
| 400 |  | 
| 401 |     assert(in_degree(prefix.accept, prefix) != 0); | 
| 402 |  | 
| 403 |     /* prune everything past the picked region */ | 
| 404 |     vector<NFAVertex> to_clear; | 
| 405 |     assert(contains(lhs_map, curr_exits.front())); | 
| 406 |     NFAVertex p_u = lhs_map[curr_exits.front()]; | 
| 407 |     DEBUG_PRINTF("p_u: %zu\n" , prefix[p_u].index); | 
| 408 |     for (auto p_v : adjacent_vertices_range(p_u, prefix)) { | 
| 409 |         auto v = rev_map.at(p_v); | 
| 410 |         if (p_v == prefix.accept || regions.at(v) < dead_region) { | 
| 411 |             continue; | 
| 412 |         } | 
| 413 |         to_clear.push_back(p_v); | 
| 414 |     } | 
| 415 |  | 
| 416 |     for (auto v : to_clear) { | 
| 417 |         DEBUG_PRINTF("clearing in_edges on %zu\n" , prefix[v].index); | 
| 418 |         clear_in_edges(v, prefix); | 
| 419 |     } | 
| 420 |  | 
| 421 |     pruneUseless(prefix, renumber /* sometimes we want no renumber to keep | 
| 422 |                                      depth map valid */); | 
| 423 |  | 
| 424 |     assert(num_vertices(prefix) > N_SPECIALS); | 
| 425 |     return prefix_ptr; | 
| 426 | } | 
| 427 |  | 
| 428 | static | 
| 429 | void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) { | 
| 430 |     const u32 temp_slot = UINT32_MAX; | 
| 431 |     /* update the som slot on the prefix report */ | 
| 432 |     for (auto v : inv_adjacent_vertices_range(g.accept, g)) { | 
| 433 |         auto &reports = g[v].reports; | 
| 434 |         assert(reports.size() == 1); | 
| 435 |         Report ir = rm.getReport(*reports.begin()); | 
| 436 |         if (ir.onmatch != temp_slot) { | 
| 437 |             continue; | 
| 438 |         } | 
| 439 |         ir.onmatch = real_slot; | 
| 440 |         ReportID rep = rm.getInternalId(ir); | 
| 441 |  | 
| 442 |         assert(reports.size() == 1); | 
| 443 |         reports.clear(); | 
| 444 |         reports.insert(rep); | 
| 445 |     } | 
| 446 | } | 
| 447 |  | 
| 448 | static | 
| 449 | void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type, | 
| 450 |                       u32 som_loc, const vector<DepthMinMax> &depths, | 
| 451 |                       bool prefix_by_rev) { | 
| 452 |     Report ir = makeCallback(0U, 0); | 
| 453 |     ir.type = ir_type; | 
| 454 |     ir.onmatch = som_loc; | 
| 455 |  | 
| 456 |     /* add report for storing in som location on new accepts */ | 
| 457 |     for (auto v : inv_adjacent_vertices_range(g.accept, g)) { | 
| 458 |         if (prefix_by_rev) { | 
| 459 |             ir.somDistance = MO_INVALID_IDX; /* will be populated properly | 
| 460 |                                               * later */ | 
| 461 |         } else { | 
| 462 |             const DepthMinMax &d = depths.at(g[v].index); | 
| 463 |             assert(d.min == d.max); | 
| 464 |             ir.somDistance = d.max; | 
| 465 |         } | 
| 466 |         ReportID rep = rm.getInternalId(ir); | 
| 467 |  | 
| 468 |         auto &reports = g[v].reports; | 
| 469 |         reports.clear(); | 
| 470 |         reports.insert(rep); | 
| 471 |     } | 
| 472 | } | 
| 473 |  | 
| 474 | static | 
| 475 | void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) { | 
| 476 |     /* update the som action on the prefix report */ | 
| 477 |     for (auto v : inv_adjacent_vertices_range(g.accept, g)) { | 
| 478 |         auto &reports = g[v].reports; | 
| 479 |         assert(reports.size() == 1); | 
| 480 |         Report ir = rm.getReport(*reports.begin()); | 
| 481 |         ir.type = ir_type; | 
| 482 |         ReportID rep = rm.getInternalId(ir); | 
| 483 |  | 
| 484 |         assert(reports.size() == 1); | 
| 485 |         reports.clear(); | 
| 486 |         reports.insert(rep); | 
| 487 |     } | 
| 488 | } | 
| 489 |  | 
| 490 | static | 
| 491 | void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g, | 
| 492 |                                u32 rev_comp_id) { | 
| 493 |     /* update the action on the prefix report, to refer to a reverse nfa, | 
| 494 |      * report type is also adjusted. */ | 
| 495 |     for (auto v : inv_adjacent_vertices_range(g.accept, g)) { | 
| 496 |         auto &reports = g[v].reports; | 
| 497 |         assert(reports.size() == 1); | 
| 498 |         Report ir = rm.getReport(*reports.begin()); | 
| 499 |         switch (ir.type) { | 
| 500 |         case INTERNAL_SOM_LOC_SET: | 
| 501 |             ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA; | 
| 502 |             break; | 
| 503 |         case INTERNAL_SOM_LOC_SET_IF_UNSET: | 
| 504 |             ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET; | 
| 505 |             break; | 
| 506 |         case INTERNAL_SOM_LOC_SET_IF_WRITABLE: | 
| 507 |             ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE; | 
| 508 |             break; | 
| 509 |         default: | 
| 510 |             assert(0); | 
| 511 |             break; | 
| 512 |         } | 
| 513 |  | 
| 514 |         ir.revNfaIndex = rev_comp_id; | 
| 515 |         ReportID rep = rm.getInternalId(ir); | 
| 516 |  | 
| 517 |         assert(reports.size() == 1); | 
| 518 |         reports.clear(); | 
| 519 |         reports.insert(rep); | 
| 520 |     } | 
| 521 | } | 
| 522 |  | 
| 523 | static | 
| 524 | void setMidfixReports(ReportManager &rm, const som_plan &item, | 
| 525 |                       const u32 som_slot_in, const u32 som_slot_out) { | 
| 526 |     assert(item.prefix); | 
| 527 |     NGHolder &g = *item.prefix; | 
| 528 |  | 
| 529 |     Report ir = makeCallback(0U, 0); | 
| 530 |     ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY | 
| 531 |                             : INTERNAL_SOM_LOC_COPY_IF_WRITABLE; | 
| 532 |     ir.onmatch = som_slot_out; | 
| 533 |     ir.somDistance = som_slot_in; | 
| 534 |     ReportID rep = rm.getInternalId(ir); | 
| 535 |  | 
| 536 |     /* add report for storing in som location on new accepts */ | 
| 537 |     for (auto v : inv_adjacent_vertices_range(g.accept, g)) { | 
| 538 |         auto &reports = g[v].reports; | 
| 539 |         reports.clear(); | 
| 540 |         reports.insert(rep); | 
| 541 |     } | 
| 542 | } | 
| 543 |  | 
| 544 | static | 
| 545 | bool finalRegion(const NGHolder &g, | 
| 546 |                  const unordered_map<NFAVertex, u32> ®ions, | 
| 547 |                  NFAVertex v) { | 
| 548 |     u32 region = regions.at(v); | 
| 549 |     for (auto w : adjacent_vertices_range(v, g)) { | 
| 550 |         if (w != g.accept && w != g.acceptEod && regions.at(w) != region) { | 
| 551 |             return false; | 
| 552 |         } | 
| 553 |     } | 
| 554 |  | 
| 555 |     return true; | 
| 556 | } | 
| 557 |  | 
| 558 | static | 
| 559 | void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g, | 
| 560 |                                       NFAVertex v, ReportType ir_type, | 
| 561 |                                       u64a param) { | 
| 562 |     assert(!g[v].reports.empty()); | 
| 563 |  | 
| 564 |     flat_set<ReportID> r_new; | 
| 565 |  | 
| 566 |     for (const ReportID &report_id : g[v].reports) { | 
| 567 |         Report ir = rm.getReport(report_id); | 
| 568 |  | 
| 569 |         if (ir.type != EXTERNAL_CALLBACK) { | 
| 570 |             /* we must have already done whatever magic we needed to do to this | 
| 571 |              * report */ | 
| 572 |             r_new.insert(report_id); | 
| 573 |             continue; | 
| 574 |         } | 
| 575 |  | 
| 576 |         ir.type = ir_type; | 
| 577 |         ir.somDistance = param; | 
| 578 |         ReportID rep = rm.getInternalId(ir); | 
| 579 |  | 
| 580 |         DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n" , | 
| 581 |                      g[v].index, report_id, rep, ir_type); | 
| 582 |         r_new.insert(rep); | 
| 583 |     } | 
| 584 |     g[v].reports = r_new; | 
| 585 | } | 
| 586 |  | 
| 587 | /* updates the reports on all vertices leading to the sink */ | 
| 588 | static | 
| 589 | void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink, | 
| 590 |                        const vector<DepthMinMax> &depths) { | 
| 591 |     for (auto v : inv_adjacent_vertices_range(sink, g)) { | 
| 592 |         if (v == g.accept) { | 
| 593 |             continue; | 
| 594 |         } | 
| 595 |  | 
| 596 |         const DepthMinMax &d = depths.at(g[v].index); | 
| 597 |         assert(d.min == d.max); | 
| 598 |         replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL, | 
| 599 |                                          d.min); | 
| 600 |     } | 
| 601 | } | 
| 602 |  | 
| 603 | /* updates the reports on all the provided vertices */ | 
| 604 | static | 
| 605 | void makeSomRelReports(ReportManager &rm, NGHolder &g, | 
| 606 |                        const vector<NFAVertex> &to_update, | 
| 607 |                        const vector<DepthMinMax> &depths) { | 
| 608 |     for (auto v : to_update) { | 
| 609 |         const DepthMinMax &d = depths.at(g[v].index); | 
| 610 |         assert(d.min == d.max); | 
| 611 |         replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL, | 
| 612 |                                          d.min); | 
| 613 |     } | 
| 614 | } | 
| 615 |  | 
| 616 | static | 
| 617 | void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) { | 
| 618 |     for (auto v : inv_adjacent_vertices_range(sink, g)) { | 
| 619 |         if (v == g.accept) { | 
| 620 |             continue; | 
| 621 |         } | 
| 622 |         replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS, | 
| 623 |                                          0); | 
| 624 |     } | 
| 625 | } | 
| 626 |  | 
| 627 | static | 
| 628 | void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) { | 
| 629 |     for (auto v : inv_adjacent_vertices_range(g.accept, g)) { | 
| 630 |         replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, | 
| 631 |                                          som_loc); | 
| 632 |     } | 
| 633 |     for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { | 
| 634 |         if (v == g.accept) { | 
| 635 |             continue; | 
| 636 |         } | 
| 637 |         replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, | 
| 638 |                                          som_loc); | 
| 639 |     } | 
| 640 | } | 
| 641 |  | 
| 642 | static | 
| 643 | void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, | 
| 644 |                                   const vector<NFAVertex> &to_update, | 
| 645 |                                   u32 som_loc) { | 
| 646 |     for (auto v : to_update) { | 
| 647 |         replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED, | 
| 648 |                                          som_loc); | 
| 649 |     } | 
| 650 | } | 
| 651 |  | 
| 652 | static | 
| 653 | bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes, | 
| 654 |                    u32 som_loc) { | 
| 655 |     ReportManager &rm = ng.rm; | 
| 656 |  | 
| 657 |     /* escaper = /prefix[^escapes]*[escapes]/ */ | 
| 658 |     DEBUG_PRINTF("creating escaper for %u\n" , som_loc); | 
| 659 |     NGHolder h; | 
| 660 |     cloneHolder(h, prefix); | 
| 661 |     assert(h.kind == NFA_OUTFIX); | 
| 662 |  | 
| 663 |     NFAVertex u = add_vertex(h); | 
| 664 |     h[u].char_reach = ~escapes; | 
| 665 |  | 
| 666 |     NFAVertex v = add_vertex(h); | 
| 667 |     h[v].char_reach = escapes; | 
| 668 |  | 
| 669 |     for (auto w : inv_adjacent_vertices_range(h.accept, h)) { | 
| 670 |         add_edge(w, u, h); | 
| 671 |         add_edge(w, v, h); | 
| 672 |         h[w].reports.clear(); | 
| 673 |     } | 
| 674 |  | 
| 675 |     clear_in_edges(h.accept, h); | 
| 676 |  | 
| 677 |     add_edge(u, v, h); | 
| 678 |     add_edge(u, u, h); | 
| 679 |     add_edge(v, h.accept, h); | 
| 680 |  | 
| 681 |     Report ir = makeCallback(0U, 0); | 
| 682 |     ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE; | 
| 683 |     ir.onmatch = som_loc; | 
| 684 |     h[v].reports.insert(rm.getInternalId(ir)); | 
| 685 |     return ng.addHolder(h); | 
| 686 | } | 
| 687 |  | 
| 688 | static | 
| 689 | void fillHolderForLockCheck(NGHolder *out, const NGHolder &g, | 
| 690 |                             const map<u32, region_info> &info, | 
| 691 |                             map<u32, region_info>::const_iterator picked) { | 
| 692 |     /* NOTE: This is appropriate for firstMatchIsFirst */ | 
| 693 |     DEBUG_PRINTF("prepping for lock check\n" ); | 
| 694 |  | 
| 695 |     NGHolder &midfix = *out; | 
| 696 |  | 
| 697 |     map<NFAVertex, NFAVertex> v_map; | 
| 698 |     v_map[g.start] = midfix.start; | 
| 699 |     v_map[g.startDs] = midfix.startDs; | 
| 700 |  | 
| 701 |     /* include the lock region */ | 
| 702 |     assert(picked != info.end()); | 
| 703 |     auto graph_last = next(picked); | 
| 704 |  | 
| 705 |     assert(!graph_last->second.dag); | 
| 706 |     assert(graph_last->second.full.size() == 1); | 
| 707 |  | 
| 708 |     for (auto jt = graph_last; ; --jt) { | 
| 709 |         DEBUG_PRINTF("adding r %u to midfix\n" , jt->first); | 
| 710 |  | 
| 711 |         /* add all vertices in region, create mapping */ | 
| 712 |         for (auto v : jt->second.full) { | 
| 713 |             DEBUG_PRINTF("adding v %zu to midfix\n" , g[v].index); | 
| 714 |             if (contains(v_map, v)) { | 
| 715 |                 continue; | 
| 716 |             } | 
| 717 |  | 
| 718 |             /* treat all virtual starts as happening anywhere, so that the | 
| 719 |              * virtual start is not counted as part of the SoM */ | 
| 720 |             if (is_virtual_start(v, g)) { | 
| 721 |                 v_map[v] = midfix.startDs; | 
| 722 |                 continue; | 
| 723 |             } | 
| 724 |  | 
| 725 |             NFAVertex vnew = add_vertex(g[v], midfix); | 
| 726 |             v_map[v] = vnew; | 
| 727 |         } | 
| 728 |  | 
| 729 |         /* add edges leaving region verts based on mapping */ | 
| 730 |         for (auto v : jt->second.full) { | 
| 731 |             NFAVertex u = v_map[v]; | 
| 732 |             for (auto w : adjacent_vertices_range(v, g)) { | 
| 733 |                 if (w == g.accept || w == g.acceptEod) { | 
| 734 |                     add_edge_if_not_present(u, midfix.accept, midfix); | 
| 735 |                     continue; | 
| 736 |                 } | 
| 737 |                 if (!contains(v_map, w)) { | 
| 738 |                     add_edge_if_not_present(u, midfix.accept, midfix); | 
| 739 |                 } else { | 
| 740 |                     add_edge_if_not_present(u, v_map[w], midfix); | 
| 741 |                 } | 
| 742 |             } | 
| 743 |         } | 
| 744 |  | 
| 745 |         if (jt == info.begin()) { | 
| 746 |             break; | 
| 747 |         } | 
| 748 |     } | 
| 749 |  | 
| 750 |     /* add edges from startds to the enters of all the initial optional | 
| 751 |      * regions and the first mandatory region. */ | 
| 752 |     for (auto jt = info.begin(); ; ++jt) { | 
| 753 |         for (auto enter : jt->second.enters) { | 
| 754 |             assert(contains(v_map, enter)); | 
| 755 |             NFAVertex v = v_map[enter]; | 
| 756 |             add_edge_if_not_present(midfix.startDs, v, midfix); | 
| 757 |         } | 
| 758 |  | 
| 759 |         if (!jt->second.optional) { | 
| 760 |             break; | 
| 761 |         } | 
| 762 |  | 
| 763 |         if (jt == graph_last) { | 
| 764 |             /* all regions are optional - add a direct edge to accept */ | 
| 765 |             add_edge_if_not_present(midfix.startDs, midfix.accept, midfix); | 
| 766 |             break; | 
| 767 |         } | 
| 768 |     } | 
| 769 |  | 
| 770 |     assert(in_degree(midfix.accept, midfix)); | 
| 771 |     renumber_vertices(midfix); | 
| 772 | } | 
| 773 |  | 
| 774 | static | 
| 775 | void fillRoughMidfix(NGHolder *out, const NGHolder &g, | 
| 776 |                      const unordered_map<NFAVertex, u32> ®ions, | 
| 777 |                      const map<u32, region_info> &info, | 
| 778 |                      map<u32, region_info>::const_iterator picked) { | 
| 779 |     /* as we are not the first prefix, we are probably not acyclic. We need to | 
| 780 |      * generate an acyclic holder to acts a fake prefix to sentClearsTail. | 
| 781 |      * This will result in a more conservative estimate. */ | 
| 782 |     /* NOTE: This is not appropriate for firstMatchIsFirst */ | 
| 783 |     NGHolder &midfix = *out; | 
| 784 |     add_edge(midfix.startDs, midfix.accept, midfix); | 
| 785 |  | 
| 786 |     map<NFAVertex, NFAVertex> v_map; | 
| 787 |  | 
| 788 |     map<u32, region_info>::const_iterator jt = picked; | 
| 789 |     for (; jt->second.dag; --jt) { | 
| 790 |         DEBUG_PRINTF("adding r %u to midfix\n" , jt->first); | 
| 791 |         if (!jt->second.optional) { | 
| 792 |             clear_out_edges(midfix.startDs, midfix); | 
| 793 |             add_edge(midfix.startDs, midfix.startDs, midfix); | 
| 794 |         } | 
| 795 |  | 
| 796 |         /* add all vertices in region, create mapping */ | 
| 797 |         for (auto v : jt->second.full) { | 
| 798 |             DEBUG_PRINTF("adding v %zu to midfix\n" , g[v].index); | 
| 799 |             NFAVertex vnew = add_vertex(g[v], midfix); | 
| 800 |             v_map[v] = vnew; | 
| 801 |         } | 
| 802 |  | 
| 803 |         /* add edges leaving region verts based on mapping */ | 
| 804 |         for (auto v : jt->second.full) { | 
| 805 |             NFAVertex u = v_map[v]; | 
| 806 |             for (auto w : adjacent_vertices_range(v, g)) { | 
| 807 |                 if (w == g.accept || w == g.acceptEod) { | 
| 808 |                     continue; | 
| 809 |                 } | 
| 810 |                 if (!contains(v_map, w)) { | 
| 811 |                     add_edge_if_not_present(u, midfix.accept, midfix); | 
| 812 |                 } else { | 
| 813 |                     add_edge_if_not_present(u, v_map[w], midfix); | 
| 814 |                 } | 
| 815 |             } | 
| 816 |         } | 
| 817 |  | 
| 818 |         /* add edges from startds to enters */ | 
| 819 |         for (auto enter : jt->second.enters) { | 
| 820 |             assert(contains(v_map, enter)); | 
| 821 |             NFAVertex v = v_map[enter]; | 
| 822 |             add_edge(midfix.startDs, v, midfix); | 
| 823 |         } | 
| 824 |  | 
| 825 |         if (jt == info.begin()) { | 
| 826 |             break; | 
| 827 |         } | 
| 828 |     } | 
| 829 |  | 
| 830 |     /* we can include the exits of the regions leading in */ | 
| 831 |     if (!jt->second.dag) { | 
| 832 |         u32 first_early_region = jt->first; | 
| 833 |         clear_out_edges(midfix.startDs, midfix); | 
| 834 |         add_edge(midfix.startDs, midfix.startDs, midfix); | 
| 835 |  | 
| 836 |         do { | 
| 837 |             for (auto v : jt->second.exits) { | 
| 838 |                 DEBUG_PRINTF("adding v %zu to midfix\n" , g[v].index); | 
| 839 |                 NFAVertex vnew = add_vertex(g[v], midfix); | 
| 840 |                 v_map[v] = vnew; | 
| 841 |  | 
| 842 |                 /* add edges from startds to new vertices */ | 
| 843 |                 add_edge(midfix.startDs, vnew, midfix); | 
| 844 |             } | 
| 845 |  | 
| 846 |             /* add edges leaving region verts based on mapping */ | 
| 847 |             for (auto v : jt->second.exits) { | 
| 848 |                 NFAVertex u = v_map[v]; | 
| 849 |                 for (auto w : adjacent_vertices_range(v, g)) { | 
| 850 |                     if (w == g.accept || w == g.acceptEod | 
| 851 |                         || regions.at(w) <= first_early_region) { | 
| 852 |                         continue; | 
| 853 |                     } | 
| 854 |                     if (!contains(v_map, w)) { | 
| 855 |                         add_edge_if_not_present(u, midfix.accept, midfix); | 
| 856 |                     } else { | 
| 857 |                         add_edge_if_not_present(u, v_map[w], midfix); | 
| 858 |                     } | 
| 859 |                 } | 
| 860 |             } | 
| 861 |         } while (jt->second.optional && jt != info.begin() && (jt--)->first); | 
| 862 |  | 
| 863 |         if (jt->second.optional) { | 
| 864 |             assert(!jt->second.exits.empty()); | 
| 865 |             NFAVertex v = v_map[jt->second.exits.front()]; | 
| 866 |             for (auto w : adjacent_vertices_range(v, midfix)) { | 
| 867 |                 add_edge(midfix.startDs, w, midfix); | 
| 868 |             } | 
| 869 |         } | 
| 870 |     } | 
| 871 | } | 
| 872 |  | 
| 873 | static | 
| 874 | bool beginsWithDotStar(const NGHolder &g) { | 
| 875 |     bool hasDot = false; | 
| 876 |  | 
| 877 |     // We can ignore the successors of start, as matches that begin there will | 
| 878 |     // necessarily have a SOM of 0. | 
| 879 |  | 
| 880 |     set<NFAVertex> succ; | 
| 881 |     insert(&succ, adjacent_vertices(g.startDs, g)); | 
| 882 |     succ.erase(g.startDs); | 
| 883 |  | 
| 884 |     for (auto v : succ) { | 
| 885 |         // We want 'dot' states that aren't virtual starts. | 
| 886 |         if (g[v].char_reach.all() && | 
| 887 |                 !g[v].assert_flags) { | 
| 888 |             hasDot = true; | 
| 889 |             set<NFAVertex> dotsucc; | 
| 890 |             insert(&dotsucc, adjacent_vertices(v, g)); | 
| 891 |             if (dotsucc != succ) { | 
| 892 |                 DEBUG_PRINTF("failed dot-star succ check\n" ); | 
| 893 |                 return false; | 
| 894 |             } | 
| 895 |         } | 
| 896 |     } | 
| 897 |  | 
| 898 |     if (hasDot) { | 
| 899 |         DEBUG_PRINTF("begins with dot-star\n" ); | 
| 900 |     } | 
| 901 |     return hasDot; | 
| 902 | } | 
| 903 |  | 
| 904 | static | 
| 905 | bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in, | 
| 906 |                  const u32 som_slot_out) { | 
| 907 |     assert(item.prefix); | 
| 908 |     assert(hasCorrectlyNumberedVertices(*item.prefix)); | 
| 909 |  | 
| 910 |     /* setup escaper for second som_location if required */ | 
| 911 |     if (item.escapes.any()) { | 
| 912 |         if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) { | 
| 913 |             return false; | 
| 914 |         } | 
| 915 |     } | 
| 916 |  | 
| 917 |     /* ensure we copy som from prev loc */ | 
| 918 |     setMidfixReports(ng.rm, item, som_slot_in, som_slot_out); | 
| 919 |  | 
| 920 |     /* add second prefix/1st midfix */ | 
| 921 |     if (!ng.addHolder(*item.prefix)) { | 
| 922 |         DEBUG_PRINTF("---addHolder failed---\n" ); | 
| 923 |         return false; | 
| 924 |     } | 
| 925 |  | 
| 926 |     return true; | 
| 927 | } | 
| 928 |  | 
| 929 | static | 
| 930 | bool isMandRegionBetween(map<u32, region_info>::const_iterator a, | 
| 931 |                          map<u32, region_info>::const_iterator b) { | 
| 932 |     while (b != a) { | 
| 933 |         if (!b->second.optional) { | 
| 934 |             return true; | 
| 935 |         } | 
| 936 |         --b; | 
| 937 |     } | 
| 938 |  | 
| 939 |     return false; | 
| 940 | } | 
| 941 |  | 
| 942 | // Attempts to advance the current plan. Returns true if we advance to the end | 
| 943 | // (woot!); updates picked, plan and bad_region. | 
| 944 | static | 
| 945 | bool advancePlan(const NGHolder &g, | 
| 946 |                  const unordered_map<NFAVertex, u32> ®ions, | 
| 947 |                  const NGHolder &prefix, bool stuck, | 
| 948 |                  map<u32, region_info>::const_iterator &picked, | 
| 949 |                  const map<u32, region_info>::const_iterator furthest, | 
| 950 |                  const map<u32, region_info>::const_iterator furthest_lock, | 
| 951 |                  const CharReach &next_escapes, som_plan &plan, | 
| 952 |                  u32 *bad_region) { | 
| 953 |     u32 bad_region_r = 0; | 
| 954 |     u32 bad_region_x = 0; | 
| 955 |     u32 bad_region_e = 0; | 
| 956 |     DEBUG_PRINTF("curr %u\n" , picked->first); | 
| 957 |  | 
| 958 |     if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) { | 
| 959 |         plan.is_reset = true; | 
| 960 |         picked = furthest; | 
| 961 |         DEBUG_PRINTF("Prefix clears tail, woot!\n" ); | 
| 962 |         return true; | 
| 963 |     } else { | 
| 964 |         DEBUG_PRINTF("Reset failed, first bad region %u\n" , bad_region_r); | 
| 965 |     } | 
| 966 |  | 
| 967 |     if (stuck) { | 
| 968 |         u32 to_region = furthest_lock->first; | 
| 969 |         if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) { | 
| 970 |             DEBUG_PRINTF("XSL\n" ); | 
| 971 |             picked = furthest_lock; | 
| 972 |             plan.escapes = next_escapes; | 
| 973 |             return true; | 
| 974 |         } else { | 
| 975 |             DEBUG_PRINTF("XSL failed, first bad region %u\n" , bad_region_x); | 
| 976 |         } | 
| 977 |  | 
| 978 |         if (validateEXSL(g, regions, to_region, next_escapes, prefix, | 
| 979 |                          &bad_region_e)) { | 
| 980 |             DEBUG_PRINTF("EXSL\n" ); | 
| 981 |             picked = furthest_lock; | 
| 982 |             plan.escapes = next_escapes; | 
| 983 |             return true; | 
| 984 |         } else { | 
| 985 |             DEBUG_PRINTF("EXSL failed, first bad region %u\n" , bad_region_e); | 
| 986 |         } | 
| 987 |     } else { | 
| 988 |         DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n" ); | 
| 989 |     } | 
| 990 |  | 
| 991 |     assert(!plan.is_reset); | 
| 992 |  | 
| 993 |     *bad_region = max(bad_region_x, bad_region_e); | 
| 994 |     if (bad_region_r >= *bad_region) { | 
| 995 |         *bad_region = bad_region_r; | 
| 996 |         plan.is_reset = true; | 
| 997 |         plan.escapes.clear(); | 
| 998 |         picked = furthest; | 
| 999 |     } else { | 
| 1000 |         picked = furthest_lock; | 
| 1001 |         plan.escapes = next_escapes; | 
| 1002 |     } | 
| 1003 |  | 
| 1004 |     DEBUG_PRINTF("first bad region now %u\n" , *bad_region); | 
| 1005 |     return false; | 
| 1006 | } | 
| 1007 |  | 
| 1008 | static | 
| 1009 | bool addPlan(vector<som_plan> &plan, u32 parent) { | 
| 1010 |     DEBUG_PRINTF("adding plan %zu with parent %u\n" , plan.size(), | 
| 1011 |                  parent); | 
| 1012 |  | 
| 1013 |     if (plan.size() >= MAX_SOM_PLANS) { | 
| 1014 |         DEBUG_PRINTF("too many plans!\n" ); | 
| 1015 |         return false; | 
| 1016 |     } | 
| 1017 |  | 
| 1018 |     plan.emplace_back(nullptr, CharReach(), false, parent); | 
| 1019 |     return true; | 
| 1020 | } | 
| 1021 |  | 
| 1022 | // Fetches all preds of {accept, acceptEod} for this graph. | 
| 1023 | static | 
| 1024 | void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) { | 
| 1025 |     set<NFAVertex> tmp; | 
| 1026 |     insert(&tmp, inv_adjacent_vertices(g.accept, g)); | 
| 1027 |     insert(&tmp, inv_adjacent_vertices(g.acceptEod, g)); | 
| 1028 |     tmp.erase(g.accept); | 
| 1029 |  | 
| 1030 | #ifdef DEBUG | 
| 1031 |     DEBUG_PRINTF("add reporters:" ); | 
| 1032 |     for (UNUSED auto v : tmp) { | 
| 1033 |         printf(" %zu" , g[v].index); | 
| 1034 |     } | 
| 1035 |     printf("\n" ); | 
| 1036 | #endif | 
| 1037 |  | 
| 1038 |     reporters.insert(reporters.end(), tmp.begin(), tmp.end()); | 
| 1039 | } | 
| 1040 |  | 
| 1041 | // Fetches all preds of {accept, acceptEod} in this region. | 
| 1042 | static | 
| 1043 | void addReporterVertices(const region_info &r, const NGHolder &g, | 
| 1044 |                          vector<NFAVertex> &reporters) { | 
| 1045 |     for (auto v : r.exits) { | 
| 1046 |         if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { | 
| 1047 |             DEBUG_PRINTF("add reporter %zu\n" , g[v].index); | 
| 1048 |             reporters.push_back(v); | 
| 1049 |         } | 
| 1050 |     } | 
| 1051 | } | 
| 1052 |  | 
| 1053 | // Fetches the mappings of all preds of {accept, acceptEod} in this region. | 
| 1054 | static | 
| 1055 | void addMappedReporterVertices(const region_info &r, const NGHolder &g, | 
| 1056 |                         const unordered_map<NFAVertex, NFAVertex> &mapping, | 
| 1057 |                         vector<NFAVertex> &reporters) { | 
| 1058 |     for (auto v : r.exits) { | 
| 1059 |         if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { | 
| 1060 |             DEBUG_PRINTF("adding v=%zu\n" , g[v].index); | 
| 1061 |             auto it = mapping.find(v); | 
| 1062 |             assert(it != mapping.end()); | 
| 1063 |             reporters.push_back(it->second); | 
| 1064 |         } | 
| 1065 |     } | 
| 1066 | } | 
| 1067 |  | 
| 1068 | // Clone a version of the graph, but only including the in-edges of `enter' | 
| 1069 | // from earlier regions. | 
| 1070 | static | 
| 1071 | void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g, | 
| 1072 |                        const unordered_map<NFAVertex, u32> ®ions, | 
| 1073 |                        NFAVertex entry, const vector<NFAVertex> &enters, | 
| 1074 |                        unordered_map<NFAVertex, NFAVertex> &orig_to_copy) { | 
| 1075 |     orig_to_copy.clear(); | 
| 1076 |     cloneHolder(out, g, &orig_to_copy); | 
| 1077 |  | 
| 1078 |     assert(contains(orig_to_copy, entry)); | 
| 1079 |     const u32 region = regions.at(entry); | 
| 1080 |  | 
| 1081 |     for (auto v : enters) { | 
| 1082 |         if (v == entry) { | 
| 1083 |             continue; | 
| 1084 |         } | 
| 1085 |         assert(contains(orig_to_copy, v)); | 
| 1086 |  | 
| 1087 |         for (auto u : inv_adjacent_vertices_range(v, g)) { | 
| 1088 |             if (regions.at(u) < region) { | 
| 1089 |                 assert(edge(orig_to_copy[u], orig_to_copy[v], out).second); | 
| 1090 |                 remove_edge(orig_to_copy[u], orig_to_copy[v], out); | 
| 1091 |             } | 
| 1092 |         } | 
| 1093 |     } | 
| 1094 |  | 
| 1095 |     pruneUseless(out); | 
| 1096 | } | 
| 1097 |  | 
| 1098 | static | 
| 1099 | void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> ®ions, | 
| 1100 |                  vector<NFAVertex> &enters) { | 
| 1101 |     assert(!enters.empty()); | 
| 1102 |     const u32 split_region = regions.at(enters.front()); | 
| 1103 |  | 
| 1104 |     vector<NFAVertex> new_enters; | 
| 1105 |  | 
| 1106 |     // Gather the list of vertices in the split region and subsequent regions. | 
| 1107 |     vector<NFAVertex> tail_vertices; | 
| 1108 |     for (auto v : vertices_range(g)) { | 
| 1109 |         if (is_special(v, g) || regions.at(v) < split_region) { | 
| 1110 |             continue; | 
| 1111 |         } | 
| 1112 |         tail_vertices.push_back(v); | 
| 1113 |     } | 
| 1114 |  | 
| 1115 |     for (auto enter : enters) { | 
| 1116 |         DEBUG_PRINTF("processing enter %zu\n" , g[enter].index); | 
| 1117 |         map<NFAVertex, NFAVertex> orig_to_copy; | 
| 1118 |  | 
| 1119 |         // Make a copy of all of the tail vertices, storing region info along | 
| 1120 |         // the way. | 
| 1121 |         for (auto v : tail_vertices) { | 
| 1122 |             auto v2 = clone_vertex(g, v); | 
| 1123 |             orig_to_copy[v] = v2; | 
| 1124 |             regions[v2] = regions.at(v); | 
| 1125 |         } | 
| 1126 |  | 
| 1127 |         // Wire up the edges: edges from previous regions come from the | 
| 1128 |         // original vertices, while edges internal to and beyond the split | 
| 1129 |         // region go to the copies. | 
| 1130 |  | 
| 1131 |         for (const auto &m : orig_to_copy) { | 
| 1132 |             NFAVertex v = m.first, v2 = m.second; | 
| 1133 |  | 
| 1134 |             for (const auto &e : out_edges_range(v, g)) { | 
| 1135 |                 NFAVertex t = target(e, g); | 
| 1136 |                 u32 t_region = regions.at(t); | 
| 1137 |                 if (t_region >= split_region && !is_special(t, g)) { | 
| 1138 |                     assert(contains(orig_to_copy, t)); | 
| 1139 |                     t = orig_to_copy[t]; | 
| 1140 |                 } | 
| 1141 |                 add_edge_if_not_present(v2, t, g[e], g); | 
| 1142 |             } | 
| 1143 |  | 
| 1144 |             for (const auto &e : in_edges_range(v, g)) { | 
| 1145 |                 NFAVertex u = source(e, g); | 
| 1146 |                 if (regions.at(u) >= split_region && !is_special(u, g)) { | 
| 1147 |                     assert(contains(orig_to_copy, u)); | 
| 1148 |                     u = orig_to_copy[u]; | 
| 1149 |                 } | 
| 1150 |                 add_edge_if_not_present(u, v2, g[e], g); | 
| 1151 |             } | 
| 1152 |  | 
| 1153 |         } | 
| 1154 |  | 
| 1155 |         // Clear the in-edges from earlier regions of the OTHER enters for this | 
| 1156 |         // copy of the split region. | 
| 1157 |         for (auto v : enters) { | 
| 1158 |             if (v == enter) { | 
| 1159 |                 continue; | 
| 1160 |             } | 
| 1161 |  | 
| 1162 |             remove_in_edge_if(orig_to_copy[v], | 
| 1163 |                               [&](const NFAEdge &e) { | 
| 1164 |                                     NFAVertex u = source(e, g); | 
| 1165 |                                     return regions.at(u) < split_region; | 
| 1166 |                               }, g); | 
| 1167 |         } | 
| 1168 |  | 
| 1169 |         new_enters.push_back(orig_to_copy[enter]); | 
| 1170 |     } | 
| 1171 |  | 
| 1172 |     // Remove the original set of tail vertices. | 
| 1173 |     remove_vertices(tail_vertices, g); | 
| 1174 |     pruneUseless(g); | 
| 1175 |     regions = assignRegions(g); | 
| 1176 |  | 
| 1177 |     enters.swap(new_enters); | 
| 1178 | } | 
| 1179 |  | 
| 1180 | static | 
| 1181 | bool doTreePlanningIntl(NGHolder &g, | 
| 1182 |             const unordered_map<NFAVertex, u32> ®ions, | 
| 1183 |             const map<u32, region_info> &info, | 
| 1184 |             map<u32, region_info>::const_iterator picked, u32 bad_region, | 
| 1185 |             u32 parent_plan, | 
| 1186 |             const unordered_map<NFAVertex, NFAVertex> ©_to_orig, | 
| 1187 |             vector<som_plan> &plan, const Grey &grey) { | 
| 1188 |     assert(picked != info.end()); | 
| 1189 |  | 
| 1190 |     DEBUG_PRINTF("picked=%u\n" , picked->first); | 
| 1191 |     DEBUG_PRINTF("parent is %u\n" , parent_plan); | 
| 1192 |  | 
| 1193 |     map<u32, region_info>::const_iterator furthest; | 
| 1194 |  | 
| 1195 |     bool to_end = false; | 
| 1196 |     while (!to_end) { | 
| 1197 |         DEBUG_PRINTF("picked is %u\n" , picked->first); | 
| 1198 |         DEBUG_PRINTF("first bad region now %u\n" , bad_region); | 
| 1199 |  | 
| 1200 |         furthest = info.find(bad_region); /* first bad */ | 
| 1201 |         if (furthest == info.end()) { | 
| 1202 |             DEBUG_PRINTF("no partition\n" ); | 
| 1203 |             return false; | 
| 1204 |         } | 
| 1205 |         --furthest; /* last region we can establish som for */ | 
| 1206 |  | 
| 1207 |         if (furthest->first <= picked->first) { | 
| 1208 |             DEBUG_PRINTF("failed to make any progress\n" ); | 
| 1209 |             return false; | 
| 1210 |         } | 
| 1211 |  | 
| 1212 |         map<u32, region_info>::const_iterator furthest_lock = furthest; | 
| 1213 |         CharReach next_escapes; | 
| 1214 |         bool lock_found; | 
| 1215 |         /* The last possible lock in the range that we examine should be the | 
| 1216 |          * best. If the previous plan is a lock, this follow as any early lock | 
| 1217 |          * must have a reach that is a subset of the last plan's lock. If the | 
| 1218 |          * last plan is a resetting plan ..., ?is this true? */ | 
| 1219 |         do { | 
| 1220 |             lock_found = isPossibleLock(g, furthest_lock, info, | 
| 1221 |                                           &next_escapes); | 
| 1222 |         } while (!lock_found && (--furthest_lock)->first > picked->first); | 
| 1223 |         DEBUG_PRINTF("lock possible? %d\n" , (int)lock_found); | 
| 1224 |  | 
| 1225 |         if (lock_found && !isMandRegionBetween(picked, furthest_lock)) { | 
| 1226 |             lock_found = false; | 
| 1227 |         } | 
| 1228 |  | 
| 1229 |         if (!isMandRegionBetween(picked, furthest)) { | 
| 1230 |             return false; | 
| 1231 |         } | 
| 1232 |  | 
| 1233 |         /* There is no certainty that the som at a reset location will always | 
| 1234 |          * go forward */ | 
| 1235 |         if (plan[parent_plan].is_reset && lock_found) { | 
| 1236 |             NGHolder midfix; | 
| 1237 |             DEBUG_PRINTF("checking if midfix is suitable for lock\n" ); | 
| 1238 |             fillHolderForLockCheck(&midfix, g, info, furthest_lock); | 
| 1239 |  | 
| 1240 |             if (!firstMatchIsFirst(midfix)) { | 
| 1241 |                 DEBUG_PRINTF("not stuck\n" ); | 
| 1242 |                 lock_found = false; | 
| 1243 |             } | 
| 1244 |         } | 
| 1245 |  | 
| 1246 |         if (!addPlan(plan, parent_plan)) { | 
| 1247 |             return false; | 
| 1248 |         } | 
| 1249 |  | 
| 1250 |         to_end = false; | 
| 1251 |  | 
| 1252 |         if (lock_found && next_escapes.none()) { | 
| 1253 |             picked = furthest_lock; | 
| 1254 |             to_end = true; | 
| 1255 |         } | 
| 1256 |  | 
| 1257 |         if (!to_end) { | 
| 1258 |             NGHolder conservative_midfix; /* for use in reset, exsl analysis */ | 
| 1259 |             fillRoughMidfix(&conservative_midfix, g, regions, info, furthest); | 
| 1260 |             dumpHolder(conservative_midfix, 15, "som_pathmidfix" , grey); | 
| 1261 |  | 
| 1262 |             u32 old_bad_region = bad_region; | 
| 1263 |             to_end = advancePlan(g, regions, conservative_midfix, lock_found, | 
| 1264 |                                  picked, furthest, furthest_lock, next_escapes, | 
| 1265 |                                  plan.back(), &bad_region); | 
| 1266 |             if (!to_end | 
| 1267 |                 && bad_region <= old_bad_region) { /* we failed to progress */ | 
| 1268 |                 DEBUG_PRINTF("failed to make any progress\n" ); | 
| 1269 |                 return false; | 
| 1270 |             } | 
| 1271 |         } | 
| 1272 |  | 
| 1273 |         /* handle direct edge to accepts from region */ | 
| 1274 |         if (edge(furthest->second.exits.front(), g.accept, g).second | 
| 1275 |                 || edge(furthest->second.exits.front(), g.acceptEod, g).second) { | 
| 1276 |             map<u32, region_info>::const_iterator it = furthest; | 
| 1277 |             do { | 
| 1278 |                 addMappedReporterVertices(it->second, g, copy_to_orig, | 
| 1279 |                                           plan.back().reporters_in); | 
| 1280 |             } while (it != info.begin() && it->second.optional && (it--)->first); | 
| 1281 |         } | 
| 1282 |  | 
| 1283 |         /* create second prefix */ | 
| 1284 |         plan.back().prefix = makePrefix(g, regions, furthest->second, | 
| 1285 |                                         next(furthest)->second); | 
| 1286 |         parent_plan = plan.size() - 1; | 
| 1287 |     } | 
| 1288 |  | 
| 1289 |     // The last region contributes reporters. If it's optional, the regions | 
| 1290 |     // before it do as well. | 
| 1291 |     map<u32, region_info>::const_reverse_iterator it = info.rbegin(); | 
| 1292 |     do { | 
| 1293 |         DEBUG_PRINTF("add mapped reporters for region %u\n" , it->first); | 
| 1294 |         addMappedReporterVertices(it->second, g, copy_to_orig, | 
| 1295 |                                   plan.back().reporters); | 
| 1296 |     } while (it->second.optional && it != info.rend() && | 
| 1297 |              (++it)->first > furthest->first); | 
| 1298 |  | 
| 1299 |     return true; | 
| 1300 | } | 
| 1301 |  | 
| 1302 | static | 
| 1303 | bool doTreePlanning(NGHolder &g, | 
| 1304 |                     map<u32, region_info>::const_iterator presplit, | 
| 1305 |                     map<u32, region_info>::const_iterator picked, | 
| 1306 |                     vector<som_plan> &plan, const Grey &grey) { | 
| 1307 |     DEBUG_PRINTF("picked is %u\n" , picked->first); | 
| 1308 |     DEBUG_PRINTF("presplit is %u\n" , presplit->first); | 
| 1309 |  | 
| 1310 |     map<u32, region_info>::const_iterator splitter = next(presplit); | 
| 1311 |     vector<NFAVertex> enters = splitter->second.enters; // mutable copy | 
| 1312 |     DEBUG_PRINTF("problem region has %zu entry vertices\n" , enters.size()); | 
| 1313 |  | 
| 1314 |     if (enters.size() <= 1) { | 
| 1315 |         // TODO: Splitting a region with one entry won't get us anywhere, but | 
| 1316 |         // it shouldn't create buggy analyses either. See UE-1892. | 
| 1317 |         DEBUG_PRINTF("nothing to split\n" ); | 
| 1318 |         return false; | 
| 1319 |     } | 
| 1320 |  | 
| 1321 |     if (plan.size() + enters.size() > MAX_SOM_PLANS) { | 
| 1322 |         DEBUG_PRINTF("splitting this tree would hit the plan limit.\n" ); | 
| 1323 |         return false; | 
| 1324 |     } | 
| 1325 |  | 
| 1326 |     assert(!plan.empty()); | 
| 1327 |     const u32 parent_plan = plan.size() - 1; | 
| 1328 |  | 
| 1329 |     // Make a copy of the graph, with the subgraph under each enter vertex | 
| 1330 |     // duplicated without the edges into the other enter vertices. | 
| 1331 |     // NOTE WELL: this will invalidate 'info' from the split point, but it's | 
| 1332 |     // OK... we don't use it after this. | 
| 1333 |     auto g_regions = assignRegions(g); | 
| 1334 |     expandGraph(g, g_regions, enters); | 
| 1335 |     dumpHolder(g, g_regions, 14, "som_expandedtree" , grey); | 
| 1336 |  | 
| 1337 |     for (auto v : enters) { | 
| 1338 |         DEBUG_PRINTF("enter %zu\n" , g[v].index); | 
| 1339 |  | 
| 1340 |         // For this entry vertex, construct a version of the graph without the | 
| 1341 |         // other entries in this region (g_path), and calculate its depths and | 
| 1342 |         // regions. | 
| 1343 |  | 
| 1344 |         NGHolder g_path; | 
| 1345 |         unordered_map<NFAVertex, NFAVertex> orig_to_copy; | 
| 1346 |         cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy); | 
| 1347 |         auto regions = assignRegions(g_path); | 
| 1348 |         dumpHolder(g_path, regions, 14, "som_treepath" , grey); | 
| 1349 |  | 
| 1350 |         map<u32, region_info> path_info; | 
| 1351 |         buildRegionMapping(g_path, regions, path_info); | 
| 1352 |  | 
| 1353 |         // Translate 'picked' to the corresponding region iterator over the | 
| 1354 |         // g_path graph. we can't trust the numbering, so we use a vertex | 
| 1355 |         // instead. | 
| 1356 |         NFAVertex picked_v = picked->second.enters.front(); | 
| 1357 |         assert(contains(orig_to_copy, picked_v)); | 
| 1358 |         u32 picked_region = regions.at(orig_to_copy[picked_v]); | 
| 1359 |         map<u32, region_info>::const_iterator path_pick = | 
| 1360 |             path_info.find(picked_region); | 
| 1361 |         if (path_pick == path_info.end()) { | 
| 1362 |             assert(0); // odd | 
| 1363 |             return false; | 
| 1364 |         } | 
| 1365 |  | 
| 1366 |         // Similarly, find our bad_region. | 
| 1367 |         assert(contains(orig_to_copy, v)); | 
| 1368 |         u32 bad_region = regions.at(orig_to_copy[v]); | 
| 1369 |  | 
| 1370 |         // It's possible that the region may have grown to include its | 
| 1371 |         // successors, in which case we (currently) run screaming. Just | 
| 1372 |         // checking the size should be sufficient here. | 
| 1373 |         if (picked->second.full.size() != path_pick->second.full.size()) { | 
| 1374 |             DEBUG_PRINTF("picked region has grown, bailing\n" ); | 
| 1375 |             return false; | 
| 1376 |         } | 
| 1377 |  | 
| 1378 |         // Construct reverse mapping from vertices in g_path to g. | 
| 1379 |         unordered_map<NFAVertex, NFAVertex> copy_to_orig; | 
| 1380 |         for (const auto &m : orig_to_copy) { | 
| 1381 |             copy_to_orig.insert(make_pair(m.second, m.first)); | 
| 1382 |         } | 
| 1383 |  | 
| 1384 |         bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick, | 
| 1385 |                                          bad_region, parent_plan, | 
| 1386 |                                          copy_to_orig, plan, grey); | 
| 1387 |         if (!to_end) { | 
| 1388 |             return false; | 
| 1389 |         } | 
| 1390 |     } | 
| 1391 |  | 
| 1392 |     return true; | 
| 1393 | } | 
| 1394 |  | 
| 1395 | enum dsp_behaviour { | 
| 1396 |     ALLOW_MODIFY_HOLDER, | 
| 1397 |     DISALLOW_MODIFY_HOLDER /* say no to tree planning */ | 
| 1398 | }; | 
| 1399 |  | 
| 1400 | static | 
| 1401 | bool doSomPlanning(NGHolder &g, bool stuck_in, | 
| 1402 |                    const unordered_map<NFAVertex, u32> ®ions, | 
| 1403 |                    const map<u32, region_info> &info, | 
| 1404 |                    map<u32, region_info>::const_iterator picked, | 
| 1405 |                    vector<som_plan> &plan, | 
| 1406 |                    const Grey &grey, | 
| 1407 |                    dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) { | 
| 1408 |     DEBUG_PRINTF("in picked is %u\n" , picked->first); | 
| 1409 |  | 
| 1410 |     /* Need to verify how far the lock covers */ | 
| 1411 |     u32 bad_region; | 
| 1412 |     NGHolder *ap_pref = plan.back().prefix.get(); | 
| 1413 |     NGHolder ap_temp; | 
| 1414 |     if (hasBigCycles(*ap_pref)) { | 
| 1415 |         fillRoughMidfix(&ap_temp, g, regions, info, picked); | 
| 1416 |         ap_pref = &ap_temp; | 
| 1417 |     } | 
| 1418 |  | 
| 1419 |     bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked, | 
| 1420 |                               picked, picked, plan.back().escapes, | 
| 1421 |                               plan.back(), &bad_region); | 
| 1422 |  | 
| 1423 |     if (to_end) { | 
| 1424 |         DEBUG_PRINTF("advanced through the whole graph in one go!\n" ); | 
| 1425 |         addReporterVertices(g, plan.back().reporters); | 
| 1426 |         return true; | 
| 1427 |     } | 
| 1428 |  | 
| 1429 |     map<u32, region_info>::const_iterator prev_furthest = picked; | 
| 1430 |     map<u32, region_info>::const_iterator furthest; | 
| 1431 |  | 
| 1432 |     furthest = info.find(bad_region); /* first bad */ | 
| 1433 |     if (furthest == info.begin() || furthest == info.end()) { | 
| 1434 |         DEBUG_PRINTF("no partition\n" ); | 
| 1435 |         return false; | 
| 1436 |     } | 
| 1437 |     --furthest; /* last region we can establish som for */ | 
| 1438 |  | 
| 1439 |     if (furthest->first <= picked->first) { | 
| 1440 |     do_tree: | 
| 1441 |         /* unable to establish SoM past the last picked region */ | 
| 1442 |         if (behaviour == DISALLOW_MODIFY_HOLDER) { | 
| 1443 |             /* tree planning mutates the graph */ | 
| 1444 |             return false; | 
| 1445 |         } | 
| 1446 |  | 
| 1447 |         DEBUG_PRINTF("failed to make any progress\n" ); | 
| 1448 |         assert(!plan.empty()); | 
| 1449 |         if (plan.size() == 1) { | 
| 1450 |             DEBUG_PRINTF("not handling initial alternations yet\n" ); | 
| 1451 |             return false; | 
| 1452 |         } | 
| 1453 |         plan.pop_back(); | 
| 1454 |         return doTreePlanning(g, furthest, prev_furthest, plan, grey); | 
| 1455 |     } | 
| 1456 |  | 
| 1457 |     furthest = picked; | 
| 1458 |     while (!to_end) { | 
| 1459 |         prev_furthest = furthest; | 
| 1460 |  | 
| 1461 |         DEBUG_PRINTF("prev further is %u\n" , prev_furthest->first); | 
| 1462 |         DEBUG_PRINTF("first bad region now %u\n" , bad_region); | 
| 1463 |  | 
| 1464 |         furthest = info.find(bad_region); /* first bad */ | 
| 1465 |         if (furthest == info.begin() || furthest == info.end()) { | 
| 1466 |             DEBUG_PRINTF("no partition\n" ); | 
| 1467 |             return false; | 
| 1468 |         } | 
| 1469 |         --furthest; /* last region we can establish som for */ | 
| 1470 |  | 
| 1471 |         map<u32, region_info>::const_iterator furthest_lock = furthest; | 
| 1472 |         CharReach next_escapes; | 
| 1473 |         bool stuck; | 
| 1474 |         do { | 
| 1475 |             stuck = isPossibleLock(g, furthest_lock, info, &next_escapes); | 
| 1476 |         } while (!stuck && (--furthest_lock)->first > prev_furthest->first); | 
| 1477 |         DEBUG_PRINTF("lock possible? %d\n" , (int)stuck); | 
| 1478 |         DEBUG_PRINTF("furthest_lock=%u\n" , furthest_lock->first); | 
| 1479 |  | 
| 1480 |         if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) { | 
| 1481 |             stuck = false; | 
| 1482 |         } | 
| 1483 |  | 
| 1484 |         if (!isMandRegionBetween(prev_furthest, furthest)) { | 
| 1485 |             DEBUG_PRINTF("no mand region between %u and %u\n" , | 
| 1486 |                          prev_furthest->first, furthest->first); | 
| 1487 |             return false; | 
| 1488 |         } | 
| 1489 |  | 
| 1490 |         /* There is no certainty that the som at a reset location will always | 
| 1491 |          * go forward */ | 
| 1492 |         if (plan.back().is_reset && stuck) { | 
| 1493 |             NGHolder midfix; | 
| 1494 |             fillHolderForLockCheck(&midfix, g, info, furthest_lock); | 
| 1495 |  | 
| 1496 |             DEBUG_PRINTF("checking if midfix is suitable for lock\n" ); | 
| 1497 |             if (!firstMatchIsFirst(midfix)) { | 
| 1498 |                 DEBUG_PRINTF("not stuck\n" ); | 
| 1499 |                 stuck = false; | 
| 1500 |             } | 
| 1501 |         } | 
| 1502 |  | 
| 1503 |         assert(!plan.empty()); | 
| 1504 |         if (!addPlan(plan, plan.size() - 1)) { | 
| 1505 |             return false; | 
| 1506 |         } | 
| 1507 |  | 
| 1508 |         to_end = false; | 
| 1509 |  | 
| 1510 |         if (stuck && next_escapes.none()) { | 
| 1511 |             picked = furthest_lock; | 
| 1512 |             to_end = true; | 
| 1513 |         } | 
| 1514 |  | 
| 1515 |         if (!to_end) { | 
| 1516 |             NGHolder conservative_midfix; /* for use in reset, exsl analysis */ | 
| 1517 |             fillRoughMidfix(&conservative_midfix, g, regions, info, furthest); | 
| 1518 |  | 
| 1519 |             u32 old_bad_region = bad_region; | 
| 1520 |             to_end = advancePlan(g, regions, conservative_midfix, stuck, picked, | 
| 1521 |                                  furthest, furthest_lock, next_escapes, | 
| 1522 |                                  plan.back(), &bad_region); | 
| 1523 |  | 
| 1524 |             if (!to_end | 
| 1525 |                 && bad_region <= old_bad_region) { /* we failed to progress */ | 
| 1526 |                 goto do_tree; | 
| 1527 |             } | 
| 1528 |         } | 
| 1529 |  | 
| 1530 |         /* handle direct edge to accepts from region */ | 
| 1531 |         if (edge(furthest->second.exits.front(), g.accept, g).second | 
| 1532 |             || edge(furthest->second.exits.front(), g.acceptEod, g).second) { | 
| 1533 |             map<u32, region_info>::const_iterator it = furthest; | 
| 1534 |             do { | 
| 1535 |                 DEBUG_PRINTF("direct edge to accept from region %u\n" , | 
| 1536 |                              it->first); | 
| 1537 |                 addReporterVertices(it->second, g, plan.back().reporters_in); | 
| 1538 |             } while (it != info.begin() && it->second.optional | 
| 1539 |                      && (it--)->first); | 
| 1540 |         } | 
| 1541 |  | 
| 1542 |         /* create second prefix */ | 
| 1543 |         plan.back().prefix = makePrefix(g, regions, furthest->second, | 
| 1544 |                                         next(furthest)->second); | 
| 1545 |     } | 
| 1546 |     DEBUG_PRINTF("(final) picked is %u\n" , picked->first); | 
| 1547 |  | 
| 1548 |     // The last region contributes reporters. If it's optional, the regions | 
| 1549 |     // before it do as well. | 
| 1550 |     map<u32, region_info>::const_reverse_iterator it = info.rbegin(); | 
| 1551 |     do { | 
| 1552 |         DEBUG_PRINTF("region %u contributes reporters to last plan\n" , | 
| 1553 |                      it->first); | 
| 1554 |         addReporterVertices(it->second, g, plan.back().reporters); | 
| 1555 |     } while (it->second.optional && it != info.rend() && | 
| 1556 |              (++it)->first > furthest->first); | 
| 1557 |  | 
| 1558 |     DEBUG_PRINTF("done!\n" ); | 
| 1559 |     return true; | 
| 1560 | } | 
| 1561 |  | 
| 1562 | static | 
| 1563 | void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, | 
| 1564 |                  UNUSED size_t num) { | 
| 1565 | #if defined(DEBUG) || defined(DUMP_PLANS) | 
| 1566 |     DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, "  | 
| 1567 |                  "parent=%u\n" , | 
| 1568 |                  num, p.prefix.get(), | 
| 1569 |                  describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(), | 
| 1570 |                  p.is_reset, p.parent); | 
| 1571 |     printf("  reporters:" ); | 
| 1572 |     for (auto v : p.reporters) { | 
| 1573 |         printf(" %zu" , g[v].index); | 
| 1574 |     } | 
| 1575 |     printf("\n" ); | 
| 1576 |     printf("  reporters_in:" ); | 
| 1577 |     for (auto v : p.reporters_in) { | 
| 1578 |         printf(" %zu" , g[v].index); | 
| 1579 |     } | 
| 1580 |     printf("\n" ); | 
| 1581 | #endif | 
| 1582 | } | 
| 1583 |  | 
| 1584 | /** | 
| 1585 |  * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too | 
| 1586 |  * large exception as (1) if previous ng modification have been applied (other | 
| 1587 |  * midfixes have been applied), ng will be an undefined state on return and (2) | 
| 1588 |  * if the head of a pattern cannot be implemented we are generally unable to | 
| 1589 |  * implement the full pattern. | 
| 1590 |  */ | 
| 1591 | static | 
| 1592 | void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, | 
| 1593 |                       NGHolder &g, vector<som_plan> &plan, | 
| 1594 |                       const u32 first_som_slot) { | 
| 1595 |     ReportManager &rm = ng.rm; | 
| 1596 |     SomSlotManager &ssm = ng.ssm; | 
| 1597 |  | 
| 1598 |     DEBUG_PRINTF("%zu plans\n" , plan.size()); | 
| 1599 |     assert(plan.size() <= MAX_SOM_PLANS); | 
| 1600 |     assert(!plan.empty()); | 
| 1601 |  | 
| 1602 |     vector<u32> som_slots(plan.size()); | 
| 1603 |     som_slots[0] = first_som_slot; | 
| 1604 |  | 
| 1605 |     // Root plan, which already has a SOM slot assigned (first_som_slot). | 
| 1606 |     dumpSomPlan(g, plan.front(), 0); | 
| 1607 |     dumpSomSubComponent(*plan.front().prefix, "04_som" , expr.index, comp_id, 0, | 
| 1608 |                         ng.cc.grey); | 
| 1609 |     assert(plan.front().prefix); | 
| 1610 |     if (plan.front().escapes.any() && !plan.front().is_reset) { | 
| 1611 |         /* setup escaper for first som location */ | 
| 1612 |         if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, | 
| 1613 |                            first_som_slot)) { | 
| 1614 |             throw CompileError(expr.index, "Pattern is too large." ); | 
| 1615 |         } | 
| 1616 |     } | 
| 1617 |  | 
| 1618 |     assert(plan.front().reporters_in.empty()); | 
| 1619 |     updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot); | 
| 1620 |  | 
| 1621 |     // Tree of plans, encoded in a vector. | 
| 1622 |     vector<som_plan>::const_iterator it = plan.begin(); | 
| 1623 |     for (++it; it != plan.end(); ++it) { | 
| 1624 |         const u32 plan_num = it - plan.begin(); | 
| 1625 |         dumpSomPlan(g, *it, plan_num); | 
| 1626 |         dumpSomSubComponent(*it->prefix, "04_som" , expr.index, comp_id, | 
| 1627 |                             plan_num, ng.cc.grey); | 
| 1628 |  | 
| 1629 |         assert(it->parent < plan_num); | 
| 1630 |         u32 som_slot_in = som_slots[it->parent]; | 
| 1631 |         u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes, | 
| 1632 |                                           it->is_reset, som_slot_in); | 
| 1633 |         som_slots[plan_num] = som_slot_out; | 
| 1634 |  | 
| 1635 |         assert(!it->no_implement); | 
| 1636 |         if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { | 
| 1637 |             throw CompileError(expr.index, "Pattern is too large." ); | 
| 1638 |         } | 
| 1639 |         updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); | 
| 1640 |         updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); | 
| 1641 |     } | 
| 1642 |  | 
| 1643 |     /* create prefix to set the som_loc */ | 
| 1644 |     if (!plan.front().no_implement) { | 
| 1645 |         renumber_vertices(*plan.front().prefix); | 
| 1646 |         assert(plan.front().prefix->kind == NFA_OUTFIX); | 
| 1647 |         if (!ng.addHolder(*plan.front().prefix)) { | 
| 1648 |             throw CompileError(expr.index, "Pattern is too large." ); | 
| 1649 |         } | 
| 1650 |     } | 
| 1651 | } | 
| 1652 |  | 
| 1653 | static | 
| 1654 | void anchorStarts(NGHolder &g) { | 
| 1655 |     vector<NFAEdge> dead; | 
| 1656 |     for (const auto &e : out_edges_range(g.startDs, g)) { | 
| 1657 |         NFAVertex v = target(e, g); | 
| 1658 |         if (v == g.startDs) { | 
| 1659 |             continue; | 
| 1660 |         } | 
| 1661 |         add_edge_if_not_present(g.start, v, g[e], g); | 
| 1662 |         dead.push_back(e); | 
| 1663 |     } | 
| 1664 |     remove_edges(dead, g); | 
| 1665 | } | 
| 1666 |  | 
| 1667 | static | 
| 1668 | void setZeroReports(NGHolder &g) { | 
| 1669 |     set<NFAVertex> acceptors; | 
| 1670 |     insert(&acceptors, inv_adjacent_vertices(g.accept, g)); | 
| 1671 |     insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g)); | 
| 1672 |     acceptors.erase(g.accept); | 
| 1673 |  | 
| 1674 |     for (auto v : vertices_range(g)) { | 
| 1675 |         auto &reports = g[v].reports; | 
| 1676 |         reports.clear(); | 
| 1677 |  | 
| 1678 |         if (!contains(acceptors, v)) { | 
| 1679 |             continue; | 
| 1680 |         } | 
| 1681 |  | 
| 1682 |         // We use the report ID to store the offset adjustment used for virtual | 
| 1683 |         // starts. | 
| 1684 |  | 
| 1685 |         if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { | 
| 1686 |             reports.insert(1); | 
| 1687 |         } else { | 
| 1688 |             reports.insert(0); | 
| 1689 |         } | 
| 1690 |     } | 
| 1691 | } | 
| 1692 |  | 
| 1693 | /* updates the reports on all vertices leading to the sink */ | 
| 1694 | static | 
| 1695 | void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink, | 
| 1696 |                           const ReportID report, const u32 comp_id) { | 
| 1697 |     // Construct replacement report. | 
| 1698 |     Report ir = rm.getReport(report); | 
| 1699 |     ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA; | 
| 1700 |     ir.revNfaIndex = comp_id; | 
| 1701 |     ReportID new_report = rm.getInternalId(ir); | 
| 1702 |  | 
| 1703 |     for (auto v : inv_adjacent_vertices_range(sink, g)) { | 
| 1704 |         if (v == g.accept) { | 
| 1705 |             continue; | 
| 1706 |         } | 
| 1707 |  | 
| 1708 |         auto &r = g[v].reports; | 
| 1709 |         if (contains(r, report)) { | 
| 1710 |             r.erase(report); | 
| 1711 |             r.insert(new_report); | 
| 1712 |         } | 
| 1713 |     } | 
| 1714 | } | 
| 1715 |  | 
| 1716 | static | 
| 1717 | void clearProperInEdges(NGHolder &g, const NFAVertex sink) { | 
| 1718 |     vector<NFAEdge> dead; | 
| 1719 |     for (const auto &e : in_edges_range(sink, g)) { | 
| 1720 |         if (source(e, g) == g.accept) { | 
| 1721 |             continue; | 
| 1722 |         } | 
| 1723 |         dead.push_back(e); | 
| 1724 |     } | 
| 1725 |  | 
| 1726 |     if (dead.empty()) { | 
| 1727 |         return; | 
| 1728 |     } | 
| 1729 |  | 
| 1730 |     remove_edges(dead, g); | 
| 1731 |     pruneUseless(g, false); | 
| 1732 | } | 
| 1733 |  | 
| 1734 | namespace { | 
| 1735 | struct SomRevNfa { | 
| 1736 |     SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n) | 
| 1737 |         : sink(s), report(r), nfa(move(n)) {} | 
| 1738 |     NFAVertex sink; | 
| 1739 |     ReportID report; | 
| 1740 |     bytecode_ptr<NFA> nfa; | 
| 1741 | }; | 
| 1742 | } | 
| 1743 |  | 
| 1744 | static | 
| 1745 | bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g, | 
| 1746 |                                     const CompileContext &cc) { | 
| 1747 |     // Create a reversed anchored version of this NFA which fires a zero report | 
| 1748 |     // ID on accept. | 
| 1749 |     NGHolder g_rev; | 
| 1750 |     reverseHolder(g, g_rev); | 
| 1751 |     anchorStarts(g_rev); | 
| 1752 |     setZeroReports(g_rev); | 
| 1753 |  | 
| 1754 |     // Prep for actual construction. | 
| 1755 |     renumber_vertices(g_rev); | 
| 1756 |     g_rev.kind = NFA_REV_PREFIX; | 
| 1757 |     reduceGraphEquivalences(g_rev, cc); | 
| 1758 |     removeRedundancy(g_rev, SOM_NONE); | 
| 1759 |  | 
| 1760 |     DEBUG_PRINTF("building a rev NFA with %zu vertices\n" , num_vertices(g_rev)); | 
| 1761 |  | 
| 1762 |     auto nfa = constructReversedNFA(g_rev, cc); | 
| 1763 |     if (!nfa) { | 
| 1764 |         return nfa; | 
| 1765 |     } | 
| 1766 |  | 
| 1767 |     // Set some useful properties. | 
| 1768 |     depth maxWidth = findMaxWidth(g); | 
| 1769 |     if (maxWidth.is_finite()) { | 
| 1770 |         nfa->maxWidth = (u32)maxWidth; | 
| 1771 |     } else { | 
| 1772 |         nfa->maxWidth = 0; | 
| 1773 |     } | 
| 1774 |     depth minWidth = findMinWidth(g); | 
| 1775 |     nfa->minWidth = (u32)minWidth; | 
| 1776 |  | 
| 1777 |     return nfa; | 
| 1778 | } | 
| 1779 |  | 
| 1780 | static | 
| 1781 | bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g, | 
| 1782 |                    const ReportID report, const NFAVertex sink, | 
| 1783 |                    const CompileContext &cc) { | 
| 1784 |     // Clone the graph with ONLY the given report vertices on the given sink. | 
| 1785 |     NGHolder g2; | 
| 1786 |     cloneHolder(g2, g); | 
| 1787 |     clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept); | 
| 1788 |     pruneAllOtherReports(g2, report); | 
| 1789 |  | 
| 1790 |     if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) { | 
| 1791 |         DEBUG_PRINTF("no work to do for this sink\n" ); | 
| 1792 |         return true; | 
| 1793 |     } | 
| 1794 |  | 
| 1795 |     renumber_vertices(g2); // for findMinWidth, findMaxWidth. | 
| 1796 |  | 
| 1797 |     auto nfa = makeBareSomRevNfa(g2, cc); | 
| 1798 |     if (!nfa) { | 
| 1799 |         DEBUG_PRINTF("couldn't build rev nfa\n" ); | 
| 1800 |         return false; | 
| 1801 |     } | 
| 1802 |  | 
| 1803 |     som_nfas.emplace_back(sink, report, move(nfa)); | 
| 1804 |     return true; | 
| 1805 | } | 
| 1806 |  | 
| 1807 | static | 
| 1808 | bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { | 
| 1809 |     ReportManager &rm = ng.rm; | 
| 1810 |  | 
| 1811 |     // FIXME might want to work on a graph without extra redundancy? | 
| 1812 |     depth maxWidth = findMaxWidth(g); | 
| 1813 |     DEBUG_PRINTF("maxWidth=%s\n" , maxWidth.str().c_str()); | 
| 1814 |  | 
| 1815 |     if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) { | 
| 1816 |         DEBUG_PRINTF("too wide\n" ); | 
| 1817 |         return false; | 
| 1818 |     } | 
| 1819 |  | 
| 1820 |     set<ReportID> reports = all_reports(g); | 
| 1821 |     DEBUG_PRINTF("%zu reports\n" , reports.size()); | 
| 1822 |  | 
| 1823 |     // We distinguish between reports and accept/acceptEod sinks in order to | 
| 1824 |     // correctly handle cases which do different things on eod/normal accepts. | 
| 1825 |     // Later, it might be more elegant to do this with a single NFA and | 
| 1826 |     // multi-tops. | 
| 1827 |  | 
| 1828 |     vector<SomRevNfa> som_nfas; | 
| 1829 |  | 
| 1830 |     for (auto report : reports) { | 
| 1831 |         if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) { | 
| 1832 |             return false; | 
| 1833 |         } | 
| 1834 |         if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) { | 
| 1835 |             return false; | 
| 1836 |         } | 
| 1837 |     } | 
| 1838 |  | 
| 1839 |     for (auto &som_nfa : som_nfas) { | 
| 1840 |         assert(som_nfa.nfa); | 
| 1841 |  | 
| 1842 |         // Transfer ownership of the NFA to the SOM slot manager. | 
| 1843 |         u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth); | 
| 1844 |  | 
| 1845 |         // Replace this report on 'g' with a SOM_REV_NFA report pointing at our | 
| 1846 |         // new component. | 
| 1847 |         makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id); | 
| 1848 |     } | 
| 1849 |  | 
| 1850 |     if (ng.cc.streaming) { | 
| 1851 |         assert(ng.ssm.somHistoryRequired() <= | 
| 1852 |                max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable)); | 
| 1853 |     } | 
| 1854 |  | 
| 1855 |     return true; | 
| 1856 | } | 
| 1857 |  | 
| 1858 | static | 
| 1859 | u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, | 
| 1860 |                       const CompileContext &cc) { | 
| 1861 |     depth maxWidth = findMaxWidth(g); | 
| 1862 |  | 
| 1863 |     assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable)); | 
| 1864 |     assert(all_reports(g).size() == 1); | 
| 1865 |  | 
| 1866 |     auto nfa = makeBareSomRevNfa(g, cc); | 
| 1867 |     if (!nfa) { | 
| 1868 |         throw CompileError(expr.index, "Pattern is too large." ); | 
| 1869 |     } | 
| 1870 |  | 
| 1871 |     if (ng.cc.streaming) { | 
| 1872 |         assert(ng.ssm.somHistoryRequired() <= | 
| 1873 |                max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable)); | 
| 1874 |     } | 
| 1875 |  | 
| 1876 |     return ng.ssm.addRevNfa(move(nfa), maxWidth); | 
| 1877 | } | 
| 1878 |  | 
| 1879 | static | 
| 1880 | bool is_literable(const NGHolder &g, NFAVertex v) { | 
| 1881 |     const CharReach &cr = g[v].char_reach; | 
| 1882 |     return cr.count() == 1 || cr.isCaselessChar(); | 
| 1883 | } | 
| 1884 |  | 
| 1885 | static | 
| 1886 | void append(ue2_literal &s, const CharReach &cr) { | 
| 1887 |     assert(cr.count() == 1 || cr.isCaselessChar()); | 
| 1888 |     s.push_back(cr.find_first(), cr.isCaselessChar()); | 
| 1889 | } | 
| 1890 |  | 
| 1891 | static | 
| 1892 | map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g, | 
| 1893 |                             const map<u32, region_info> &info, | 
| 1894 |                             map<u32, region_info>::const_iterator lower_bound, | 
| 1895 |                             ue2_literal &s_out, const Grey &grey) { | 
| 1896 | #define MIN_LITERAL_LENGTH 3 | 
| 1897 |     s_out.clear(); | 
| 1898 |     bool past_lower = false; | 
| 1899 |     ue2_literal s; | 
| 1900 |     map<u32, region_info>::const_iterator it; | 
| 1901 |     for (it = info.begin(); it != info.end(); ++it) { | 
| 1902 |         if (it == lower_bound) { | 
| 1903 |             past_lower = true; | 
| 1904 |         } | 
| 1905 |         if (!it->second.optional && it->second.dag | 
| 1906 |             && it->second.full.size() == 1 | 
| 1907 |             && is_literable(g, it->second.full.front())) { | 
| 1908 |             append(s, g[it->second.full.front()].char_reach); | 
| 1909 |  | 
| 1910 |             if (s.length() >= grey.maxHistoryAvailable && past_lower) { | 
| 1911 |                 goto exit; | 
| 1912 |             } | 
| 1913 |         } else { | 
| 1914 |             if (past_lower && it != lower_bound | 
| 1915 |                 && s.length() >= MIN_LITERAL_LENGTH) { | 
| 1916 |                 --it; | 
| 1917 |                 goto exit; | 
| 1918 |             } | 
| 1919 |             s.clear(); | 
| 1920 |         } | 
| 1921 |     } | 
| 1922 |  | 
| 1923 |     if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) { | 
| 1924 |         --it; | 
| 1925 |         s_out = s; | 
| 1926 |         return it; | 
| 1927 |     } | 
| 1928 |  exit: | 
| 1929 |     if (s.length() > grey.maxHistoryAvailable) { | 
| 1930 |         ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable; | 
| 1931 |         for (; jt != s.end(); ++jt) { | 
| 1932 |             s_out.push_back(*jt); | 
| 1933 |         } | 
| 1934 |     } else { | 
| 1935 |         s_out = s; | 
| 1936 |     } | 
| 1937 |     return it; | 
| 1938 | } | 
| 1939 |  | 
| 1940 | static | 
| 1941 | bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g, | 
| 1942 |                   const unordered_map<NFAVertex, u32> ®ions, | 
| 1943 |                   const map<u32, region_info> &info, | 
| 1944 |                   map<u32, region_info>::const_iterator picked, | 
| 1945 |                   const Grey &grey, | 
| 1946 |                   vector<som_plan> *plan) { | 
| 1947 |     DEBUG_PRINTF("trying to chain from %u\n" , picked->first); | 
| 1948 |     const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ | 
| 1949 |  | 
| 1950 |     shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second, | 
| 1951 |                                              next(picked)->second); | 
| 1952 |  | 
| 1953 |     // Quick check to stop us from trying this on huge graphs, which causes us | 
| 1954 |     // to spend forever in ng_execute looking at cases that will most like | 
| 1955 |     // fail. See UE-2078. | 
| 1956 |     size_t prefix_size = num_vertices(*prefix); | 
| 1957 |     size_t total_size = num_vertices(g); | 
| 1958 |     assert(total_size >= prefix_size); | 
| 1959 |     if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) { | 
| 1960 |         DEBUG_PRINTF("suffix has %zu vertices, fail\n" , | 
| 1961 |                      total_size - prefix_size); | 
| 1962 |         return false; | 
| 1963 |     } | 
| 1964 |  | 
| 1965 |     clearReports(*prefix); | 
| 1966 |     for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) { | 
| 1967 |         (*prefix)[u].reports.insert(0); | 
| 1968 |     } | 
| 1969 |  | 
| 1970 |     dumpHolder(*prefix, 0, "full_haiglit_prefix" , grey); | 
| 1971 |  | 
| 1972 |     CharReach escapes; | 
| 1973 |     bool stuck = isPossibleLock(g, picked, info, &escapes); | 
| 1974 |     if (stuck) { | 
| 1975 |         NGHolder gg; | 
| 1976 |         fillHolderForLockCheck(&gg, g, info, picked); | 
| 1977 |  | 
| 1978 |         stuck = firstMatchIsFirst(gg); | 
| 1979 |     } | 
| 1980 |  | 
| 1981 |     DEBUG_PRINTF("stuck = %d\n" , (int)stuck); | 
| 1982 |  | 
| 1983 |     // Note: no-one should ever pay attention to the root plan's som_loc_in. | 
| 1984 |     plan->emplace_back(prefix, escapes, false, 0); | 
| 1985 |     plan->back().no_implement = true; | 
| 1986 |  | 
| 1987 |     dumpHolder(*plan->back().prefix, 22, "som_prefix" , grey); | 
| 1988 |  | 
| 1989 |     /* don't allow tree planning to mutate the graph */ | 
| 1990 |     if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey, | 
| 1991 |                        DISALLOW_MODIFY_HOLDER)) { | 
| 1992 |         // Rollback SOM locations. | 
| 1993 |         ssm.rollbackSomTo(numSomLocsBefore); | 
| 1994 |  | 
| 1995 |         DEBUG_PRINTF("fail to chain\n" ); | 
| 1996 |         return false; | 
| 1997 |     } | 
| 1998 |  | 
| 1999 |     return true; | 
| 2000 | } | 
| 2001 |  | 
| 2002 | static | 
| 2003 | void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) { | 
| 2004 |     ReportID haig_report_id = rose.getNewNfaReport(); | 
| 2005 |     DEBUG_PRINTF("setting report id of %u\n" , haig_report_id); | 
| 2006 |  | 
| 2007 |     clearReports(h); | 
| 2008 |     for (auto u : inv_adjacent_vertices_range(h.accept, h)) { | 
| 2009 |         h[u].reports.clear(); | 
| 2010 |         h[u].reports.insert(haig_report_id); | 
| 2011 |     } | 
| 2012 | } | 
| 2013 |  | 
| 2014 | static | 
| 2015 | bool tryHaig(RoseBuild &rose, NGHolder &g, | 
| 2016 |              const unordered_map<NFAVertex, u32> ®ions, | 
| 2017 |              som_type som, u32 somPrecision, | 
| 2018 |              map<u32, region_info>::const_iterator picked, | 
| 2019 |              shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix, | 
| 2020 |              const Grey &grey) { | 
| 2021 |     DEBUG_PRINTF("trying to build a haig\n" ); | 
| 2022 |     shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second, | 
| 2023 |                                              next(picked)->second); | 
| 2024 |     prefix->kind = NFA_PREFIX; | 
| 2025 |     setReportOnHaigPrefix(rose, *prefix); | 
| 2026 |     dumpHolder(*prefix, 0, "haig_prefix" , grey); | 
| 2027 |     vector<vector<CharReach> > triggers; /* empty for prefix */ | 
| 2028 |     *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey); | 
| 2029 |     if (!*haig) { | 
| 2030 |         DEBUG_PRINTF("failed to haig\n" ); | 
| 2031 |         return false; | 
| 2032 |     } | 
| 2033 |     *haig_prefix = prefix; | 
| 2034 |     return true; | 
| 2035 | } | 
| 2036 |  | 
| 2037 | static | 
| 2038 | void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix, | 
| 2039 |                         const shared_ptr<raw_som_dfa> &haig, | 
| 2040 |                         const ue2_literal &lit, const set<ReportID> &reports) { | 
| 2041 |     assert(prefix && haig); | 
| 2042 |  | 
| 2043 |     DEBUG_PRINTF("trying to build a sombe from %s\n" , dumpString(lit).c_str()); | 
| 2044 |  | 
| 2045 |     RoseInGraph ig; | 
| 2046 |     RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); | 
| 2047 |     RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); | 
| 2048 |  | 
| 2049 |     add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig); | 
| 2050 |  | 
| 2051 |     assert(!reports.empty()); | 
| 2052 |     RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig); | 
| 2053 |     add_edge(v, a, RoseInEdgeProps(0U, 0U), ig); | 
| 2054 |  | 
| 2055 |     calcVertexOffsets(ig); | 
| 2056 |  | 
| 2057 |     UNUSED bool rv = tb.addSombeRose(ig); | 
| 2058 |     assert(rv); // TODO: recover from addRose failure | 
| 2059 | } | 
| 2060 |  | 
| 2061 | static | 
| 2062 | sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, | 
| 2063 |                       u32 comp_id, som_type som, | 
| 2064 |                       const unordered_map<NFAVertex, u32> ®ions, | 
| 2065 |                       const map<u32, region_info> &info, | 
| 2066 |                       map<u32, region_info>::const_iterator lower_bound) { | 
| 2067 |     DEBUG_PRINTF("entry\n" ); | 
| 2068 |     assert(g.kind == NFA_OUTFIX); | 
| 2069 |     const CompileContext &cc = ng.cc; | 
| 2070 |     ReportManager &rm = ng.rm; | 
| 2071 |     SomSlotManager &ssm = ng.ssm; | 
| 2072 |  | 
| 2073 |     if (!cc.grey.allowHaigLit) { | 
| 2074 |         return SOMBE_FAIL; | 
| 2075 |     } | 
| 2076 |  | 
| 2077 |     const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */ | 
| 2078 |     u32 som_loc = ssm.getPrivateSomSlot(); | 
| 2079 |  | 
| 2080 |     if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) { | 
| 2081 |         // This is an optimisation: if we can't build a Haig from a portion of | 
| 2082 |         // the graph, then we won't be able to manage it as an outfix either | 
| 2083 |         // when we fall back. | 
| 2084 |         throw CompileError(expr.index, "Pattern is too large." ); | 
| 2085 |     } | 
| 2086 |  | 
| 2087 |     while (1) { | 
| 2088 |         DEBUG_PRINTF("lower bound is %u\n" , lower_bound->first); | 
| 2089 |         ue2_literal s; | 
| 2090 |         map<u32, region_info>::const_iterator lit | 
| 2091 |             = findLaterLiteral(g, info, lower_bound, s, cc.grey); | 
| 2092 |         if (lit == info.end()) { | 
| 2093 |             DEBUG_PRINTF("failed to find literal\n" ); | 
| 2094 |             ssm.rollbackSomTo(numSomLocsBefore); | 
| 2095 |             return SOMBE_FAIL; | 
| 2096 |         } | 
| 2097 |         DEBUG_PRINTF("test literal: %s [r=%u]\n" , dumpString(s).c_str(), | 
| 2098 |                      lit->first); | 
| 2099 |  | 
| 2100 |         if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) { | 
| 2101 |             DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n" ); | 
| 2102 |             lower_bound = lit; | 
| 2103 |             ++lower_bound; | 
| 2104 |             continue; | 
| 2105 |         } | 
| 2106 |  | 
| 2107 |         shared_ptr<raw_som_dfa> haig; | 
| 2108 |         shared_ptr<NGHolder> haig_prefix; | 
| 2109 |         map<u32, region_info>::const_iterator haig_reg = lit; | 
| 2110 |  | 
| 2111 |         if (edge(lit->second.exits.front(), g.acceptEod, g).second) { | 
| 2112 |             /* TODO: handle */ | 
| 2113 |             ssm.rollbackSomTo(numSomLocsBefore); | 
| 2114 |             return SOMBE_FAIL; | 
| 2115 |         } | 
| 2116 |  | 
| 2117 |         advance(haig_reg, -(s32)s.length()); | 
| 2118 |  | 
| 2119 |         if (!haig_reg->first && haig_reg->second.full.size() == 2) { | 
| 2120 |             /* just starts */ | 
| 2121 |  | 
| 2122 |             /* TODO: make below assertion true, reset checks could be stronger | 
| 2123 |              * (12356) | 
| 2124 |              */ | 
| 2125 |             /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey, | 
| 2126 |                &plan)); */ | 
| 2127 |  | 
| 2128 |             lower_bound = lit; | 
| 2129 |             ++lower_bound; | 
| 2130 |             continue; /* somebody else should have been able to chain */ | 
| 2131 |         } | 
| 2132 |  | 
| 2133 |         bool ok = true; | 
| 2134 |         set<ReportID> rep; | 
| 2135 |         if (next(lit) != info.end()) { | 
| 2136 |             /* non terminal literal */ | 
| 2137 |  | 
| 2138 |             /* TODO: handle edges to accept ? */ | 
| 2139 |             vector<som_plan> plan; | 
| 2140 |             if (edge(lit->second.exits.front(), g.accept, g).second) { | 
| 2141 |                 insert(&rep, g[lit->second.exits.front()].reports); | 
| 2142 |                 remove_edge(lit->second.exits.front(), g.accept, g); | 
| 2143 |                 g[lit->second.exits.front()].reports.clear(); | 
| 2144 |  | 
| 2145 |                 /* Note: we can mess with the graph as this is the last literal | 
| 2146 |                  * we will find and on failure the graph will be thrown away */ | 
| 2147 |             } | 
| 2148 |  | 
| 2149 |             ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit, | 
| 2150 |                                                cc.grey, &plan); | 
| 2151 |             ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), | 
| 2152 |                                haig_reg, &haig, &haig_prefix, cc.grey); | 
| 2153 |  | 
| 2154 |             if (!ok) { | 
| 2155 |                 DEBUG_PRINTF(":( going to next attempt\n" ); | 
| 2156 |                 goto next_try; | 
| 2157 |             } | 
| 2158 |  | 
| 2159 |             implementSomPlan(ng, expr, comp_id, g, plan, som_loc); | 
| 2160 |  | 
| 2161 |             Report ir = makeCallback(0U, 0); | 
| 2162 |             assert(!plan.empty()); | 
| 2163 |             if (plan.front().is_reset) { | 
| 2164 |                 ir.type = INTERNAL_SOM_LOC_SET_FROM; | 
| 2165 |             } else { | 
| 2166 |                 ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE; | 
| 2167 |             } | 
| 2168 |             ir.onmatch = som_loc; | 
| 2169 |             rep.insert(rm.getInternalId(ir)); | 
| 2170 |         } else { | 
| 2171 |             /* terminal literal */ | 
| 2172 |             ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg, | 
| 2173 |                          &haig, &haig_prefix, cc.grey); | 
| 2174 |  | 
| 2175 |             /* find report */ | 
| 2176 |             insert(&rep, g[lit->second.exits.front()].reports); | 
| 2177 |  | 
| 2178 |             /* TODO: som_loc is unused */ | 
| 2179 |         } | 
| 2180 |  | 
| 2181 |         if (ok) { | 
| 2182 |             roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep); | 
| 2183 |             if (next(lit) != info.end()) { | 
| 2184 |                 return SOMBE_HANDLED_INTERNAL; | 
| 2185 |             } else { | 
| 2186 |                 ssm.rollbackSomTo(numSomLocsBefore); | 
| 2187 |                 return SOMBE_HANDLED_ALL; | 
| 2188 |             } | 
| 2189 |         } | 
| 2190 | next_try: | 
| 2191 |         lower_bound = lit; | 
| 2192 |         ++lower_bound; | 
| 2193 |     } | 
| 2194 |     assert(0); | 
| 2195 |     return SOMBE_FAIL; | 
| 2196 | } | 
| 2197 |  | 
| 2198 | static | 
| 2199 | bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits, | 
| 2200 |                      set<NFAVertex> *terminals) { | 
| 2201 |     /* TODO: smarter (topo) */ | 
| 2202 | #define MAX_LEADING_LITERALS 20 | 
| 2203 |     set<NFAVertex> s_succ; | 
| 2204 |     insert(&s_succ, adjacent_vertices(g.start, g)); | 
| 2205 |  | 
| 2206 |     set<NFAVertex> sds_succ; | 
| 2207 |     insert(&sds_succ, adjacent_vertices(g.startDs, g)); | 
| 2208 |  | 
| 2209 |     if (!is_subset_of(s_succ, sds_succ)) { | 
| 2210 |         DEBUG_PRINTF("not floating\n" ); | 
| 2211 |         return false; | 
| 2212 |     } | 
| 2213 |  | 
| 2214 |     sds_succ.erase(g.startDs); | 
| 2215 |  | 
| 2216 |     map<NFAVertex, vector<ue2_literal> > curr; | 
| 2217 |     curr[g.startDs].push_back(ue2_literal()); | 
| 2218 |  | 
| 2219 |     map<NFAVertex, set<NFAVertex> > seen; | 
| 2220 |     map<NFAVertex, vector<ue2_literal> > next; | 
| 2221 |  | 
| 2222 |     bool did_expansion = true; | 
| 2223 |     while (did_expansion) { | 
| 2224 |         did_expansion = false; | 
| 2225 |         u32 count = 0; | 
| 2226 |         assert(!curr.empty()); | 
| 2227 |         for (const auto &m : curr) { | 
| 2228 |             const NFAVertex u = m.first; | 
| 2229 |             const vector<ue2_literal> &base = m.second; | 
| 2230 |             DEBUG_PRINTF("expanding from %zu\n" , g[u].index); | 
| 2231 |             for (auto v : adjacent_vertices_range(u, g)) { | 
| 2232 |                 if (v == g.startDs) { | 
| 2233 |                     continue; | 
| 2234 |                 } | 
| 2235 |                 if (contains(seen[u], v)) { | 
| 2236 |                     DEBUG_PRINTF("loop\n" ); | 
| 2237 |                     goto skip_to_next_terminal; | 
| 2238 |                 } | 
| 2239 |                 if (is_any_accept(v, g) || is_match_vertex(v, g)) { | 
| 2240 |                     DEBUG_PRINTF("match\n" ); | 
| 2241 |                     goto skip_to_next_terminal; | 
| 2242 |                 } | 
| 2243 |                 if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) { | 
| 2244 |                     DEBUG_PRINTF("wide\n" ); | 
| 2245 |                     goto skip_to_next_terminal; | 
| 2246 |                 } | 
| 2247 |             } | 
| 2248 |  | 
| 2249 |             for (auto v : adjacent_vertices_range(u, g)) { | 
| 2250 |                 assert(!contains(seen[u], v)); | 
| 2251 |                 if (v == g.startDs) { | 
| 2252 |                     continue; | 
| 2253 |                 } | 
| 2254 |                 insert(&seen[v], seen[u]); | 
| 2255 |                 seen[v].insert(v); | 
| 2256 |                 CharReach cr = g[v].char_reach; | 
| 2257 |                 vector<ue2_literal> &out = next[v]; | 
| 2258 |  | 
| 2259 |                 DEBUG_PRINTF("expanding to %zu (|| = %zu)\n" , g[v].index, | 
| 2260 |                              cr.count()); | 
| 2261 |                 for (size_t c = cr.find_first(); c != CharReach::npos; | 
| 2262 |                      c = cr.find_next(c)) { | 
| 2263 |                     bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) | 
| 2264 |                         && cr.test(mytolower(c)); | 
| 2265 |  | 
| 2266 |                     if (nocase && (char)c == mytolower(c)) { | 
| 2267 |                         continue; /* uppercase already handled us */ | 
| 2268 |                     } | 
| 2269 |  | 
| 2270 |                     for (const auto &lit : base) { | 
| 2271 |                         if (count >= MAX_LEADING_LITERALS) { | 
| 2272 |                             DEBUG_PRINTF("count %u\n" , count); | 
| 2273 |                             goto exit; | 
| 2274 |                         } | 
| 2275 |                         did_expansion = true; | 
| 2276 |                         out.push_back(lit); | 
| 2277 |                         out.back().push_back(c, nocase); | 
| 2278 |                         count++; | 
| 2279 |                         if (out.back().length() > MAX_MASK2_WIDTH | 
| 2280 |                             && mixed_sensitivity(out.back())) { | 
| 2281 |                             goto exit; | 
| 2282 |                         } | 
| 2283 |  | 
| 2284 |                     } | 
| 2285 |                 } | 
| 2286 |             } | 
| 2287 |             if (0) { | 
| 2288 |             skip_to_next_terminal: | 
| 2289 |                 insert(&next[u], next[u].end(), base); | 
| 2290 |                 count += base.size(); | 
| 2291 |                 if (count > MAX_LEADING_LITERALS) { | 
| 2292 |                     DEBUG_PRINTF("count %u\n" , count); | 
| 2293 |                     goto exit; | 
| 2294 |                 } | 
| 2295 |             } | 
| 2296 |         } | 
| 2297 |  | 
| 2298 |         curr.swap(next); | 
| 2299 |         next.clear(); | 
| 2300 |     }; | 
| 2301 |  exit:; | 
| 2302 |     for (const auto &m : curr) { | 
| 2303 |         NFAVertex t = m.first; | 
| 2304 |         if (t == g.startDs) { | 
| 2305 |             assert(curr.size() == 1); | 
| 2306 |             return false; | 
| 2307 |         } | 
| 2308 |         assert(!is_special(t, g)); | 
| 2309 |         terminals->insert(t); | 
| 2310 |         insert(lits, m.second); | 
| 2311 |     } | 
| 2312 |     assert(lits->size() <= MAX_LEADING_LITERALS); | 
| 2313 |     return !lits->empty(); | 
| 2314 | } | 
| 2315 |  | 
| 2316 | static | 
| 2317 | bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out, | 
| 2318 |                              NGHolder *rhs) { | 
| 2319 |     DEBUG_PRINTF("looking for a leading literals\n" ); | 
| 2320 |  | 
| 2321 |     set<NFAVertex> terms; | 
| 2322 |     if (!leadingLiterals(g, lit_out, &terms)) { | 
| 2323 |         return false; | 
| 2324 |     } | 
| 2325 |  | 
| 2326 |     for (UNUSED const auto &lit : *lit_out) { | 
| 2327 |         DEBUG_PRINTF("literal is '%s' (len %zu)\n" , dumpString(lit).c_str(), | 
| 2328 |                      lit.length()); | 
| 2329 |     } | 
| 2330 |  | 
| 2331 |     /* need to validate that it is a clean split */ | 
| 2332 |     assert(!terms.empty()); | 
| 2333 |     set<NFAVertex> adj_term1; | 
| 2334 |     insert(&adj_term1, adjacent_vertices(*terms.begin(), g)); | 
| 2335 |     for (auto v : terms) { | 
| 2336 |         DEBUG_PRINTF("term %zu\n" , g[v].index); | 
| 2337 |         set<NFAVertex> temp; | 
| 2338 |         insert(&temp, adjacent_vertices(v, g)); | 
| 2339 |         if (temp != adj_term1) { | 
| 2340 |             DEBUG_PRINTF("bad split\n" ); | 
| 2341 |             return false; | 
| 2342 |         } | 
| 2343 |     } | 
| 2344 |  | 
| 2345 |     unordered_map<NFAVertex, NFAVertex> rhs_map; | 
| 2346 |     vector<NFAVertex> pivots; | 
| 2347 |     insert(&pivots, pivots.end(), adj_term1); | 
| 2348 |     splitRHS(g, pivots, rhs, &rhs_map); | 
| 2349 |  | 
| 2350 |     assert(is_triggered(*rhs)); | 
| 2351 |     return true; | 
| 2352 | } | 
| 2353 |  | 
| 2354 | static | 
| 2355 | void findBestLiteral(const NGHolder &g, | 
| 2356 |                      const unordered_map<NFAVertex, u32> ®ions, | 
| 2357 |                      ue2_literal *lit_out, NFAVertex *v, | 
| 2358 |                      const CompileContext &cc) { | 
| 2359 |     map<u32, region_info> info; | 
| 2360 |     buildRegionMapping(g, regions, info, false); | 
| 2361 |  | 
| 2362 |     ue2_literal best; | 
| 2363 |     NFAVertex best_v = NGHolder::null_vertex(); | 
| 2364 |  | 
| 2365 |     map<u32, region_info>::const_iterator lit = info.begin(); | 
| 2366 |     while (1) { | 
| 2367 |         ue2_literal s; | 
| 2368 |         lit = findLaterLiteral(g, info, lit, s, cc.grey); | 
| 2369 |         if (lit == info.end()) { | 
| 2370 |             break; | 
| 2371 |         } | 
| 2372 |         DEBUG_PRINTF("test literal: %s [r=%u]\n" , dumpString(s).c_str(), | 
| 2373 |                      lit->first); | 
| 2374 |  | 
| 2375 |         if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) { | 
| 2376 |             DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n" ); | 
| 2377 |             ++lit; | 
| 2378 |             continue; | 
| 2379 |         } | 
| 2380 |  | 
| 2381 |         if (s.length() > best.length()) { | 
| 2382 |             best = s; | 
| 2383 |             assert(!lit->second.exits.empty()); | 
| 2384 |             best_v = lit->second.exits[0]; | 
| 2385 |         } | 
| 2386 |  | 
| 2387 |         ++lit; | 
| 2388 |     } | 
| 2389 |  | 
| 2390 |     lit_out->swap(best); | 
| 2391 |     *v = best_v; | 
| 2392 | } | 
| 2393 |  | 
| 2394 | static | 
| 2395 | bool splitOffBestLiteral(const NGHolder &g, | 
| 2396 |                          const unordered_map<NFAVertex, u32> ®ions, | 
| 2397 |                          ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs, | 
| 2398 |                          const CompileContext &cc) { | 
| 2399 |     NFAVertex v = NGHolder::null_vertex(); | 
| 2400 |  | 
| 2401 |     findBestLiteral(g, regions, lit_out, &v, cc); | 
| 2402 |     if (lit_out->empty()) { | 
| 2403 |         return false; | 
| 2404 |     } | 
| 2405 |  | 
| 2406 |     DEBUG_PRINTF("literal is '%s'\n" , dumpString(*lit_out).c_str()); | 
| 2407 |  | 
| 2408 |     unordered_map<NFAVertex, NFAVertex> lhs_map; | 
| 2409 |     unordered_map<NFAVertex, NFAVertex> rhs_map; | 
| 2410 |  | 
| 2411 |     splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map); | 
| 2412 |  | 
| 2413 |     DEBUG_PRINTF("v = %zu\n" , g[v].index); | 
| 2414 |  | 
| 2415 |     return true; | 
| 2416 | } | 
| 2417 |  | 
| 2418 | /** | 
| 2419 |  * Replace the given graph's EXTERNAL_CALLBACK reports with | 
| 2420 |  * EXTERNAL_CALLBACK_SOM_PASS reports. | 
| 2421 |  */ | 
| 2422 | void makeReportsSomPass(ReportManager &rm, NGHolder &g) { | 
| 2423 |     for (const auto &v : vertices_range(g)) { | 
| 2424 |         const auto &reports = g[v].reports; | 
| 2425 |         if (reports.empty()) { | 
| 2426 |             continue; | 
| 2427 |         } | 
| 2428 |  | 
| 2429 |         flat_set<ReportID> new_reports; | 
| 2430 |         for (const ReportID &id : reports) { | 
| 2431 |             const Report &report = rm.getReport(id); | 
| 2432 |             if (report.type != EXTERNAL_CALLBACK) { | 
| 2433 |                 new_reports.insert(id); | 
| 2434 |                 continue; | 
| 2435 |             } | 
| 2436 |             Report report2 = report; | 
| 2437 |             report2.type = EXTERNAL_CALLBACK_SOM_PASS; | 
| 2438 |             new_reports.insert(rm.getInternalId(report2)); | 
| 2439 |         } | 
| 2440 |  | 
| 2441 |         g[v].reports = new_reports; | 
| 2442 |     } | 
| 2443 | } | 
| 2444 |  | 
| 2445 | static | 
| 2446 | bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) { | 
| 2447 |     ue2_literal lit; | 
| 2448 |     shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); | 
| 2449 |     if (!ng.cc.grey.allowLitHaig) { | 
| 2450 |         return false; | 
| 2451 |     } | 
| 2452 |  | 
| 2453 |     dumpHolder(g, 90, "lithaig_full" , ng.cc.grey); | 
| 2454 |  | 
| 2455 |     if (!splitOffLeadingLiteral(g, &lit, &*rhs)) { | 
| 2456 |         DEBUG_PRINTF("no literal\n" ); | 
| 2457 |         return false; | 
| 2458 |     } | 
| 2459 |  | 
| 2460 |     if (lit.length() < ng.cc.grey.minRoseLiteralLength) { | 
| 2461 |         DEBUG_PRINTF("lit too short\n" ); | 
| 2462 |         return false; | 
| 2463 |     } | 
| 2464 |  | 
| 2465 |     assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); | 
| 2466 |  | 
| 2467 |     makeReportsSomPass(ng.rm, *rhs); | 
| 2468 |  | 
| 2469 |     dumpHolder(*rhs, 91, "lithaig_rhs" , ng.cc.grey); | 
| 2470 |  | 
| 2471 |     vector<vector<CharReach> > triggers; | 
| 2472 |     triggers.push_back(as_cr_seq(lit)); | 
| 2473 |  | 
| 2474 |     assert(rhs->kind == NFA_SUFFIX); | 
| 2475 |     shared_ptr<raw_som_dfa> haig | 
| 2476 |         = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers, | 
| 2477 |                              ng.cc.grey, false /* lit implies adv som */); | 
| 2478 |     if (!haig) { | 
| 2479 |         DEBUG_PRINTF("failed to haig\n" ); | 
| 2480 |         return false; | 
| 2481 |     } | 
| 2482 |     DEBUG_PRINTF("haig %p\n" , haig.get()); | 
| 2483 |  | 
| 2484 |     RoseInGraph ig; | 
| 2485 |     RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); | 
| 2486 |     RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); | 
| 2487 |     add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); | 
| 2488 |  | 
| 2489 |     RoseInVertex a | 
| 2490 |         = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig); | 
| 2491 |     add_edge(v, a, RoseInEdgeProps(haig), ig); | 
| 2492 |  | 
| 2493 |     calcVertexOffsets(ig); | 
| 2494 |  | 
| 2495 |     return ng.rose->addSombeRose(ig); | 
| 2496 | } | 
| 2497 |  | 
| 2498 | static | 
| 2499 | bool doHaigLitHaigSom(NG &ng, NGHolder &g, | 
| 2500 |                       const unordered_map<NFAVertex, u32> ®ions, | 
| 2501 |                       som_type som) { | 
| 2502 |     if (!ng.cc.grey.allowLitHaig) { | 
| 2503 |         return false; | 
| 2504 |     } | 
| 2505 |  | 
| 2506 |     // In streaming mode, we can only delay up to our max available history. | 
| 2507 |     const u32 max_delay = | 
| 2508 |         ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX; | 
| 2509 |  | 
| 2510 |     ue2_literal lit; | 
| 2511 |     shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); | 
| 2512 |     shared_ptr<NGHolder> lhs = make_shared<NGHolder>(); | 
| 2513 |     if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) { | 
| 2514 |         return false; | 
| 2515 |     } | 
| 2516 |  | 
| 2517 |     DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n" , dumpString(lit).c_str(), | 
| 2518 |                  lit.length()); | 
| 2519 |  | 
| 2520 |     if (lit.length() < ng.cc.grey.minRoseLiteralLength) { | 
| 2521 |         DEBUG_PRINTF("lit too short\n" ); | 
| 2522 |         return false; | 
| 2523 |     } | 
| 2524 |  | 
| 2525 |     assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); | 
| 2526 |  | 
| 2527 |     if (edge(rhs->start, rhs->acceptEod, *rhs).second) { | 
| 2528 |         return false; /* TODO: handle */ | 
| 2529 |     } | 
| 2530 |  | 
| 2531 |     makeReportsSomPass(ng.rm, *rhs); | 
| 2532 |  | 
| 2533 |     dumpHolder(*lhs, 92, "haiglithaig_lhs" , ng.cc.grey); | 
| 2534 |     dumpHolder(*rhs, 93, "haiglithaig_rhs" , ng.cc.grey); | 
| 2535 |  | 
| 2536 |     u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay); | 
| 2537 |  | 
| 2538 |     RoseInGraph ig; | 
| 2539 |     RoseInVertex s | 
| 2540 |         = add_vertex(RoseInVertexProps::makeStart(false), ig); | 
| 2541 |     RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); | 
| 2542 |  | 
| 2543 |     bool lhs_all_vac = true; | 
| 2544 |     NGHolder::adjacency_iterator ai, ae; | 
| 2545 |     for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs); | 
| 2546 |          ai != ae && lhs_all_vac; ++ai) { | 
| 2547 |         if (!is_special(*ai, *lhs)) { | 
| 2548 |             lhs_all_vac = false; | 
| 2549 |         } | 
| 2550 |     } | 
| 2551 |     for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs); | 
| 2552 |          ai != ae && lhs_all_vac; ++ai) { | 
| 2553 |         if (!is_special(*ai, *lhs)) { | 
| 2554 |             lhs_all_vac = false; | 
| 2555 |         } | 
| 2556 |     } | 
| 2557 |  | 
| 2558 |     if (lhs_all_vac) { | 
| 2559 |         /* lhs is completely vacuous --> no prefix needed */ | 
| 2560 |         add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); | 
| 2561 |     } else { | 
| 2562 |         assert(delay == lit.length()); | 
| 2563 |         setReportOnHaigPrefix(*ng.rose, *lhs); | 
| 2564 |         vector<vector<CharReach> > prefix_triggers; /* empty for prefix */ | 
| 2565 |         assert(lhs->kind == NFA_PREFIX); | 
| 2566 |         shared_ptr<raw_som_dfa> l_haig | 
| 2567 |             = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(), | 
| 2568 |                                  prefix_triggers, ng.cc.grey); | 
| 2569 |         if (!l_haig) { | 
| 2570 |             DEBUG_PRINTF("failed to haig\n" ); | 
| 2571 |             return false; | 
| 2572 |         } | 
| 2573 |         DEBUG_PRINTF("lhs haig %p\n" , l_haig.get()); | 
| 2574 |  | 
| 2575 |         add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig); | 
| 2576 |     } | 
| 2577 |  | 
| 2578 |     if (!edge(rhs->start, rhs->accept, *rhs).second) { | 
| 2579 |         assert(rhs->kind == NFA_SUFFIX); | 
| 2580 |  | 
| 2581 |         vector<vector<CharReach> > triggers; | 
| 2582 |         triggers.push_back(as_cr_seq(lit)); | 
| 2583 |  | 
| 2584 |         ue2_literal lit2; | 
| 2585 |         if (getTrailingLiteral(g, &lit2) | 
| 2586 |             && lit2.length() >= ng.cc.grey.minRoseLiteralLength | 
| 2587 |             && minStringPeriod(lit2) >= 2) { | 
| 2588 |  | 
| 2589 |             /* TODO: handle delay */ | 
| 2590 |             size_t overlap = maxOverlap(lit, lit2, 0); | 
| 2591 |             u32 delay2 = min((size_t)max_delay, lit2.length() - overlap); | 
| 2592 |             delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2); | 
| 2593 |             rhs->kind = NFA_INFIX; | 
| 2594 |             assert(delay2 <= lit2.length()); | 
| 2595 |             setReportOnHaigPrefix(*ng.rose, *rhs); | 
| 2596 |  | 
| 2597 |             shared_ptr<raw_som_dfa> m_haig | 
| 2598 |                 = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), | 
| 2599 |                                      triggers, ng.cc.grey, true); | 
| 2600 |             DEBUG_PRINTF("mhs haig %p\n" , m_haig.get()); | 
| 2601 |             if (!m_haig) { | 
| 2602 |                 DEBUG_PRINTF("failed to haig\n" ); | 
| 2603 |                 return false; | 
| 2604 |             } | 
| 2605 |  | 
| 2606 |             RoseInVertex w | 
| 2607 |                 = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig); | 
| 2608 |             add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig); | 
| 2609 |  | 
| 2610 |             NFAVertex reporter = getSoleSourceVertex(g, g.accept); | 
| 2611 |             assert(reporter); | 
| 2612 |             const auto &reports = g[reporter].reports; | 
| 2613 |             RoseInVertex a = | 
| 2614 |                 add_vertex(RoseInVertexProps::makeAccept(reports), ig); | 
| 2615 |             add_edge(w, a, RoseInEdgeProps(0U, 0U), ig); | 
| 2616 |         } else { | 
| 2617 |             /* TODO: analysis to see if som is in fact always increasing */ | 
| 2618 |             shared_ptr<raw_som_dfa> r_haig | 
| 2619 |                 = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), | 
| 2620 |                                      triggers, ng.cc.grey, true); | 
| 2621 |             DEBUG_PRINTF("rhs haig %p\n" , r_haig.get()); | 
| 2622 |             if (!r_haig) { | 
| 2623 |                 DEBUG_PRINTF("failed to haig\n" ); | 
| 2624 |                 return false; | 
| 2625 |             } | 
| 2626 |             RoseInVertex a | 
| 2627 |                 = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), | 
| 2628 |                              ig); | 
| 2629 |             add_edge(v, a, RoseInEdgeProps(r_haig), ig); | 
| 2630 |         } | 
| 2631 |     } else { | 
| 2632 |         DEBUG_PRINTF("has start->accept edge\n" ); | 
| 2633 |         if (in_degree(g.acceptEod, g) > 1) { | 
| 2634 |             DEBUG_PRINTF("also has a path to EOD\n" ); | 
| 2635 |             return false; | 
| 2636 |         } | 
| 2637 |         NFAVertex reporter = getSoleSourceVertex(g, g.accept); | 
| 2638 |         if (!reporter) { | 
| 2639 |             return false; /* TODO: later */ | 
| 2640 |         } | 
| 2641 |         const auto &reports = g[reporter].reports; | 
| 2642 |         assert(!reports.empty()); | 
| 2643 |         RoseInVertex a = | 
| 2644 |             add_vertex(RoseInVertexProps::makeAccept(reports), ig); | 
| 2645 |         add_edge(v, a, RoseInEdgeProps(0U, 0U), ig); | 
| 2646 |     } | 
| 2647 |  | 
| 2648 |     calcVertexOffsets(ig); | 
| 2649 |  | 
| 2650 |     return ng.rose->addSombeRose(ig); | 
| 2651 | } | 
| 2652 |  | 
| 2653 | static | 
| 2654 | bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) { | 
| 2655 |     set<ue2_literal> lits; | 
| 2656 |     shared_ptr<NGHolder> rhs = make_shared<NGHolder>(); | 
| 2657 |     if (!ng.cc.grey.allowLitHaig) { | 
| 2658 |         return false; | 
| 2659 |     } | 
| 2660 |  | 
| 2661 |     dumpHolder(g, 90, "lithaig_full" , ng.cc.grey); | 
| 2662 |  | 
| 2663 |     if (!splitOffLeadingLiterals(g, &lits, &*rhs)) { | 
| 2664 |         DEBUG_PRINTF("no literal\n" ); | 
| 2665 |         return false; | 
| 2666 |     } | 
| 2667 |  | 
| 2668 |     makeReportsSomPass(ng.rm, *rhs); | 
| 2669 |  | 
| 2670 |     dumpHolder(*rhs, 91, "lithaig_rhs" , ng.cc.grey); | 
| 2671 |  | 
| 2672 |     vector<vector<CharReach>> triggers; | 
| 2673 |     for (const auto &lit : lits) { | 
| 2674 |         if (lit.length() < ng.cc.grey.minRoseLiteralLength) { | 
| 2675 |             DEBUG_PRINTF("lit too short\n" ); | 
| 2676 |             return false; | 
| 2677 |         } | 
| 2678 |  | 
| 2679 |         assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); | 
| 2680 |         triggers.push_back(as_cr_seq(lit)); | 
| 2681 |     } | 
| 2682 |  | 
| 2683 |     bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that | 
| 2684 |                                          * we can promise ordering */ | 
| 2685 |  | 
| 2686 |     assert(rhs->kind == NFA_SUFFIX); | 
| 2687 |     shared_ptr<raw_som_dfa> haig | 
| 2688 |         = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers, | 
| 2689 |                              ng.cc.grey, unordered_som_triggers); | 
| 2690 |     if (!haig) { | 
| 2691 |         DEBUG_PRINTF("failed to haig\n" ); | 
| 2692 |         return false; | 
| 2693 |     } | 
| 2694 |     DEBUG_PRINTF("haig %p\n" , haig.get()); | 
| 2695 |  | 
| 2696 |     RoseInGraph ig; | 
| 2697 |     RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig); | 
| 2698 |  | 
| 2699 |     RoseInVertex a | 
| 2700 |         = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig); | 
| 2701 |  | 
| 2702 |     for (const auto &lit : lits) { | 
| 2703 |         RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); | 
| 2704 |         add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); | 
| 2705 |         add_edge(v, a, RoseInEdgeProps(haig), ig); | 
| 2706 |     } | 
| 2707 |  | 
| 2708 |     calcVertexOffsets(ig); | 
| 2709 |  | 
| 2710 |     return ng.rose->addSombeRose(ig); | 
| 2711 | } | 
| 2712 |  | 
| 2713 | static | 
| 2714 | bool trySombe(NG &ng, NGHolder &g, som_type som) { | 
| 2715 |     if (doLitHaigSom(ng, g, som)) { | 
| 2716 |         return true; | 
| 2717 |     } | 
| 2718 |  | 
| 2719 |     auto regions = assignRegions(g); | 
| 2720 |  | 
| 2721 |     if (doHaigLitHaigSom(ng, g, regions, som)) { | 
| 2722 |         return true; | 
| 2723 |     } | 
| 2724 |  | 
| 2725 |     if (doMultiLitHaigSom(ng, g, som)) { | 
| 2726 |         return true; | 
| 2727 |     } | 
| 2728 |  | 
| 2729 |     return false; | 
| 2730 | } | 
| 2731 |  | 
| 2732 | static | 
| 2733 | map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g, | 
| 2734 |                         const unordered_map<NFAVertex, u32> ®ions, | 
| 2735 |                         const map<u32, region_info> &info, | 
| 2736 |                         const vector<DepthMinMax> &depths) { | 
| 2737 |     map<u32, region_info>::const_iterator picked = info.end(); | 
| 2738 |     for (map<u32, region_info>::const_iterator it = info.begin(); | 
| 2739 |          it != info.end(); ++it) { | 
| 2740 |         if (it->second.exits.empty()) { | 
| 2741 |             assert(it == info.begin()); | 
| 2742 |             continue; | 
| 2743 |         } | 
| 2744 |  | 
| 2745 |         if (!regionCanEstablishSom(g, regions, it->first, it->second.exits, | 
| 2746 |                                    depths)) { | 
| 2747 |             /* last region is as far as we can go */ | 
| 2748 |             DEBUG_PRINTF("region %u is beyond the fixed region\n" , it->first); | 
| 2749 |             break; | 
| 2750 |         } | 
| 2751 |         picked = it; | 
| 2752 |     } | 
| 2753 |  | 
| 2754 |     return picked; | 
| 2755 | } | 
| 2756 |  | 
| 2757 | static | 
| 2758 | map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g, | 
| 2759 |                               const unordered_map<NFAVertex, u32> ®ions, | 
| 2760 |                               const map<u32, region_info> &info, | 
| 2761 |                               const vector<DepthMinMax> &depths, | 
| 2762 |                               const map<u32, region_info>::const_iterator &orig, | 
| 2763 |                               const CompileContext &cc) { | 
| 2764 |     DEBUG_PRINTF("trying for later rev nfa cut\n" ); | 
| 2765 |     assert(orig != info.end()); | 
| 2766 |  | 
| 2767 |     vector<map<u32, region_info>::const_iterator> cands; | 
| 2768 |  | 
| 2769 |     map<u32, region_info>::const_iterator it = orig; | 
| 2770 |     ++it; | 
| 2771 |     for (; it != info.end(); ++it) { | 
| 2772 |         /* for simplicity */ | 
| 2773 |         if (it->second.exits.size() != 1 || it->second.optional) { | 
| 2774 |             continue; | 
| 2775 |         } | 
| 2776 |         NFAVertex v = *it->second.exits.begin(); | 
| 2777 |  | 
| 2778 |         if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) { | 
| 2779 |             continue; /* for simplicity would require external som nfa reports | 
| 2780 |                        * as well. */ | 
| 2781 |         } | 
| 2782 |  | 
| 2783 |         const depth &max_depth = depths[g[v].index].max; | 
| 2784 |         if (max_depth > | 
| 2785 |             depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */ | 
| 2786 |             continue; | 
| 2787 |         } | 
| 2788 |  | 
| 2789 |         if (max_depth > depth(MAX_REV_NFA_PREFIX)) { | 
| 2790 |             /* probably not a good idea, anyway */ | 
| 2791 |             continue; | 
| 2792 |         } | 
| 2793 |  | 
| 2794 |         cands.push_back(it); | 
| 2795 |     } | 
| 2796 |  | 
| 2797 |     while (!cands.empty()) { | 
| 2798 |         map<u32, region_info>::const_iterator rv = cands.back(); | 
| 2799 |         cands.pop_back(); | 
| 2800 |  | 
| 2801 |         NFAVertex v = *rv->second.exits.begin(); | 
| 2802 |  | 
| 2803 |         set<ue2_literal> lits = getLiteralSet(g, v); | 
| 2804 |         compressAndScore(lits); | 
| 2805 |         if (lits.empty()) { | 
| 2806 |         next_region: | 
| 2807 |             continue; | 
| 2808 |         } | 
| 2809 |         for (const auto &lit : lits) { | 
| 2810 |             if (lit.length() <= 3 || minStringPeriod(lit) < 2) { | 
| 2811 |                 goto next_region; | 
| 2812 |             } | 
| 2813 |         } | 
| 2814 |  | 
| 2815 |         if (rv->second.enters.empty() | 
| 2816 |             || find(rv->second.full.begin(), rv->second.full.end(), g.startDs) | 
| 2817 |                     != rv->second.full.end()) { | 
| 2818 |             continue; | 
| 2819 |         } | 
| 2820 |  | 
| 2821 |         if (!isMandRegionBetween(info.begin(), rv) | 
| 2822 |             && info.begin()->second.optional) { | 
| 2823 |             continue; | 
| 2824 |         } | 
| 2825 |  | 
| 2826 |         /* check to see if it is a reasonable size */ | 
| 2827 |         auto prefix = | 
| 2828 |             makePrefix(g, regions, rv->second, next(rv)->second, false); | 
| 2829 |  | 
| 2830 |         NGHolder g_rev; | 
| 2831 |         reverseHolder(*prefix, g_rev); | 
| 2832 |         anchorStarts(g_rev); | 
| 2833 |  | 
| 2834 |         renumber_vertices(g_rev); | 
| 2835 |         g_rev.kind = NFA_REV_PREFIX; | 
| 2836 |         reduceGraphEquivalences(g_rev, cc); | 
| 2837 |         removeRedundancy(g_rev, SOM_NONE); | 
| 2838 |  | 
| 2839 |         if (num_vertices(g_rev) > 128) { /* too big */ | 
| 2840 |             continue; | 
| 2841 |         } | 
| 2842 |  | 
| 2843 |         return rv; | 
| 2844 |     } | 
| 2845 |  | 
| 2846 |     return info.end(); | 
| 2847 | } | 
| 2848 |  | 
| 2849 | static | 
| 2850 | unique_ptr<NGHolder> makePrefixForChain(NGHolder &g, | 
| 2851 |                          const unordered_map<NFAVertex, u32> ®ions, | 
| 2852 |                          const map<u32, region_info> &info, | 
| 2853 |                          const map<u32, region_info>::const_iterator &picked, | 
| 2854 |                          vector<DepthMinMax> *depths, bool prefix_by_rev, | 
| 2855 |                          ReportManager &rm) { | 
| 2856 |     DEBUG_PRINTF("making prefix for chain attempt\n" ); | 
| 2857 |     auto prefix = | 
| 2858 |         makePrefix(g, regions, picked->second, next(picked)->second, false); | 
| 2859 |  | 
| 2860 |     /* For the root SOM plan, we use a temporary SOM slot to start with so that | 
| 2861 |      * we don't have to do any complicated rollback operations if the call to | 
| 2862 |      * doSomPlanning() below fails. The temporary SOM slot is replaced with a | 
| 2863 |      * real one afterwards. */ | 
| 2864 |     const u32 temp_som_loc = UINT32_MAX; | 
| 2865 |     setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE, | 
| 2866 |                      temp_som_loc, *depths, prefix_by_rev); | 
| 2867 |  | 
| 2868 |     /* handle direct edge to accepts from region */ | 
| 2869 |     if (edge(picked->second.exits.front(), g.accept, g).second | 
| 2870 |             || edge(picked->second.exits.front(), g.acceptEod, g).second) { | 
| 2871 |         map<u32, region_info>::const_iterator it = picked; | 
| 2872 |         do { | 
| 2873 |             makeSomRelReports(rm, g, it->second.exits, *depths); | 
| 2874 |         } while (it != info.begin() && it->second.optional && (it--)->first); | 
| 2875 |     } | 
| 2876 |  | 
| 2877 |     depths->clear(); /* renumbering invalidates depths */ | 
| 2878 |     renumber_vertices(*prefix); | 
| 2879 |  | 
| 2880 |     DEBUG_PRINTF("done\n" ); | 
| 2881 |     return prefix; | 
| 2882 | } | 
| 2883 |  | 
| 2884 | sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, | 
| 2885 |                som_type som) { | 
| 2886 |     assert(som); | 
| 2887 |     DEBUG_PRINTF("som hello\n" ); | 
| 2888 |     ReportManager &rm = ng.rm; | 
| 2889 |     SomSlotManager &ssm = ng.ssm; | 
| 2890 |     const CompileContext &cc = ng.cc; | 
| 2891 |  | 
| 2892 |     // Special case: if g is completely anchored or begins with a dot-star, we | 
| 2893 |     // know that we have an absolute SOM of zero all the time. | 
| 2894 |     if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) { | 
| 2895 |         makeSomAbsReports(rm, g, g.accept); | 
| 2896 |         makeSomAbsReports(rm, g, g.acceptEod); | 
| 2897 |         return SOMBE_HANDLED_INTERNAL; | 
| 2898 |     } | 
| 2899 |  | 
| 2900 |     if (!cc.grey.allowSomChain) { | 
| 2901 |         return SOMBE_FAIL; | 
| 2902 |     } | 
| 2903 |  | 
| 2904 |     // A pristine copy of the input graph, which must be restored to in paths | 
| 2905 |     // that return false. Also used as the forward graph for som rev nfa | 
| 2906 |     // construction. | 
| 2907 |     NGHolder g_pristine; | 
| 2908 |     cloneHolder(g_pristine, g); | 
| 2909 |  | 
| 2910 |     vector<DepthMinMax> depths = getDistancesFromSOM(g); | 
| 2911 |  | 
| 2912 |     // try a redundancy pass. | 
| 2913 |     if (addSomRedundancy(g, depths)) { | 
| 2914 |         depths = getDistancesFromSOM(g); // recalc | 
| 2915 |     } | 
| 2916 |  | 
| 2917 |     auto regions = assignRegions(g); | 
| 2918 |  | 
| 2919 |     dumpHolder(g, regions, 11, "som_explode" , cc.grey); | 
| 2920 |  | 
| 2921 |     map<u32, region_info> info; | 
| 2922 |     buildRegionMapping(g, regions, info); | 
| 2923 |  | 
| 2924 |     map<u32, region_info>::const_iterator picked | 
| 2925 |         = pickInitialSomCut(g, regions, info, depths); | 
| 2926 |     DEBUG_PRINTF("picked %u\n" , picked->first); | 
| 2927 |     if (picked == info.end() || picked->second.exits.empty()) { | 
| 2928 |         DEBUG_PRINTF("no regions/no progress possible\n" ); | 
| 2929 |         clear_graph(g); | 
| 2930 |         cloneHolder(g, g_pristine); | 
| 2931 |         if (doSomRevNfa(ng, g, cc)) { | 
| 2932 |             return SOMBE_HANDLED_INTERNAL; | 
| 2933 |         } else { | 
| 2934 |             return SOMBE_FAIL; | 
| 2935 |         } | 
| 2936 |     } | 
| 2937 |  | 
| 2938 |     if (finalRegion(g, regions, picked->second.exits[0])) { | 
| 2939 |         makeSomRelReports(rm, g, g.accept, depths); | 
| 2940 |         makeSomRelReports(rm, g, g.acceptEod, depths); | 
| 2941 |         return SOMBE_HANDLED_INTERNAL; | 
| 2942 |     } | 
| 2943 |  | 
| 2944 |     if (doSomRevNfa(ng, g_pristine, cc)) { | 
| 2945 |         clear_graph(g); | 
| 2946 |         cloneHolder(g, g_pristine); | 
| 2947 |         return SOMBE_HANDLED_INTERNAL; | 
| 2948 |     } | 
| 2949 |  | 
| 2950 |     bool prefix_by_rev = false; | 
| 2951 |     map<u32, region_info>::const_iterator picked_old = picked; | 
| 2952 |     map<u32, region_info>::const_iterator rev_pick | 
| 2953 |         = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc); | 
| 2954 |     if (rev_pick != info.end()) { | 
| 2955 |         DEBUG_PRINTF("found later rev prefix cut point\n" ); | 
| 2956 |         assert(rev_pick != picked); | 
| 2957 |         picked = rev_pick; | 
| 2958 |         prefix_by_rev = true; | 
| 2959 |     } else { | 
| 2960 |         /* sanity checks for picked region, these checks have already been done | 
| 2961 |          * if we are using a prefix reverse nfa. */ | 
| 2962 |         if (picked->second.enters.empty() | 
| 2963 |             || find(picked->second.full.begin(), picked->second.full.end(), | 
| 2964 |                     g.startDs) != picked->second.full.end()) { | 
| 2965 |             clear_graph(g); | 
| 2966 |             cloneHolder(g, g_pristine); | 
| 2967 |             return SOMBE_FAIL; | 
| 2968 |         } | 
| 2969 |  | 
| 2970 |         if (!isMandRegionBetween(info.begin(), picked) | 
| 2971 |             && info.begin()->second.optional) { | 
| 2972 |             clear_graph(g); | 
| 2973 |             cloneHolder(g, g_pristine); | 
| 2974 |             return SOMBE_FAIL; | 
| 2975 |         } | 
| 2976 |     } | 
| 2977 |  | 
| 2978 |     DEBUG_PRINTF("region %u is the final\n" , picked->first); | 
| 2979 |  | 
| 2980 |     shared_ptr<NGHolder> prefix = makePrefixForChain( | 
| 2981 |         g, regions, info, picked, &depths, prefix_by_rev, rm); | 
| 2982 |     /* note depths cleared as we have renumbered */ | 
| 2983 |  | 
| 2984 |     CharReach escapes; | 
| 2985 |     bool stuck = isPossibleLock(g, picked, info, &escapes); | 
| 2986 |     if (stuck) { | 
| 2987 |         DEBUG_PRINTF("investigating potential lock\n" ); | 
| 2988 |  | 
| 2989 |         NGHolder gg; | 
| 2990 |         fillHolderForLockCheck(&gg, g, info, picked); | 
| 2991 |  | 
| 2992 |         stuck = firstMatchIsFirst(gg); | 
| 2993 |     } | 
| 2994 |  | 
| 2995 |     if (stuck && escapes.none()) { | 
| 2996 |         /* leads directly to .* --> woot */ | 
| 2997 |         DEBUG_PRINTF("initial slot is full lock\n" ); | 
| 2998 |         u32 som_loc = ssm.getSomSlot(*prefix, escapes, false, | 
| 2999 |                                      SomSlotManager::NO_PARENT); | 
| 3000 |         replaceTempSomSlot(rm, *prefix, som_loc); | 
| 3001 |  | 
| 3002 |         /* update all reports on g to report the som_loc's som */ | 
| 3003 |         updateReportToUseRecordedSom(rm, g, som_loc); | 
| 3004 |  | 
| 3005 |         /* create prefix to set the som_loc */ | 
| 3006 |         updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); | 
| 3007 |         if (prefix_by_rev) { | 
| 3008 |             u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); | 
| 3009 |             updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); | 
| 3010 |         } | 
| 3011 |         renumber_vertices(*prefix); | 
| 3012 |         if (!ng.addHolder(*prefix)) { | 
| 3013 |             DEBUG_PRINTF("failed to add holder\n" ); | 
| 3014 |             clear_graph(g); | 
| 3015 |             cloneHolder(g, g_pristine); | 
| 3016 |             return SOMBE_FAIL; | 
| 3017 |         } | 
| 3018 |  | 
| 3019 |         DEBUG_PRINTF("ok found initial lock\n" ); | 
| 3020 |         return SOMBE_HANDLED_INTERNAL; | 
| 3021 |     } | 
| 3022 |  | 
| 3023 |     vector<som_plan> plan; | 
| 3024 |  retry: | 
| 3025 |     // Note: no-one should ever pay attention to the root plan's parent. | 
| 3026 |     plan.push_back(som_plan(prefix, escapes, false, 0)); | 
| 3027 |     dumpHolder(*plan.back().prefix, 12, "som_prefix" , cc.grey); | 
| 3028 |     if (!prefix_by_rev) { | 
| 3029 |         if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) { | 
| 3030 |             DEBUG_PRINTF("failed\n" ); | 
| 3031 |             clear_graph(g); | 
| 3032 |             cloneHolder(g, g_pristine); | 
| 3033 |             return SOMBE_FAIL; | 
| 3034 |         } | 
| 3035 |     } else { | 
| 3036 |         DEBUG_PRINTF("trying for som plan\n" ); | 
| 3037 |         if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey, | 
| 3038 |                            DISALLOW_MODIFY_HOLDER)) { | 
| 3039 |             /* Note: the larger prefixes generated by reverse nfas may not | 
| 3040 |              * advance as fair as the original prefix - so we should retry | 
| 3041 |              * with a smaller prefix. */ | 
| 3042 |  | 
| 3043 |             prefix_by_rev = false; | 
| 3044 |             stuck = false; /* if we reached a lock, then prefix_by_rev would not | 
| 3045 |                             * have advanced. */ | 
| 3046 |             picked = picked_old; | 
| 3047 |             plan.clear(); | 
| 3048 |             depths = getDistancesFromSOM(g); /* due to renumbering, need to | 
| 3049 |                                               * regenerate */ | 
| 3050 |             prefix = makePrefixForChain(g, regions, info, picked, &depths, | 
| 3051 |                                         prefix_by_rev, rm); | 
| 3052 |             escapes.clear(); | 
| 3053 |             DEBUG_PRINTF("retrying\n" ); | 
| 3054 |             goto retry; | 
| 3055 |         } | 
| 3056 |     } | 
| 3057 |     DEBUG_PRINTF("som planning ok\n" ); | 
| 3058 |  | 
| 3059 |     /* if the initial prefix is weak is if sombe approaches are better */ | 
| 3060 |     if (findMinWidth(*prefix) <= depth(2)) { | 
| 3061 |         DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n" ); | 
| 3062 |         NGHolder g2; | 
| 3063 |         cloneHolder(g2, g_pristine); | 
| 3064 |         if (trySombe(ng, g2, som)) { | 
| 3065 |             return SOMBE_HANDLED_ALL; | 
| 3066 |         } | 
| 3067 |     } | 
| 3068 |  | 
| 3069 |     /* From this point we know that we are going to succeed or die horribly with | 
| 3070 |      * a pattern too large. Anything done past this point can be considered | 
| 3071 |      * committed to the compile. */ | 
| 3072 |  | 
| 3073 |     regions = assignRegions(g); // Update as g may have changed. | 
| 3074 |  | 
| 3075 |     DEBUG_PRINTF("-- get slot for initial plan\n" ); | 
| 3076 |     u32 som_loc; | 
| 3077 |     if (plan[0].is_reset) { | 
| 3078 |         som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions, | 
| 3079 |                                 picked->first, &plan[0].no_implement); | 
| 3080 |     } else { | 
| 3081 |         som_loc = ssm.getSomSlot(*prefix, escapes, false, | 
| 3082 |                                  SomSlotManager::NO_PARENT); | 
| 3083 |     } | 
| 3084 |  | 
| 3085 |     replaceTempSomSlot(rm, *prefix, som_loc); | 
| 3086 |  | 
| 3087 |     if (plan.front().is_reset) { | 
| 3088 |         updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); | 
| 3089 |     } | 
| 3090 |     if (prefix_by_rev && !plan.front().no_implement) { | 
| 3091 |         u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); | 
| 3092 |         updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); | 
| 3093 |     } | 
| 3094 |  | 
| 3095 |     implementSomPlan(ng, expr, comp_id, g, plan, som_loc); | 
| 3096 |  | 
| 3097 |     DEBUG_PRINTF("success\n" ); | 
| 3098 |     return SOMBE_HANDLED_INTERNAL; | 
| 3099 | } | 
| 3100 |  | 
| 3101 | sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, | 
| 3102 |                        u32 comp_id, som_type som) { | 
| 3103 |     assert(som); | 
| 3104 |  | 
| 3105 |     DEBUG_PRINTF("som+haig hello\n" ); | 
| 3106 |  | 
| 3107 |     // A pristine copy of the input graph, which must be restored to in paths | 
| 3108 |     // that return false. Also used as the forward graph for som rev nfa | 
| 3109 |     // construction. | 
| 3110 |     NGHolder g_pristine; | 
| 3111 |     cloneHolder(g_pristine, g); | 
| 3112 |  | 
| 3113 |     if (trySombe(ng, g, som)) { | 
| 3114 |         return SOMBE_HANDLED_ALL; | 
| 3115 |     } | 
| 3116 |  | 
| 3117 |     if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) { | 
| 3118 |         return SOMBE_FAIL; | 
| 3119 |     } | 
| 3120 |  | 
| 3121 |     // know that we have an absolute SOM of zero all the time. | 
| 3122 |     assert(edge(g.startDs, g.startDs, g).second); | 
| 3123 |  | 
| 3124 |     vector<DepthMinMax> depths = getDistancesFromSOM(g); | 
| 3125 |  | 
| 3126 |     // try a redundancy pass. | 
| 3127 |     if (addSomRedundancy(g, depths)) { | 
| 3128 |         depths = getDistancesFromSOM(g); | 
| 3129 |     } | 
| 3130 |  | 
| 3131 |     auto regions = assignRegions(g); | 
| 3132 |  | 
| 3133 |     dumpHolder(g, regions, 21, "som_explode" , ng.cc.grey); | 
| 3134 |  | 
| 3135 |     map<u32, region_info> info; | 
| 3136 |     buildRegionMapping(g, regions, info, true); | 
| 3137 |  | 
| 3138 |     sombe_rv rv = | 
| 3139 |         doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin()); | 
| 3140 |     if (rv == SOMBE_FAIL) { | 
| 3141 |         clear_graph(g); | 
| 3142 |         cloneHolder(g, g_pristine); | 
| 3143 |     } | 
| 3144 |     return rv; | 
| 3145 | } | 
| 3146 |  | 
| 3147 | } // namespace ue2 | 
| 3148 |  |