/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * \brief SOM ("Start of Match") analysis.
 */

#include "ng_som.h"

#include "ng.h"
#include "ng_dump.h"
#include "ng_equivalence.h"
#include "ng_execute.h"
#include "ng_haig.h"
#include "ng_limex.h"
#include "ng_literal_analysis.h"
#include "ng_prune.h"
#include "ng_redundancy.h"
#include "ng_region.h"
#include "ng_reports.h"
#include "ng_som_add_redundancy.h"
#include "ng_som_util.h"
#include "ng_split.h"
#include "ng_util.h"
#include "ng_violet.h"
#include "ng_width.h"
#include "grey.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/goughcompile.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "parser/position.h"
#include "som/som.h"
#include "rose/rose_build.h"
#include "rose/rose_in_util.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/compile_error.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/make_unique.h"

#include <algorithm>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using namespace std;

namespace ue2 {

static const size_t MAX_SOM_PLANS = 10;
static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000;

#define MAX_REV_NFA_PREFIX 80

namespace {
struct som_plan {
    som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i,
             u32 parent_in) : prefix(p), escapes(e), is_reset(i),
             no_implement(false), parent(parent_in) { }
    shared_ptr<NGHolder> prefix;
    CharReach escapes;
    bool is_reset;
    bool no_implement;
    u32 parent; // index of parent plan in the vector.

    // Reporters: a list of vertices in the graph that must have their
    // reports updated at implementation time to report this plan's
    // som_loc_out.
    vector<NFAVertex> reporters;

    // Similar, but these report the som_loc_in.
    vector<NFAVertex> reporters_in;
};
}

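/* Returns true if SOM can be established for this region: each region exit
 * must lie at a single known depth from the start of match (min == max), and
 * the region must not contain the accept vertices. */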
static
bool regionCanEstablishSom(const NGHolder &g,
                           const unordered_map<NFAVertex, u32> &regions,
                           const u32 region, const vector<NFAVertex> &r_exits,
                           const vector<DepthMinMax> &depths) {
    if (region == regions.at(g.accept) ||
        region == regions.at(g.acceptEod)) {
        DEBUG_PRINTF("accept in region\n");
        return false;
    }

    DEBUG_PRINTF("region %u\n", region);
    for (UNUSED auto v : r_exits) {
        DEBUG_PRINTF(" exit %zu\n", g[v].index);
    }

    /* simple if each region exit is at a fixed distance from SOM. Note that
     * SOM does not include virtual starts */
    for (auto v : r_exits) {
        assert(regions.at(v) == region);
        const DepthMinMax &d = depths.at(g[v].index);
        if (d.min != d.max) {
            DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index,
                         d.min.str().c_str(), d.max.str().c_str());
            return false;
        }
    }
    DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]),
                 g[r_exits[0]].index);

    return true;
}

namespace {

struct region_info {
    region_info() : optional(false), dag(false) {}
    vector<NFAVertex> enters;
    vector<NFAVertex> exits;
    vector<NFAVertex> full;
    bool optional; /* skip edges around region */
    bool dag; /* completely acyclic */
};

}

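/* Builds a map from region number to its entry/exit/full vertex lists and
 * flags each region as optional (there are skip edges around it) and/or dag
 * (no back edges within the region). Start vertices and region 0 are skipped
 * unless include_region_0 is set; accept vertices are always skipped. */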
static
void buildRegionMapping(const NGHolder &g,
                        const unordered_map<NFAVertex, u32> &regions,
                        map<u32, region_info> &info,
                        bool include_region_0 = false) {
    for (auto v : vertices_range(g)) {
        u32 region = regions.at(v);
        if (!include_region_0 && (is_any_start(v, g) || region == 0)) {
            continue;
        }
        assert(!region || !is_any_start(v, g));

        if (is_any_accept(v, g)) {
            continue;
        }

        if (isRegionEntry(g, v, regions)) {
            info[region].enters.push_back(v);
        }
        if (isRegionExit(g, v, regions)) {
            info[region].exits.push_back(v);
        }
        info[region].full.push_back(v);
    }

    for (auto &m : info) {
        if (!m.second.enters.empty()
            && isOptionalRegion(g, m.second.enters.front(), regions)) {
            m.second.optional = true;
        }
        m.second.dag = true; /* will be cleared for cyclic regions later */
    }

    set<NFAEdge> be;
    BackEdges<set<NFAEdge> > backEdgeVisitor(be);
    boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start));

    for (const auto &e : be) {
        NFAVertex u = source(e, g);
        NFAVertex v = target(e, g);
        if (is_special(u, g) || is_special(v, g)) {
            assert(is_special(u, g) && is_special(v, g));
            continue;
        }
        u32 r = regions.at(v);
        assert(regions.at(u) == r);
        info[r].dag = false;
    }

    if (include_region_0) {
        info[0].dag = false;
    }

    #ifdef DEBUG
    for (const auto &m : info) {
        u32 r = m.first;
        const region_info &r_i = m.second;
        DEBUG_PRINTF("region %u:%s%s\n", r,
                     r_i.dag ? " (dag)" : "",
                     r_i.optional ? " (optional)" : "");
        DEBUG_PRINTF(" enters:");
        for (u32 i = 0; i < r_i.enters.size(); i++) {
            printf(" %zu", g[r_i.enters[i]].index);
        }
        printf("\n");
        DEBUG_PRINTF(" exits:");
        for (u32 i = 0; i < r_i.exits.size(); i++) {
            printf(" %zu", g[r_i.exits[i]].index);
        }
        printf("\n");
        DEBUG_PRINTF(" all:");
        for (u32 i = 0; i < r_i.full.size(); i++) {
            printf(" %zu", g[r_i.full[i]].index);
        }
        printf("\n");
    }
    #endif
}

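/* XSL check: the proposed escape set is only usable if no vertex in a region
 * after 'region' can match an escape character; otherwise seeing an escape
 * would not guarantee that the tail of the graph is dead. On failure,
 * *bad_region is set to the lowest-numbered violating region. */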
static
bool validateXSL(const NGHolder &g,
                 const unordered_map<NFAVertex, u32> &regions,
                 const u32 region, const CharReach &escapes, u32 *bad_region) {
    /* need to check that the escapes escape all of the graph past region */
    u32 first_bad_region = ~0U;
    for (auto v : vertices_range(g)) {
        u32 v_region = regions.at(v);
        if (!is_special(v, g) && v_region > region &&
            (escapes & g[v].char_reach).any()) {
            DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index);
            first_bad_region = MIN(first_bad_region, v_region);
        }
    }

    if (first_bad_region != ~0U) {
        *bad_region = first_bad_region;
        return false;
    }

    return true;
}

static
bool validateEXSL(const NGHolder &g,
                  const unordered_map<NFAVertex, u32> &regions,
                  const u32 region, const CharReach &escapes,
                  const NGHolder &prefix, u32 *bad_region) {
    /* EXSL: To be a valid EXSL with escapes e, we require that all states
     * go dead after /[e][^e]*{subsequent prefix match}/.
     */

    /* TODO: this is overly conservative as it allows partial matches from the
     * prefix to be considered even when the tail has processed some [^e] */

    u32 first_bad_region = ~0U;
    const vector<CharReach> escapes_vec(1, escapes);
    const vector<CharReach> notescapes_vec(1, ~escapes);

    flat_set<NFAVertex> states;
    /* turn on all states past the prefix */
    DEBUG_PRINTF("region %u is cutover\n", region);
    for (auto v : vertices_range(g)) {
        if (!is_special(v, g) && regions.at(v) > region) {
            states.insert(v);
        }
    }

    /* process the escapes */
    states = execute_graph(g, escapes_vec, states);

    /* flood with any number of not escapes */
    flat_set<NFAVertex> prev_states;
    while (prev_states != states) {
        prev_states = states;
        states = execute_graph(g, notescapes_vec, states);
        insert(&states, prev_states);
    }

    /* find the input starts to use when running the prefix through, as when
     * the escape character arrives we may already be part-way through
     * matching the prefix */
    flat_set<NFAVertex> prefix_start_states;
    for (auto v : vertices_range(prefix)) {
        if (v != prefix.accept && v != prefix.acceptEod
            /* and as we have already made it past the prefix once */
            && v != prefix.start) {
            prefix_start_states.insert(v);
        }
    }

    prefix_start_states =
        execute_graph(prefix, escapes_vec, prefix_start_states);

    assert(contains(prefix_start_states, prefix.startDs));
    /* see what happens after we feed it the prefix */
    states = execute_graph(g, prefix, prefix_start_states, states);

    for (auto v : states) {
        assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
                                                    * on */
        DEBUG_PRINTF("state still active\n");
        first_bad_region = MIN(first_bad_region, regions.at(v));
    }

    if (first_bad_region != ~0U) {
        *bad_region = first_bad_region;
        return false;
    }

    return true;
}

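/* A "lock" is possible if the region following 'region' consists of a single
 * cyclic vertex; in that case the complement of that vertex's reachability is
 * returned as the candidate escape set. */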
static
bool isPossibleLock(const NGHolder &g,
                    map<u32, region_info>::const_iterator region,
                    const map<u32, region_info> &info,
                    CharReach *escapes_out) {
    /* TODO: we could also check for self-loops on curr region */

    /* TODO: some straw-walking logic. lowish priority as we know there can
     * only be optional regions between us and the cyclic */

    assert(region != info.end());
    map<u32, region_info>::const_iterator next_region = region;
    ++next_region;
    if (next_region == info.end()) {
        assert(0); /* odd */
        return false;
    }

    const region_info &next_info = next_region->second;
    if (next_info.enters.empty()) {
        assert(0); /* odd */
        return false;
    }

    if (next_info.full.size() == 1 && !next_info.dag) {
        *escapes_out = ~g[next_info.full.front()].char_reach;
        return true;
    }

    return false;
}

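/* Creates a prefix of g ending at the picked region: the graph is copied, the
 * accepts are rewired so that the vertices feeding the entries of 'next'
 * become the new accept preds, and everything beyond the picked region is
 * pruned. Renumbering may be suppressed so that an existing depth map stays
 * valid. */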
static
unique_ptr<NGHolder>
makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> &regions,
           const region_info &curr, const region_info &next,
           bool renumber = true) {
    const vector<NFAVertex> &curr_exits = curr.exits;
    const vector<NFAVertex> &next_enters = next.enters;

    assert(!next_enters.empty());
    assert(!curr_exits.empty());

    unique_ptr<NGHolder> prefix_ptr = ue2::make_unique<NGHolder>();
    NGHolder &prefix = *prefix_ptr;

    deque<NFAVertex> lhs_verts;
    insert(&lhs_verts, lhs_verts.end(), vertices(g));

    unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix
    fillHolder(&prefix, g, lhs_verts, &lhs_map);
    prefix.kind = NFA_OUTFIX;

    // We need a reverse mapping to track regions.
    unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g
    for (const auto &e : lhs_map) {
        rev_map.emplace(e.second, e.first);
    }

    clear_in_edges(prefix.accept, prefix);
    clear_in_edges(prefix.acceptEod, prefix);
    add_edge(prefix.accept, prefix.acceptEod, prefix);

    assert(!next_enters.empty());
    assert(next_enters.front() != NGHolder::null_vertex());
    u32 dead_region = regions.at(next_enters.front());
    DEBUG_PRINTF("curr_region %u, dead_region %u\n",
                 regions.at(curr_exits.front()), dead_region);
    for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) {
        if (regions.at(v) >= dead_region) {
            continue;
        }
        /* add edge to new accepts */
        NFAVertex p_v = lhs_map[v];
        add_edge(p_v, prefix.accept, prefix);
    }

    assert(in_degree(prefix.accept, prefix) != 0);

    /* prune everything past the picked region */
    vector<NFAVertex> to_clear;
    assert(contains(lhs_map, curr_exits.front()));
    NFAVertex p_u = lhs_map[curr_exits.front()];
    DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index);
    for (auto p_v : adjacent_vertices_range(p_u, prefix)) {
        auto v = rev_map.at(p_v);
        if (p_v == prefix.accept || regions.at(v) < dead_region) {
            continue;
        }
        to_clear.push_back(p_v);
    }

    for (auto v : to_clear) {
        DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index);
        clear_in_edges(v, prefix);
    }

    pruneUseless(prefix, renumber /* sometimes we want no renumber to keep
                                     depth map valid */);

    assert(num_vertices(prefix) > N_SPECIALS);
    return prefix_ptr;
}

static
void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) {
    const u32 temp_slot = UINT32_MAX;
    /* update the som slot on the prefix report */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        assert(reports.size() == 1);
        Report ir = rm.getReport(*reports.begin());
        if (ir.onmatch != temp_slot) {
            continue;
        }
        ir.onmatch = real_slot;
        ReportID rep = rm.getInternalId(ir);

        assert(reports.size() == 1);
        reports.clear();
        reports.insert(rep);
    }
}

static
void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type,
                      u32 som_loc, const vector<DepthMinMax> &depths,
                      bool prefix_by_rev) {
    Report ir = makeCallback(0U, 0);
    ir.type = ir_type;
    ir.onmatch = som_loc;

    /* add report for storing in som location on new accepts */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        if (prefix_by_rev) {
            ir.somDistance = MO_INVALID_IDX; /* will be populated properly
                                              * later */
        } else {
            const DepthMinMax &d = depths.at(g[v].index);
            assert(d.min == d.max);
            ir.somDistance = d.max;
        }
        ReportID rep = rm.getInternalId(ir);

        auto &reports = g[v].reports;
        reports.clear();
        reports.insert(rep);
    }
}

static
void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) {
    /* update the som action on the prefix report */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        assert(reports.size() == 1);
        Report ir = rm.getReport(*reports.begin());
        ir.type = ir_type;
        ReportID rep = rm.getInternalId(ir);

        assert(reports.size() == 1);
        reports.clear();
        reports.insert(rep);
    }
}

static
void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g,
                               u32 rev_comp_id) {
    /* update the action on the prefix report to refer to a reverse nfa;
     * the report type is also adjusted. */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        assert(reports.size() == 1);
        Report ir = rm.getReport(*reports.begin());
        switch (ir.type) {
        case INTERNAL_SOM_LOC_SET:
            ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA;
            break;
        case INTERNAL_SOM_LOC_SET_IF_UNSET:
            ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET;
            break;
        case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
            ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE;
            break;
        default:
            assert(0);
            break;
        }

        ir.revNfaIndex = rev_comp_id;
        ReportID rep = rm.getInternalId(ir);

        assert(reports.size() == 1);
        reports.clear();
        reports.insert(rep);
    }
}

static
void setMidfixReports(ReportManager &rm, const som_plan &item,
                      const u32 som_slot_in, const u32 som_slot_out) {
    assert(item.prefix);
    NGHolder &g = *item.prefix;

    Report ir = makeCallback(0U, 0);
    ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY
                            : INTERNAL_SOM_LOC_COPY_IF_WRITABLE;
    ir.onmatch = som_slot_out;
    ir.somDistance = som_slot_in;
    ReportID rep = rm.getInternalId(ir);

    /* add report for storing in som location on new accepts */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        reports.clear();
        reports.insert(rep);
    }
}

static
bool finalRegion(const NGHolder &g,
                 const unordered_map<NFAVertex, u32> &regions,
                 NFAVertex v) {
    u32 region = regions.at(v);
    for (auto w : adjacent_vertices_range(v, g)) {
        if (w != g.accept && w != g.acceptEod && regions.at(w) != region) {
            return false;
        }
    }

    return true;
}

static
void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g,
                                      NFAVertex v, ReportType ir_type,
                                      u64a param) {
    assert(!g[v].reports.empty());

    flat_set<ReportID> r_new;

    for (const ReportID &report_id : g[v].reports) {
        Report ir = rm.getReport(report_id);

        if (ir.type != EXTERNAL_CALLBACK) {
            /* we must have already done whatever magic we needed to do to this
             * report */
            r_new.insert(report_id);
            continue;
        }

        ir.type = ir_type;
        ir.somDistance = param;
        ReportID rep = rm.getInternalId(ir);

        DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n",
                     g[v].index, report_id, rep, ir_type);
        r_new.insert(rep);
    }
    g[v].reports = r_new;
}

/* updates the reports on all vertices leading to the sink */
static
void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
                       const vector<DepthMinMax> &depths) {
    for (auto v : inv_adjacent_vertices_range(sink, g)) {
        if (v == g.accept) {
            continue;
        }

        const DepthMinMax &d = depths.at(g[v].index);
        assert(d.min == d.max);
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
                                         d.min);
    }
}

/* updates the reports on all the provided vertices */
static
void makeSomRelReports(ReportManager &rm, NGHolder &g,
                       const vector<NFAVertex> &to_update,
                       const vector<DepthMinMax> &depths) {
    for (auto v : to_update) {
        const DepthMinMax &d = depths.at(g[v].index);
        assert(d.min == d.max);
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
                                         d.min);
    }
}

static
void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) {
    for (auto v : inv_adjacent_vertices_range(sink, g)) {
        if (v == g.accept) {
            continue;
        }
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS,
                                         0);
    }
}

static
void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) {
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
                                         som_loc);
    }
    for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
        if (v == g.accept) {
            continue;
        }
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
                                         som_loc);
    }
}

static
void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g,
                                  const vector<NFAVertex> &to_update,
                                  u32 som_loc) {
    for (auto v : to_update) {
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
                                         som_loc);
    }
}

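/* Builds the "escaper" graph for a som location: roughly
 * /prefix[^escapes]*[escapes]/, so that once the prefix has matched, the next
 * escape character fires INTERNAL_SOM_LOC_MAKE_WRITABLE for som_loc. For
 * example (illustration only), with escapes = [\n] the escaper fires on the
 * first newline seen after a prefix match. */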
static
bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes,
                   u32 som_loc) {
    ReportManager &rm = ng.rm;

    /* escaper = /prefix[^escapes]*[escapes]/ */
    DEBUG_PRINTF("creating escaper for %u\n", som_loc);
    NGHolder h;
    cloneHolder(h, prefix);
    assert(h.kind == NFA_OUTFIX);

    NFAVertex u = add_vertex(h);
    h[u].char_reach = ~escapes;

    NFAVertex v = add_vertex(h);
    h[v].char_reach = escapes;

    for (auto w : inv_adjacent_vertices_range(h.accept, h)) {
        add_edge(w, u, h);
        add_edge(w, v, h);
        h[w].reports.clear();
    }

    clear_in_edges(h.accept, h);

    add_edge(u, v, h);
    add_edge(u, u, h);
    add_edge(v, h.accept, h);

    Report ir = makeCallback(0U, 0);
    ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE;
    ir.onmatch = som_loc;
    h[v].reports.insert(rm.getInternalId(ir));
    return ng.addHolder(h);
}

static
void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
                            const map<u32, region_info> &info,
                            map<u32, region_info>::const_iterator picked) {
    /* NOTE: This is appropriate for firstMatchIsFirst */
    DEBUG_PRINTF("prepping for lock check\n");

    NGHolder &midfix = *out;

    map<NFAVertex, NFAVertex> v_map;
    v_map[g.start] = midfix.start;
    v_map[g.startDs] = midfix.startDs;

    /* include the lock region */
    assert(picked != info.end());
    auto graph_last = next(picked);

    assert(!graph_last->second.dag);
    assert(graph_last->second.full.size() == 1);

    for (auto jt = graph_last; ; --jt) {
        DEBUG_PRINTF("adding r %u to midfix\n", jt->first);

        /* add all vertices in region, create mapping */
        for (auto v : jt->second.full) {
            DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
            if (contains(v_map, v)) {
                continue;
            }

            /* treat all virtual starts as happening anywhere, so that the
             * virtual start is not counted as part of the SoM */
            if (is_virtual_start(v, g)) {
                v_map[v] = midfix.startDs;
                continue;
            }

            NFAVertex vnew = add_vertex(g[v], midfix);
            v_map[v] = vnew;
        }

        /* add edges leaving region verts based on mapping */
        for (auto v : jt->second.full) {
            NFAVertex u = v_map[v];
            for (auto w : adjacent_vertices_range(v, g)) {
                if (w == g.accept || w == g.acceptEod) {
                    add_edge_if_not_present(u, midfix.accept, midfix);
                    continue;
                }
                if (!contains(v_map, w)) {
                    add_edge_if_not_present(u, midfix.accept, midfix);
                } else {
                    add_edge_if_not_present(u, v_map[w], midfix);
                }
            }
        }

        if (jt == info.begin()) {
            break;
        }
    }

    /* add edges from startds to the enters of all the initial optional
     * regions and the first mandatory region. */
    for (auto jt = info.begin(); ; ++jt) {
        for (auto enter : jt->second.enters) {
            assert(contains(v_map, enter));
            NFAVertex v = v_map[enter];
            add_edge_if_not_present(midfix.startDs, v, midfix);
        }

        if (!jt->second.optional) {
            break;
        }

        if (jt == graph_last) {
            /* all regions are optional - add a direct edge to accept */
            add_edge_if_not_present(midfix.startDs, midfix.accept, midfix);
            break;
        }
    }

    assert(in_degree(midfix.accept, midfix));
    renumber_vertices(midfix);
}

static
void fillRoughMidfix(NGHolder *out, const NGHolder &g,
                     const unordered_map<NFAVertex, u32> &regions,
                     const map<u32, region_info> &info,
                     map<u32, region_info>::const_iterator picked) {
    /* as we are not the first prefix, we are probably not acyclic. We need to
     * generate an acyclic holder to act as a fake prefix for sentClearsTail.
     * This will result in a more conservative estimate. */
    /* NOTE: This is not appropriate for firstMatchIsFirst */
    NGHolder &midfix = *out;
    add_edge(midfix.startDs, midfix.accept, midfix);

    map<NFAVertex, NFAVertex> v_map;

    map<u32, region_info>::const_iterator jt = picked;
    for (; jt->second.dag; --jt) {
        DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
        if (!jt->second.optional) {
            clear_out_edges(midfix.startDs, midfix);
            add_edge(midfix.startDs, midfix.startDs, midfix);
        }

        /* add all vertices in region, create mapping */
        for (auto v : jt->second.full) {
            DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
            NFAVertex vnew = add_vertex(g[v], midfix);
            v_map[v] = vnew;
        }

        /* add edges leaving region verts based on mapping */
        for (auto v : jt->second.full) {
            NFAVertex u = v_map[v];
            for (auto w : adjacent_vertices_range(v, g)) {
                if (w == g.accept || w == g.acceptEod) {
                    continue;
                }
                if (!contains(v_map, w)) {
                    add_edge_if_not_present(u, midfix.accept, midfix);
                } else {
                    add_edge_if_not_present(u, v_map[w], midfix);
                }
            }
        }

        /* add edges from startds to enters */
        for (auto enter : jt->second.enters) {
            assert(contains(v_map, enter));
            NFAVertex v = v_map[enter];
            add_edge(midfix.startDs, v, midfix);
        }

        if (jt == info.begin()) {
            break;
        }
    }

    /* we can include the exits of the regions leading in */
    if (!jt->second.dag) {
        u32 first_early_region = jt->first;
        clear_out_edges(midfix.startDs, midfix);
        add_edge(midfix.startDs, midfix.startDs, midfix);

        do {
            for (auto v : jt->second.exits) {
                DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
                NFAVertex vnew = add_vertex(g[v], midfix);
                v_map[v] = vnew;

                /* add edges from startds to new vertices */
                add_edge(midfix.startDs, vnew, midfix);
            }

            /* add edges leaving region verts based on mapping */
            for (auto v : jt->second.exits) {
                NFAVertex u = v_map[v];
                for (auto w : adjacent_vertices_range(v, g)) {
                    if (w == g.accept || w == g.acceptEod
                        || regions.at(w) <= first_early_region) {
                        continue;
                    }
                    if (!contains(v_map, w)) {
                        add_edge_if_not_present(u, midfix.accept, midfix);
                    } else {
                        add_edge_if_not_present(u, v_map[w], midfix);
                    }
                }
            }
        } while (jt->second.optional && jt != info.begin() && (jt--)->first);

        if (jt->second.optional) {
            assert(!jt->second.exits.empty());
            NFAVertex v = v_map[jt->second.exits.front()];
            for (auto w : adjacent_vertices_range(v, midfix)) {
                add_edge(midfix.startDs, w, midfix);
            }
        }
    }
}

static
bool beginsWithDotStar(const NGHolder &g) {
    bool hasDot = false;

    // We can ignore the successors of start, as matches that begin there will
    // necessarily have a SOM of 0.

    set<NFAVertex> succ;
    insert(&succ, adjacent_vertices(g.startDs, g));
    succ.erase(g.startDs);

    for (auto v : succ) {
        // We want 'dot' states that aren't virtual starts.
        if (g[v].char_reach.all() &&
            !g[v].assert_flags) {
            hasDot = true;
            set<NFAVertex> dotsucc;
            insert(&dotsucc, adjacent_vertices(v, g));
            if (dotsucc != succ) {
                DEBUG_PRINTF("failed dot-star succ check\n");
                return false;
            }
        }
    }

    if (hasDot) {
        DEBUG_PRINTF("begins with dot-star\n");
    }
    return hasDot;
}

static
bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in,
                 const u32 som_slot_out) {
    assert(item.prefix);
    assert(hasCorrectlyNumberedVertices(*item.prefix));

    /* setup escaper for second som_location if required */
    if (item.escapes.any()) {
        if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) {
            return false;
        }
    }

    /* ensure we copy som from prev loc */
    setMidfixReports(ng.rm, item, som_slot_in, som_slot_out);

    /* add second prefix/1st midfix */
    if (!ng.addHolder(*item.prefix)) {
        DEBUG_PRINTF("---addHolder failed---\n");
        return false;
    }

    return true;
}

static
bool isMandRegionBetween(map<u32, region_info>::const_iterator a,
                         map<u32, region_info>::const_iterator b) {
    while (b != a) {
        if (!b->second.optional) {
            return true;
        }
        --b;
    }

    return false;
}

// Attempts to advance the current plan. Returns true if we advance to the end
// (woot!); updates picked, plan and bad_region.
static
bool advancePlan(const NGHolder &g,
                 const unordered_map<NFAVertex, u32> &regions,
                 const NGHolder &prefix, bool stuck,
                 map<u32, region_info>::const_iterator &picked,
                 const map<u32, region_info>::const_iterator furthest,
                 const map<u32, region_info>::const_iterator furthest_lock,
                 const CharReach &next_escapes, som_plan &plan,
                 u32 *bad_region) {
    u32 bad_region_r = 0;
    u32 bad_region_x = 0;
    u32 bad_region_e = 0;
    DEBUG_PRINTF("curr %u\n", picked->first);

    if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) {
        plan.is_reset = true;
        picked = furthest;
        DEBUG_PRINTF("Prefix clears tail, woot!\n");
        return true;
    } else {
        DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r);
    }

    if (stuck) {
        u32 to_region = furthest_lock->first;
        if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) {
            DEBUG_PRINTF("XSL\n");
            picked = furthest_lock;
            plan.escapes = next_escapes;
            return true;
        } else {
            DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x);
        }

        if (validateEXSL(g, regions, to_region, next_escapes, prefix,
                         &bad_region_e)) {
            DEBUG_PRINTF("EXSL\n");
            picked = furthest_lock;
            plan.escapes = next_escapes;
            return true;
        } else {
            DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e);
        }
    } else {
        DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n");
    }

    assert(!plan.is_reset);

    *bad_region = max(bad_region_x, bad_region_e);
    if (bad_region_r >= *bad_region) {
        *bad_region = bad_region_r;
        plan.is_reset = true;
        plan.escapes.clear();
        picked = furthest;
    } else {
        picked = furthest_lock;
        plan.escapes = next_escapes;
    }

    DEBUG_PRINTF("first bad region now %u\n", *bad_region);
    return false;
}

static
bool addPlan(vector<som_plan> &plan, u32 parent) {
    DEBUG_PRINTF("adding plan %zu with parent %u\n", plan.size(),
                 parent);

    if (plan.size() >= MAX_SOM_PLANS) {
        DEBUG_PRINTF("too many plans!\n");
        return false;
    }

    plan.emplace_back(nullptr, CharReach(), false, parent);
    return true;
}

// Fetches all preds of {accept, acceptEod} for this graph.
static
void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) {
    set<NFAVertex> tmp;
    insert(&tmp, inv_adjacent_vertices(g.accept, g));
    insert(&tmp, inv_adjacent_vertices(g.acceptEod, g));
    tmp.erase(g.accept);

#ifdef DEBUG
    DEBUG_PRINTF("add reporters:");
    for (UNUSED auto v : tmp) {
        printf(" %zu", g[v].index);
    }
    printf("\n");
#endif

    reporters.insert(reporters.end(), tmp.begin(), tmp.end());
}

// Fetches all preds of {accept, acceptEod} in this region.
static
void addReporterVertices(const region_info &r, const NGHolder &g,
                         vector<NFAVertex> &reporters) {
    for (auto v : r.exits) {
        if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
            DEBUG_PRINTF("add reporter %zu\n", g[v].index);
            reporters.push_back(v);
        }
    }
}

// Fetches the mappings of all preds of {accept, acceptEod} in this region.
static
void addMappedReporterVertices(const region_info &r, const NGHolder &g,
                               const unordered_map<NFAVertex, NFAVertex> &mapping,
                               vector<NFAVertex> &reporters) {
    for (auto v : r.exits) {
        if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
            DEBUG_PRINTF("adding v=%zu\n", g[v].index);
            auto it = mapping.find(v);
            assert(it != mapping.end());
            reporters.push_back(it->second);
        }
    }
}

// Clone a version of the graph, but only including the in-edges of `enter'
// from earlier regions.
static
void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g,
                            const unordered_map<NFAVertex, u32> &regions,
                            NFAVertex entry, const vector<NFAVertex> &enters,
                            unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
    orig_to_copy.clear();
    cloneHolder(out, g, &orig_to_copy);

    assert(contains(orig_to_copy, entry));
    const u32 region = regions.at(entry);

    for (auto v : enters) {
        if (v == entry) {
            continue;
        }
        assert(contains(orig_to_copy, v));

        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (regions.at(u) < region) {
                assert(edge(orig_to_copy[u], orig_to_copy[v], out).second);
                remove_edge(orig_to_copy[u], orig_to_copy[v], out);
            }
        }
    }

    pruneUseless(out);
}

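/* Duplicates the tail of the graph (the split region and everything after it)
 * once per entry vertex, so that from earlier regions each copy can only be
 * entered through its own enter vertex. 'regions' is recomputed and 'enters'
 * is replaced with the corresponding vertices in the copies. */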
static
void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> &regions,
                 vector<NFAVertex> &enters) {
    assert(!enters.empty());
    const u32 split_region = regions.at(enters.front());

    vector<NFAVertex> new_enters;

    // Gather the list of vertices in the split region and subsequent regions.
    vector<NFAVertex> tail_vertices;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || regions.at(v) < split_region) {
            continue;
        }
        tail_vertices.push_back(v);
    }

    for (auto enter : enters) {
        DEBUG_PRINTF("processing enter %zu\n", g[enter].index);
        map<NFAVertex, NFAVertex> orig_to_copy;

        // Make a copy of all of the tail vertices, storing region info along
        // the way.
        for (auto v : tail_vertices) {
            auto v2 = clone_vertex(g, v);
            orig_to_copy[v] = v2;
            regions[v2] = regions.at(v);
        }

        // Wire up the edges: edges from previous regions come from the
        // original vertices, while edges internal to and beyond the split
        // region go to the copies.

        for (const auto &m : orig_to_copy) {
            NFAVertex v = m.first, v2 = m.second;

            for (const auto &e : out_edges_range(v, g)) {
                NFAVertex t = target(e, g);
                u32 t_region = regions.at(t);
                if (t_region >= split_region && !is_special(t, g)) {
                    assert(contains(orig_to_copy, t));
                    t = orig_to_copy[t];
                }
                add_edge_if_not_present(v2, t, g[e], g);
            }

            for (const auto &e : in_edges_range(v, g)) {
                NFAVertex u = source(e, g);
                if (regions.at(u) >= split_region && !is_special(u, g)) {
                    assert(contains(orig_to_copy, u));
                    u = orig_to_copy[u];
                }
                add_edge_if_not_present(u, v2, g[e], g);
            }
        }

        // Clear the in-edges from earlier regions of the OTHER enters for this
        // copy of the split region.
        for (auto v : enters) {
            if (v == enter) {
                continue;
            }

            remove_in_edge_if(orig_to_copy[v],
                              [&](const NFAEdge &e) {
                                  NFAVertex u = source(e, g);
                                  return regions.at(u) < split_region;
                              }, g);
        }

        new_enters.push_back(orig_to_copy[enter]);
    }

    // Remove the original set of tail vertices.
    remove_vertices(tail_vertices, g);
    pruneUseless(g);
    regions = assignRegions(g);

    enters.swap(new_enters);
}

static
bool doTreePlanningIntl(NGHolder &g,
                        const unordered_map<NFAVertex, u32> &regions,
                        const map<u32, region_info> &info,
                        map<u32, region_info>::const_iterator picked,
                        u32 bad_region, u32 parent_plan,
                        const unordered_map<NFAVertex, NFAVertex> &copy_to_orig,
                        vector<som_plan> &plan, const Grey &grey) {
    assert(picked != info.end());

    DEBUG_PRINTF("picked=%u\n", picked->first);
    DEBUG_PRINTF("parent is %u\n", parent_plan);

    map<u32, region_info>::const_iterator furthest;

    bool to_end = false;
    while (!to_end) {
        DEBUG_PRINTF("picked is %u\n", picked->first);
        DEBUG_PRINTF("first bad region now %u\n", bad_region);

        furthest = info.find(bad_region); /* first bad */
        if (furthest == info.end()) {
            DEBUG_PRINTF("no partition\n");
            return false;
        }
        --furthest; /* last region we can establish som for */

        if (furthest->first <= picked->first) {
            DEBUG_PRINTF("failed to make any progress\n");
            return false;
        }

        map<u32, region_info>::const_iterator furthest_lock = furthest;
        CharReach next_escapes;
        bool lock_found;
        /* The last possible lock in the range that we examine should be the
         * best. If the previous plan is a lock, this follows as any earlier
         * lock must have a reach that is a subset of the last plan's lock. If
         * the last plan is a resetting plan ..., ?is this true? */
        do {
            lock_found = isPossibleLock(g, furthest_lock, info,
                                        &next_escapes);
        } while (!lock_found && (--furthest_lock)->first > picked->first);
        DEBUG_PRINTF("lock possible? %d\n", (int)lock_found);

        if (lock_found && !isMandRegionBetween(picked, furthest_lock)) {
            lock_found = false;
        }

        if (!isMandRegionBetween(picked, furthest)) {
            return false;
        }

        /* There is no certainty that the som at a reset location will always
         * go forward */
        if (plan[parent_plan].is_reset && lock_found) {
            NGHolder midfix;
            DEBUG_PRINTF("checking if midfix is suitable for lock\n");
            fillHolderForLockCheck(&midfix, g, info, furthest_lock);

            if (!firstMatchIsFirst(midfix)) {
                DEBUG_PRINTF("not stuck\n");
                lock_found = false;
            }
        }

        if (!addPlan(plan, parent_plan)) {
            return false;
        }

        to_end = false;

        if (lock_found && next_escapes.none()) {
            picked = furthest_lock;
            to_end = true;
        }

        if (!to_end) {
            NGHolder conservative_midfix; /* for use in reset, exsl analysis */
            fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
            dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey);

            u32 old_bad_region = bad_region;
            to_end = advancePlan(g, regions, conservative_midfix, lock_found,
                                 picked, furthest, furthest_lock, next_escapes,
                                 plan.back(), &bad_region);
            if (!to_end
                && bad_region <= old_bad_region) { /* we failed to progress */
                DEBUG_PRINTF("failed to make any progress\n");
                return false;
            }
        }

        /* handle direct edge to accepts from region */
        if (edge(furthest->second.exits.front(), g.accept, g).second
            || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
            map<u32, region_info>::const_iterator it = furthest;
            do {
                addMappedReporterVertices(it->second, g, copy_to_orig,
                                          plan.back().reporters_in);
            } while (it != info.begin() && it->second.optional && (it--)->first);
        }

        /* create second prefix */
        plan.back().prefix = makePrefix(g, regions, furthest->second,
                                        next(furthest)->second);
        parent_plan = plan.size() - 1;
    }

    // The last region contributes reporters. If it's optional, the regions
    // before it do as well.
    map<u32, region_info>::const_reverse_iterator it = info.rbegin();
    do {
        DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
        addMappedReporterVertices(it->second, g, copy_to_orig,
                                  plan.back().reporters);
    } while (it->second.optional && it != info.rend() &&
             (++it)->first > furthest->first);

    return true;
}

static
bool doTreePlanning(NGHolder &g,
                    map<u32, region_info>::const_iterator presplit,
                    map<u32, region_info>::const_iterator picked,
                    vector<som_plan> &plan, const Grey &grey) {
    DEBUG_PRINTF("picked is %u\n", picked->first);
    DEBUG_PRINTF("presplit is %u\n", presplit->first);

    map<u32, region_info>::const_iterator splitter = next(presplit);
    vector<NFAVertex> enters = splitter->second.enters; // mutable copy
    DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size());

    if (enters.size() <= 1) {
        // TODO: Splitting a region with one entry won't get us anywhere, but
        // it shouldn't create buggy analyses either. See UE-1892.
        DEBUG_PRINTF("nothing to split\n");
        return false;
    }

    if (plan.size() + enters.size() > MAX_SOM_PLANS) {
        DEBUG_PRINTF("splitting this tree would hit the plan limit.\n");
        return false;
    }

    assert(!plan.empty());
    const u32 parent_plan = plan.size() - 1;

    // Make a copy of the graph, with the subgraph under each enter vertex
    // duplicated without the edges into the other enter vertices.
    // NOTE WELL: this will invalidate 'info' from the split point, but it's
    // OK... we don't use it after this.
    auto g_regions = assignRegions(g);
    expandGraph(g, g_regions, enters);
    dumpHolder(g, g_regions, 14, "som_expandedtree", grey);

    for (auto v : enters) {
        DEBUG_PRINTF("enter %zu\n", g[v].index);

        // For this entry vertex, construct a version of the graph without the
        // other entries in this region (g_path), and calculate its depths and
        // regions.

        NGHolder g_path;
        unordered_map<NFAVertex, NFAVertex> orig_to_copy;
        cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy);
        auto regions = assignRegions(g_path);
        dumpHolder(g_path, regions, 14, "som_treepath", grey);

        map<u32, region_info> path_info;
        buildRegionMapping(g_path, regions, path_info);

        // Translate 'picked' to the corresponding region iterator over the
        // g_path graph. we can't trust the numbering, so we use a vertex
        // instead.
        NFAVertex picked_v = picked->second.enters.front();
        assert(contains(orig_to_copy, picked_v));
        u32 picked_region = regions.at(orig_to_copy[picked_v]);
        map<u32, region_info>::const_iterator path_pick =
            path_info.find(picked_region);
        if (path_pick == path_info.end()) {
            assert(0); // odd
            return false;
        }

        // Similarly, find our bad_region.
        assert(contains(orig_to_copy, v));
        u32 bad_region = regions.at(orig_to_copy[v]);

        // It's possible that the region may have grown to include its
        // successors, in which case we (currently) run screaming. Just
        // checking the size should be sufficient here.
        if (picked->second.full.size() != path_pick->second.full.size()) {
            DEBUG_PRINTF("picked region has grown, bailing\n");
            return false;
        }

        // Construct reverse mapping from vertices in g_path to g.
        unordered_map<NFAVertex, NFAVertex> copy_to_orig;
        for (const auto &m : orig_to_copy) {
            copy_to_orig.insert(make_pair(m.second, m.first));
        }

        bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick,
                                         bad_region, parent_plan,
                                         copy_to_orig, plan, grey);
        if (!to_end) {
            return false;
        }
    }

    return true;
}

enum dsp_behaviour {
    ALLOW_MODIFY_HOLDER,
    DISALLOW_MODIFY_HOLDER /* say no to tree planning */
};

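/* Main SOM planning loop: starting from the picked region, repeatedly try to
 * advance the frontier either with a resetting prefix (sentClearsTail) or an
 * escape "lock" (XSL/EXSL), appending a som_plan for each step. Falls back to
 * tree planning (which mutates the graph) if no progress can be made and the
 * caller allows it. */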
static
bool doSomPlanning(NGHolder &g, bool stuck_in,
                   const unordered_map<NFAVertex, u32> &regions,
                   const map<u32, region_info> &info,
                   map<u32, region_info>::const_iterator picked,
                   vector<som_plan> &plan,
                   const Grey &grey,
                   dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) {
    DEBUG_PRINTF("in picked is %u\n", picked->first);

    /* Need to verify how far the lock covers */
    u32 bad_region;
    NGHolder *ap_pref = plan.back().prefix.get();
    NGHolder ap_temp;
    if (hasBigCycles(*ap_pref)) {
        fillRoughMidfix(&ap_temp, g, regions, info, picked);
        ap_pref = &ap_temp;
    }

    bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked,
                              picked, picked, plan.back().escapes,
                              plan.back(), &bad_region);

    if (to_end) {
        DEBUG_PRINTF("advanced through the whole graph in one go!\n");
        addReporterVertices(g, plan.back().reporters);
        return true;
    }

    map<u32, region_info>::const_iterator prev_furthest = picked;
    map<u32, region_info>::const_iterator furthest;

    furthest = info.find(bad_region); /* first bad */
    if (furthest == info.begin() || furthest == info.end()) {
        DEBUG_PRINTF("no partition\n");
        return false;
    }
    --furthest; /* last region we can establish som for */

    if (furthest->first <= picked->first) {
    do_tree:
        /* unable to establish SoM past the last picked region */
        if (behaviour == DISALLOW_MODIFY_HOLDER) {
            /* tree planning mutates the graph */
            return false;
        }

        DEBUG_PRINTF("failed to make any progress\n");
        assert(!plan.empty());
        if (plan.size() == 1) {
            DEBUG_PRINTF("not handling initial alternations yet\n");
            return false;
        }
        plan.pop_back();
        return doTreePlanning(g, furthest, prev_furthest, plan, grey);
    }

    furthest = picked;
    while (!to_end) {
        prev_furthest = furthest;

        DEBUG_PRINTF("prev further is %u\n", prev_furthest->first);
        DEBUG_PRINTF("first bad region now %u\n", bad_region);

        furthest = info.find(bad_region); /* first bad */
        if (furthest == info.begin() || furthest == info.end()) {
            DEBUG_PRINTF("no partition\n");
            return false;
        }
        --furthest; /* last region we can establish som for */

        map<u32, region_info>::const_iterator furthest_lock = furthest;
        CharReach next_escapes;
        bool stuck;
        do {
            stuck = isPossibleLock(g, furthest_lock, info, &next_escapes);
        } while (!stuck && (--furthest_lock)->first > prev_furthest->first);
        DEBUG_PRINTF("lock possible? %d\n", (int)stuck);
        DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first);

        if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) {
            stuck = false;
        }

        if (!isMandRegionBetween(prev_furthest, furthest)) {
            DEBUG_PRINTF("no mand region between %u and %u\n",
                         prev_furthest->first, furthest->first);
            return false;
        }

        /* There is no certainty that the som at a reset location will always
         * go forward */
        if (plan.back().is_reset && stuck) {
            NGHolder midfix;
            fillHolderForLockCheck(&midfix, g, info, furthest_lock);

            DEBUG_PRINTF("checking if midfix is suitable for lock\n");
            if (!firstMatchIsFirst(midfix)) {
                DEBUG_PRINTF("not stuck\n");
                stuck = false;
            }
        }

        assert(!plan.empty());
        if (!addPlan(plan, plan.size() - 1)) {
            return false;
        }

        to_end = false;

        if (stuck && next_escapes.none()) {
            picked = furthest_lock;
            to_end = true;
        }

        if (!to_end) {
            NGHolder conservative_midfix; /* for use in reset, exsl analysis */
            fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);

            u32 old_bad_region = bad_region;
            to_end = advancePlan(g, regions, conservative_midfix, stuck, picked,
                                 furthest, furthest_lock, next_escapes,
                                 plan.back(), &bad_region);

            if (!to_end
                && bad_region <= old_bad_region) { /* we failed to progress */
                goto do_tree;
            }
        }

        /* handle direct edge to accepts from region */
        if (edge(furthest->second.exits.front(), g.accept, g).second
            || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
            map<u32, region_info>::const_iterator it = furthest;
            do {
                DEBUG_PRINTF("direct edge to accept from region %u\n",
                             it->first);
                addReporterVertices(it->second, g, plan.back().reporters_in);
            } while (it != info.begin() && it->second.optional
                     && (it--)->first);
        }

        /* create second prefix */
        plan.back().prefix = makePrefix(g, regions, furthest->second,
                                        next(furthest)->second);
    }
    DEBUG_PRINTF("(final) picked is %u\n", picked->first);

    // The last region contributes reporters. If it's optional, the regions
    // before it do as well.
    map<u32, region_info>::const_reverse_iterator it = info.rbegin();
    do {
        DEBUG_PRINTF("region %u contributes reporters to last plan\n",
                     it->first);
        addReporterVertices(it->second, g, plan.back().reporters);
    } while (it->second.optional && it != info.rend() &&
             (++it)->first > furthest->first);

    DEBUG_PRINTF("done!\n");
    return true;
}

static
void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
                 UNUSED size_t num) {
#if defined(DEBUG) || defined(DUMP_PLANS)
    DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, "
                 "parent=%u\n",
                 num, p.prefix.get(),
                 describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(),
                 p.is_reset, p.parent);
    printf(" reporters:");
    for (auto v : p.reporters) {
        printf(" %zu", g[v].index);
    }
    printf("\n");
    printf(" reporters_in:");
    for (auto v : p.reporters_in) {
        printf(" %zu", g[v].index);
    }
    printf("\n");
#endif
}

/**
 * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too
 * large exception as (1) if previous ng modifications have been applied (other
 * midfixes have been applied), ng will be in an undefined state on return and
 * (2) if the head of a pattern cannot be implemented we are generally unable
 * to implement the full pattern.
 */
static
void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
                      NGHolder &g, vector<som_plan> &plan,
                      const u32 first_som_slot) {
    ReportManager &rm = ng.rm;
    SomSlotManager &ssm = ng.ssm;

    DEBUG_PRINTF("%zu plans\n", plan.size());
    assert(plan.size() <= MAX_SOM_PLANS);
    assert(!plan.empty());

    vector<u32> som_slots(plan.size());
    som_slots[0] = first_som_slot;

    // Root plan, which already has a SOM slot assigned (first_som_slot).
    dumpSomPlan(g, plan.front(), 0);
    dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
                        ng.cc.grey);
    assert(plan.front().prefix);
    if (plan.front().escapes.any() && !plan.front().is_reset) {
        /* setup escaper for first som location */
        if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
                           first_som_slot)) {
            throw CompileError(expr.index, "Pattern is too large.");
        }
    }

    assert(plan.front().reporters_in.empty());
    updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot);

    // Tree of plans, encoded in a vector.
    vector<som_plan>::const_iterator it = plan.begin();
    for (++it; it != plan.end(); ++it) {
        const u32 plan_num = it - plan.begin();
        dumpSomPlan(g, *it, plan_num);
        dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
                            plan_num, ng.cc.grey);

        assert(it->parent < plan_num);
        u32 som_slot_in = som_slots[it->parent];
        u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes,
                                          it->is_reset, som_slot_in);
        som_slots[plan_num] = som_slot_out;

        assert(!it->no_implement);
        if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
            throw CompileError(expr.index, "Pattern is too large.");
        }
        updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
        updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
    }

    /* create prefix to set the som_loc */
    if (!plan.front().no_implement) {
        renumber_vertices(*plan.front().prefix);
        assert(plan.front().prefix->kind == NFA_OUTFIX);
        if (!ng.addHolder(*plan.front().prefix)) {
            throw CompileError(expr.index, "Pattern is too large.");
        }
    }
}

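/* Moves all successors of startDs onto start so that the graph becomes
 * anchored; used below when building the reversed SOM NFA. */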
static
void anchorStarts(NGHolder &g) {
    vector<NFAEdge> dead;
    for (const auto &e : out_edges_range(g.startDs, g)) {
        NFAVertex v = target(e, g);
        if (v == g.startDs) {
            continue;
        }
        add_edge_if_not_present(g.start, v, g[e], g);
        dead.push_back(e);
    }
    remove_edges(dead, g);
}

static
void setZeroReports(NGHolder &g) {
    set<NFAVertex> acceptors;
    insert(&acceptors, inv_adjacent_vertices(g.accept, g));
    insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g));
    acceptors.erase(g.accept);

    for (auto v : vertices_range(g)) {
        auto &reports = g[v].reports;
        reports.clear();

        if (!contains(acceptors, v)) {
            continue;
        }

        // We use the report ID to store the offset adjustment used for virtual
        // starts.

        if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
            reports.insert(1);
        } else {
            reports.insert(0);
        }
    }
}

/* updates the reports on all vertices leading to the sink */
static
void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
                          const ReportID report, const u32 comp_id) {
    // Construct replacement report.
    Report ir = rm.getReport(report);
    ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA;
    ir.revNfaIndex = comp_id;
    ReportID new_report = rm.getInternalId(ir);

    for (auto v : inv_adjacent_vertices_range(sink, g)) {
        if (v == g.accept) {
            continue;
        }

        auto &r = g[v].reports;
        if (contains(r, report)) {
            r.erase(report);
            r.insert(new_report);
        }
    }
}

static
void clearProperInEdges(NGHolder &g, const NFAVertex sink) {
    vector<NFAEdge> dead;
    for (const auto &e : in_edges_range(sink, g)) {
        if (source(e, g) == g.accept) {
            continue;
        }
        dead.push_back(e);
    }

    if (dead.empty()) {
        return;
    }

    remove_edges(dead, g);
    pruneUseless(g, false);
}

namespace {
struct SomRevNfa {
    SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n)
        : sink(s), report(r), nfa(move(n)) {}
    NFAVertex sink;
    ReportID report;
    bytecode_ptr<NFA> nfa;
};
}

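/* Builds the reverse NFA used to rediscover the start of match: the graph is
 * reversed, anchored, given offset-adjust reports (see setZeroReports) and
 * compiled with constructReversedNFA. Returns an empty pointer on failure. */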
static
bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g,
                                    const CompileContext &cc) {
    // Create a reversed anchored version of this NFA which fires a zero report
    // ID on accept.
    NGHolder g_rev;
    reverseHolder(g, g_rev);
    anchorStarts(g_rev);
    setZeroReports(g_rev);

    // Prep for actual construction.
    renumber_vertices(g_rev);
    g_rev.kind = NFA_REV_PREFIX;
    reduceGraphEquivalences(g_rev, cc);
    removeRedundancy(g_rev, SOM_NONE);

    DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev));

    auto nfa = constructReversedNFA(g_rev, cc);
    if (!nfa) {
        return nfa;
    }

    // Set some useful properties.
    depth maxWidth = findMaxWidth(g);
    if (maxWidth.is_finite()) {
        nfa->maxWidth = (u32)maxWidth;
    } else {
        nfa->maxWidth = 0;
    }
    depth minWidth = findMinWidth(g);
    nfa->minWidth = (u32)minWidth;

    return nfa;
}

static
bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g,
                   const ReportID report, const NFAVertex sink,
                   const CompileContext &cc) {
    // Clone the graph with ONLY the given report vertices on the given sink.
    NGHolder g2;
    cloneHolder(g2, g);
    clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept);
    pruneAllOtherReports(g2, report);

    if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) {
        DEBUG_PRINTF("no work to do for this sink\n");
        return true;
    }

    renumber_vertices(g2); // for findMinWidth, findMaxWidth.

    auto nfa = makeBareSomRevNfa(g2, cc);
    if (!nfa) {
        DEBUG_PRINTF("couldn't build rev nfa\n");
        return false;
    }

    som_nfas.emplace_back(sink, report, move(nfa));
    return true;
}

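/* Handles SOM entirely via reverse NFAs: for every report (on both the accept
 * and acceptEod sinks) a dedicated reverse NFA is built and registered with
 * the SOM slot manager, and the graph's reports are rewritten to
 * EXTERNAL_CALLBACK_SOM_REV_NFA reports referencing those components. Fails
 * if the graph is wider than the available SOM history. */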
1807static
1808bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
1809 ReportManager &rm = ng.rm;
1810
1811 // FIXME might want to work on a graph without extra redundancy?
1812 depth maxWidth = findMaxWidth(g);
1813 DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
1814
1815 if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) {
1816 DEBUG_PRINTF("too wide\n");
1817 return false;
1818 }
1819
1820 set<ReportID> reports = all_reports(g);
1821 DEBUG_PRINTF("%zu reports\n", reports.size());
1822
1823 // We distinguish between reports and accept/acceptEod sinks in order to
1824 // correctly handle cases which do different things on eod/normal accepts.
1825 // Later, it might be more elegant to do this with a single NFA and
1826 // multi-tops.
1827
1828 vector<SomRevNfa> som_nfas;
1829
1830 for (auto report : reports) {
1831 if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) {
1832 return false;
1833 }
1834 if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) {
1835 return false;
1836 }
1837 }
1838
1839 for (auto &som_nfa : som_nfas) {
1840 assert(som_nfa.nfa);
1841
1842 // Transfer ownership of the NFA to the SOM slot manager.
1843 u32 comp_id = ng.ssm.addRevNfa(move(som_nfa.nfa), maxWidth);
1844
1845 // Replace this report on 'g' with a SOM_REV_NFA report pointing at our
1846 // new component.
1847 makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id);
1848 }
1849
1850 if (ng.cc.streaming) {
1851 assert(ng.ssm.somHistoryRequired() <=
1852 max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
1853 }
1854
1855 return true;
1856}
1857
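/**
 * Build a reverse NFA for the given prefix graph and hand it to the SOM slot
 * manager, returning its component id. Throws a CompileError if the NFA
 * cannot be constructed.
 */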
1858static
1859u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
1860 const CompileContext &cc) {
1861 depth maxWidth = findMaxWidth(g);
1862
1863 assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable));
1864 assert(all_reports(g).size() == 1);
1865
1866 auto nfa = makeBareSomRevNfa(g, cc);
1867 if (!nfa) {
1868 throw CompileError(expr.index, "Pattern is too large.");
1869 }
1870
1871 if (ng.cc.streaming) {
1872 assert(ng.ssm.somHistoryRequired() <=
1873 max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
1874 }
1875
1876 return ng.ssm.addRevNfa(move(nfa), maxWidth);
1877}
1878
1879static
1880bool is_literable(const NGHolder &g, NFAVertex v) {
1881 const CharReach &cr = g[v].char_reach;
1882 return cr.count() == 1 || cr.isCaselessChar();
1883}
1884
1885static
1886void append(ue2_literal &s, const CharReach &cr) {
1887 assert(cr.count() == 1 || cr.isCaselessChar());
1888 s.push_back(cr.find_first(), cr.isCaselessChar());
1889}
1890
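/**
 * Look for a literal of at least MIN_LITERAL_LENGTH characters formed by a
 * run of non-optional, acyclic, single-vertex regions ending at or after
 * lower_bound. On success, returns an iterator to the region ending the
 * literal and writes the literal (truncated to the available history if
 * necessary) to s_out; returns info.end() if no suitable literal is found.
 * For instance, in a graph for something like /foo.*literal[0-9]+bar/ this
 * might find "literal".
 */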
1891static
1892map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g,
1893 const map<u32, region_info> &info,
1894 map<u32, region_info>::const_iterator lower_bound,
1895 ue2_literal &s_out, const Grey &grey) {
1896#define MIN_LITERAL_LENGTH 3
1897 s_out.clear();
1898 bool past_lower = false;
1899 ue2_literal s;
1900 map<u32, region_info>::const_iterator it;
1901 for (it = info.begin(); it != info.end(); ++it) {
1902 if (it == lower_bound) {
1903 past_lower = true;
1904 }
1905 if (!it->second.optional && it->second.dag
1906 && it->second.full.size() == 1
1907 && is_literable(g, it->second.full.front())) {
1908 append(s, g[it->second.full.front()].char_reach);
1909
1910 if (s.length() >= grey.maxHistoryAvailable && past_lower) {
1911 goto exit;
1912 }
1913 } else {
1914 if (past_lower && it != lower_bound
1915 && s.length() >= MIN_LITERAL_LENGTH) {
1916 --it;
1917 goto exit;
1918 }
1919 s.clear();
1920 }
1921 }
1922
1923 if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) {
1924 --it;
1925 s_out = s;
1926 return it;
1927 }
1928 exit:
1929 if (s.length() > grey.maxHistoryAvailable) {
1930 ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable;
1931 for (; jt != s.end(); ++jt) {
1932 s_out.push_back(*jt);
1933 }
1934 } else {
1935 s_out = s;
1936 }
1937 return it;
1938}
1939
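/**
 * Try to construct a SOM plan that chains forward from the picked region:
 * build a prefix holder covering everything up to the cut, then run SOM
 * planning over the remainder of the graph. Rolls back any SOM slots
 * allocated if planning fails.
 */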
1940static
1941bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g,
1942 const unordered_map<NFAVertex, u32> &regions,
1943 const map<u32, region_info> &info,
1944 map<u32, region_info>::const_iterator picked,
1945 const Grey &grey,
1946 vector<som_plan> *plan) {
1947 DEBUG_PRINTF("trying to chain from %u\n", picked->first);
1948 const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
1949
1950 shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
1951 next(picked)->second);
1952
1953 // Quick check to stop us from trying this on huge graphs, which causes us
    // to spend forever in ng_execute looking at cases that will most likely
1955 // fail. See UE-2078.
1956 size_t prefix_size = num_vertices(*prefix);
1957 size_t total_size = num_vertices(g);
1958 assert(total_size >= prefix_size);
1959 if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) {
1960 DEBUG_PRINTF("suffix has %zu vertices, fail\n",
1961 total_size - prefix_size);
1962 return false;
1963 }
1964
1965 clearReports(*prefix);
1966 for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) {
1967 (*prefix)[u].reports.insert(0);
1968 }
1969
1970 dumpHolder(*prefix, 0, "full_haiglit_prefix", grey);
1971
1972 CharReach escapes;
1973 bool stuck = isPossibleLock(g, picked, info, &escapes);
1974 if (stuck) {
1975 NGHolder gg;
1976 fillHolderForLockCheck(&gg, g, info, picked);
1977
1978 stuck = firstMatchIsFirst(gg);
1979 }
1980
1981 DEBUG_PRINTF("stuck = %d\n", (int)stuck);
1982
1983 // Note: no-one should ever pay attention to the root plan's som_loc_in.
1984 plan->emplace_back(prefix, escapes, false, 0);
1985 plan->back().no_implement = true;
1986
1987 dumpHolder(*plan->back().prefix, 22, "som_prefix", grey);
1988
1989 /* don't allow tree planning to mutate the graph */
1990 if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey,
1991 DISALLOW_MODIFY_HOLDER)) {
1992 // Rollback SOM locations.
1993 ssm.rollbackSomTo(numSomLocsBefore);
1994
1995 DEBUG_PRINTF("fail to chain\n");
1996 return false;
1997 }
1998
1999 return true;
2000}
2001
2002static
2003void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
2004 ReportID haig_report_id = rose.getNewNfaReport();
2005 DEBUG_PRINTF("setting report id of %u\n", haig_report_id);
2006
2007 clearReports(h);
2008 for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
2009 h[u].reports.clear();
2010 h[u].reports.insert(haig_report_id);
2011 }
2012}
2013
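/**
 * Attempt to build a Haig (SOM-tracking DFA) from the prefix of the graph up
 * to the picked region. On success, the Haig and the prefix holder it was
 * built from are returned via the out-params.
 */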
2014static
2015bool tryHaig(RoseBuild &rose, NGHolder &g,
2016 const unordered_map<NFAVertex, u32> &regions,
2017 som_type som, u32 somPrecision,
2018 map<u32, region_info>::const_iterator picked,
2019 shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix,
2020 const Grey &grey) {
2021 DEBUG_PRINTF("trying to build a haig\n");
2022 shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
2023 next(picked)->second);
2024 prefix->kind = NFA_PREFIX;
2025 setReportOnHaigPrefix(rose, *prefix);
2026 dumpHolder(*prefix, 0, "haig_prefix", grey);
2027 vector<vector<CharReach> > triggers; /* empty for prefix */
2028 *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey);
2029 if (!*haig) {
2030 DEBUG_PRINTF("failed to haig\n");
2031 return false;
2032 }
2033 *haig_prefix = prefix;
2034 return true;
2035}
2036
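/**
 * Add a small Rose graph of the form start -> literal -> accept, with the
 * given Haig prefix attached to the leading edge, so that matches of the
 * literal carry the SOM computed by the Haig.
 */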
2037static
2038void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
2039 const shared_ptr<raw_som_dfa> &haig,
2040 const ue2_literal &lit, const set<ReportID> &reports) {
2041 assert(prefix && haig);
2042
2043 DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str());
2044
2045 RoseInGraph ig;
2046 RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
2047 RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
2048
2049 add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig);
2050
2051 assert(!reports.empty());
2052 RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
2053 add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
2054
2055 calcVertexOffsets(ig);
2056
2057 UNUSED bool rv = tb.addSombeRose(ig);
2058 assert(rv); // TODO: recover from addRose failure
2059}
2060
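/**
 * The "Haig followed by literal" SOMBE strategy: repeatedly look for a decent
 * literal later in the graph, build a Haig prefix that computes SOM up to
 * that literal, and then either chain the rest of the graph with a SOM plan
 * (non-terminal literal) or report directly (terminal literal). Returns
 * SOMBE_FAIL if no usable literal can be found.
 */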
2061static
2062sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
2063 u32 comp_id, som_type som,
2064 const unordered_map<NFAVertex, u32> &regions,
2065 const map<u32, region_info> &info,
2066 map<u32, region_info>::const_iterator lower_bound) {
2067 DEBUG_PRINTF("entry\n");
2068 assert(g.kind == NFA_OUTFIX);
2069 const CompileContext &cc = ng.cc;
2070 ReportManager &rm = ng.rm;
2071 SomSlotManager &ssm = ng.ssm;
2072
2073 if (!cc.grey.allowHaigLit) {
2074 return SOMBE_FAIL;
2075 }
2076
2077 const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
2078 u32 som_loc = ssm.getPrivateSomSlot();
2079
2080 if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) {
2081 // This is an optimisation: if we can't build a Haig from a portion of
2082 // the graph, then we won't be able to manage it as an outfix either
2083 // when we fall back.
2084 throw CompileError(expr.index, "Pattern is too large.");
2085 }
2086
2087 while (1) {
2088 DEBUG_PRINTF("lower bound is %u\n", lower_bound->first);
2089 ue2_literal s;
2090 map<u32, region_info>::const_iterator lit
2091 = findLaterLiteral(g, info, lower_bound, s, cc.grey);
2092 if (lit == info.end()) {
2093 DEBUG_PRINTF("failed to find literal\n");
2094 ssm.rollbackSomTo(numSomLocsBefore);
2095 return SOMBE_FAIL;
2096 }
2097 DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
2098 lit->first);
2099
2100 if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
2101 DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
2102 lower_bound = lit;
2103 ++lower_bound;
2104 continue;
2105 }
2106
2107 shared_ptr<raw_som_dfa> haig;
2108 shared_ptr<NGHolder> haig_prefix;
2109 map<u32, region_info>::const_iterator haig_reg = lit;
2110
2111 if (edge(lit->second.exits.front(), g.acceptEod, g).second) {
2112 /* TODO: handle */
2113 ssm.rollbackSomTo(numSomLocsBefore);
2114 return SOMBE_FAIL;
2115 }
2116
2117 advance(haig_reg, -(s32)s.length());
2118
2119 if (!haig_reg->first && haig_reg->second.full.size() == 2) {
2120 /* just starts */
2121
2122 /* TODO: make below assertion true, reset checks could be stronger
2123 * (12356)
2124 */
2125 /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey,
2126 &plan)); */
2127
2128 lower_bound = lit;
2129 ++lower_bound;
2130 continue; /* somebody else should have been able to chain */
2131 }
2132
2133 bool ok = true;
2134 set<ReportID> rep;
2135 if (next(lit) != info.end()) {
2136 /* non terminal literal */
2137
2138 /* TODO: handle edges to accept ? */
2139 vector<som_plan> plan;
2140 if (edge(lit->second.exits.front(), g.accept, g).second) {
2141 insert(&rep, g[lit->second.exits.front()].reports);
2142 remove_edge(lit->second.exits.front(), g.accept, g);
2143 g[lit->second.exits.front()].reports.clear();
2144
2145 /* Note: we can mess with the graph as this is the last literal
2146 * we will find and on failure the graph will be thrown away */
2147 }
2148
2149 ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit,
2150 cc.grey, &plan);
2151 ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(),
2152 haig_reg, &haig, &haig_prefix, cc.grey);
2153
2154 if (!ok) {
2155 DEBUG_PRINTF(":( going to next attempt\n");
2156 goto next_try;
2157 }
2158
2159 implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
2160
2161 Report ir = makeCallback(0U, 0);
2162 assert(!plan.empty());
2163 if (plan.front().is_reset) {
2164 ir.type = INTERNAL_SOM_LOC_SET_FROM;
2165 } else {
2166 ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE;
2167 }
2168 ir.onmatch = som_loc;
2169 rep.insert(rm.getInternalId(ir));
2170 } else {
2171 /* terminal literal */
2172 ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg,
2173 &haig, &haig_prefix, cc.grey);
2174
2175 /* find report */
2176 insert(&rep, g[lit->second.exits.front()].reports);
2177
2178 /* TODO: som_loc is unused */
2179 }
2180
2181 if (ok) {
2182 roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep);
2183 if (next(lit) != info.end()) {
2184 return SOMBE_HANDLED_INTERNAL;
2185 } else {
2186 ssm.rollbackSomTo(numSomLocsBefore);
2187 return SOMBE_HANDLED_ALL;
2188 }
2189 }
2190next_try:
2191 lower_bound = lit;
2192 ++lower_bound;
2193 }
2194 assert(0);
2195 return SOMBE_FAIL;
2196}
2197
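/**
 * Enumerate a set of literals (up to MAX_LEADING_LITERALS) that cover all
 * paths from the start vertices to a frontier of terminal vertices, by
 * breadth-first expansion. Expansion of a path stops when it reaches an
 * accept, a match vertex, a wide character class or a loop. For example, a
 * graph for something like /(foo|bar)[^X]*baz/ might yield the literals
 * "foo" and "bar", with the vertices ending those words as terminals.
 */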
2198static
2199bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits,
2200 set<NFAVertex> *terminals) {
2201 /* TODO: smarter (topo) */
2202#define MAX_LEADING_LITERALS 20
2203 set<NFAVertex> s_succ;
2204 insert(&s_succ, adjacent_vertices(g.start, g));
2205
2206 set<NFAVertex> sds_succ;
2207 insert(&sds_succ, adjacent_vertices(g.startDs, g));
2208
2209 if (!is_subset_of(s_succ, sds_succ)) {
2210 DEBUG_PRINTF("not floating\n");
2211 return false;
2212 }
2213
2214 sds_succ.erase(g.startDs);
2215
2216 map<NFAVertex, vector<ue2_literal> > curr;
2217 curr[g.startDs].push_back(ue2_literal());
2218
2219 map<NFAVertex, set<NFAVertex> > seen;
2220 map<NFAVertex, vector<ue2_literal> > next;
2221
2222 bool did_expansion = true;
2223 while (did_expansion) {
2224 did_expansion = false;
2225 u32 count = 0;
2226 assert(!curr.empty());
2227 for (const auto &m : curr) {
2228 const NFAVertex u = m.first;
2229 const vector<ue2_literal> &base = m.second;
2230 DEBUG_PRINTF("expanding from %zu\n", g[u].index);
2231 for (auto v : adjacent_vertices_range(u, g)) {
2232 if (v == g.startDs) {
2233 continue;
2234 }
2235 if (contains(seen[u], v)) {
2236 DEBUG_PRINTF("loop\n");
2237 goto skip_to_next_terminal;
2238 }
2239 if (is_any_accept(v, g) || is_match_vertex(v, g)) {
2240 DEBUG_PRINTF("match\n");
2241 goto skip_to_next_terminal;
2242 }
2243 if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) {
2244 DEBUG_PRINTF("wide\n");
2245 goto skip_to_next_terminal;
2246 }
2247 }
2248
2249 for (auto v : adjacent_vertices_range(u, g)) {
2250 assert(!contains(seen[u], v));
2251 if (v == g.startDs) {
2252 continue;
2253 }
2254 insert(&seen[v], seen[u]);
2255 seen[v].insert(v);
2256 CharReach cr = g[v].char_reach;
2257 vector<ue2_literal> &out = next[v];
2258
2259 DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index,
2260 cr.count());
2261 for (size_t c = cr.find_first(); c != CharReach::npos;
2262 c = cr.find_next(c)) {
2263 bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
2264 && cr.test(mytolower(c));
2265
2266 if (nocase && (char)c == mytolower(c)) {
2267 continue; /* uppercase already handled us */
2268 }
2269
2270 for (const auto &lit : base) {
2271 if (count >= MAX_LEADING_LITERALS) {
2272 DEBUG_PRINTF("count %u\n", count);
2273 goto exit;
2274 }
2275 did_expansion = true;
2276 out.push_back(lit);
2277 out.back().push_back(c, nocase);
2278 count++;
2279 if (out.back().length() > MAX_MASK2_WIDTH
2280 && mixed_sensitivity(out.back())) {
2281 goto exit;
2282 }
2283
2284 }
2285 }
2286 }
2287 if (0) {
2288 skip_to_next_terminal:
2289 insert(&next[u], next[u].end(), base);
2290 count += base.size();
2291 if (count > MAX_LEADING_LITERALS) {
2292 DEBUG_PRINTF("count %u\n", count);
2293 goto exit;
2294 }
2295 }
2296 }
2297
2298 curr.swap(next);
2299 next.clear();
2300 };
2301 exit:;
2302 for (const auto &m : curr) {
2303 NFAVertex t = m.first;
2304 if (t == g.startDs) {
2305 assert(curr.size() == 1);
2306 return false;
2307 }
2308 assert(!is_special(t, g));
2309 terminals->insert(t);
2310 insert(lits, m.second);
2311 }
2312 assert(lits->size() <= MAX_LEADING_LITERALS);
2313 return !lits->empty();
2314}
2315
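/**
 * Split the graph into a set of leading literals and a right-hand side graph.
 * Only succeeds if every terminal vertex of the literals has exactly the same
 * set of successors, so that the split is clean.
 */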
2316static
2317bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out,
2318 NGHolder *rhs) {
    DEBUG_PRINTF("looking for leading literals\n");
2320
2321 set<NFAVertex> terms;
2322 if (!leadingLiterals(g, lit_out, &terms)) {
2323 return false;
2324 }
2325
2326 for (UNUSED const auto &lit : *lit_out) {
2327 DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(),
2328 lit.length());
2329 }
2330
2331 /* need to validate that it is a clean split */
2332 assert(!terms.empty());
2333 set<NFAVertex> adj_term1;
2334 insert(&adj_term1, adjacent_vertices(*terms.begin(), g));
2335 for (auto v : terms) {
2336 DEBUG_PRINTF("term %zu\n", g[v].index);
2337 set<NFAVertex> temp;
2338 insert(&temp, adjacent_vertices(v, g));
2339 if (temp != adj_term1) {
2340 DEBUG_PRINTF("bad split\n");
2341 return false;
2342 }
2343 }
2344
2345 unordered_map<NFAVertex, NFAVertex> rhs_map;
2346 vector<NFAVertex> pivots;
2347 insert(&pivots, pivots.end(), adj_term1);
2348 splitRHS(g, pivots, rhs, &rhs_map);
2349
2350 assert(is_triggered(*rhs));
2351 return true;
2352}
2353
2354static
2355void findBestLiteral(const NGHolder &g,
2356 const unordered_map<NFAVertex, u32> &regions,
2357 ue2_literal *lit_out, NFAVertex *v,
2358 const CompileContext &cc) {
2359 map<u32, region_info> info;
2360 buildRegionMapping(g, regions, info, false);
2361
2362 ue2_literal best;
2363 NFAVertex best_v = NGHolder::null_vertex();
2364
2365 map<u32, region_info>::const_iterator lit = info.begin();
2366 while (1) {
2367 ue2_literal s;
2368 lit = findLaterLiteral(g, info, lit, s, cc.grey);
2369 if (lit == info.end()) {
2370 break;
2371 }
2372 DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
2373 lit->first);
2374
2375 if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
2376 DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
2377 ++lit;
2378 continue;
2379 }
2380
2381 if (s.length() > best.length()) {
2382 best = s;
2383 assert(!lit->second.exits.empty());
2384 best_v = lit->second.exits[0];
2385 }
2386
2387 ++lit;
2388 }
2389
2390 lit_out->swap(best);
2391 *v = best_v;
2392}
2393
2394static
2395bool splitOffBestLiteral(const NGHolder &g,
2396 const unordered_map<NFAVertex, u32> &regions,
2397 ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs,
2398 const CompileContext &cc) {
2399 NFAVertex v = NGHolder::null_vertex();
2400
2401 findBestLiteral(g, regions, lit_out, &v, cc);
2402 if (lit_out->empty()) {
2403 return false;
2404 }
2405
2406 DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str());
2407
2408 unordered_map<NFAVertex, NFAVertex> lhs_map;
2409 unordered_map<NFAVertex, NFAVertex> rhs_map;
2410
2411 splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map);
2412
2413 DEBUG_PRINTF("v = %zu\n", g[v].index);
2414
2415 return true;
2416}
2417
2418/**
2419 * Replace the given graph's EXTERNAL_CALLBACK reports with
2420 * EXTERNAL_CALLBACK_SOM_PASS reports.
2421 */
2422void makeReportsSomPass(ReportManager &rm, NGHolder &g) {
2423 for (const auto &v : vertices_range(g)) {
2424 const auto &reports = g[v].reports;
2425 if (reports.empty()) {
2426 continue;
2427 }
2428
2429 flat_set<ReportID> new_reports;
2430 for (const ReportID &id : reports) {
2431 const Report &report = rm.getReport(id);
2432 if (report.type != EXTERNAL_CALLBACK) {
2433 new_reports.insert(id);
2434 continue;
2435 }
2436 Report report2 = report;
2437 report2.type = EXTERNAL_CALLBACK_SOM_PASS;
2438 new_reports.insert(rm.getInternalId(report2));
2439 }
2440
2441 g[v].reports = new_reports;
2442 }
2443}
2444
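/**
 * The "literal then Haig" SOMBE strategy: split off a single leading literal
 * and implement the remainder of the graph as a Haig suffix triggered by that
 * literal, adding the result to Rose as a SOMBE rose.
 */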
2445static
2446bool doLitHaigSom(NG &ng, NGHolder &g, som_type som) {
2447 ue2_literal lit;
2448 shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
2449 if (!ng.cc.grey.allowLitHaig) {
2450 return false;
2451 }
2452
2453 dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
2454
2455 if (!splitOffLeadingLiteral(g, &lit, &*rhs)) {
2456 DEBUG_PRINTF("no literal\n");
2457 return false;
2458 }
2459
2460 if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
2461 DEBUG_PRINTF("lit too short\n");
2462 return false;
2463 }
2464
2465 assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
2466
2467 makeReportsSomPass(ng.rm, *rhs);
2468
2469 dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
2470
2471 vector<vector<CharReach> > triggers;
2472 triggers.push_back(as_cr_seq(lit));
2473
2474 assert(rhs->kind == NFA_SUFFIX);
2475 shared_ptr<raw_som_dfa> haig
2476 = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
2477 ng.cc.grey, false /* lit implies adv som */);
2478 if (!haig) {
2479 DEBUG_PRINTF("failed to haig\n");
2480 return false;
2481 }
2482 DEBUG_PRINTF("haig %p\n", haig.get());
2483
2484 RoseInGraph ig;
2485 RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
2486 RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
2487 add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
2488
2489 RoseInVertex a
2490 = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
2491 add_edge(v, a, RoseInEdgeProps(haig), ig);
2492
2493 calcVertexOffsets(ig);
2494
2495 return ng.rose->addSombeRose(ig);
2496}
2497
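/**
 * The "Haig, literal, Haig" SOMBE strategy: split the graph around its best
 * internal literal, implement the left-hand side as a Haig prefix that
 * computes SOM and the right-hand side as a Haig suffix triggered by the
 * literal, and stitch the pieces together in a Rose graph.
 */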
2498static
2499bool doHaigLitHaigSom(NG &ng, NGHolder &g,
2500 const unordered_map<NFAVertex, u32> &regions,
2501 som_type som) {
2502 if (!ng.cc.grey.allowLitHaig) {
2503 return false;
2504 }
2505
2506 // In streaming mode, we can only delay up to our max available history.
2507 const u32 max_delay =
2508 ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX;
2509
2510 ue2_literal lit;
2511 shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
2512 shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
2513 if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
2514 return false;
2515 }
2516
2517 DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(),
2518 lit.length());
2519
2520 if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
2521 DEBUG_PRINTF("lit too short\n");
2522 return false;
2523 }
2524
2525 assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
2526
2527 if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
2528 return false; /* TODO: handle */
2529 }
2530
2531 makeReportsSomPass(ng.rm, *rhs);
2532
2533 dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey);
2534 dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey);
2535
2536 u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay);
2537
2538 RoseInGraph ig;
2539 RoseInVertex s
2540 = add_vertex(RoseInVertexProps::makeStart(false), ig);
2541 RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
2542
2543 bool lhs_all_vac = true;
2544 NGHolder::adjacency_iterator ai, ae;
2545 for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs);
2546 ai != ae && lhs_all_vac; ++ai) {
2547 if (!is_special(*ai, *lhs)) {
2548 lhs_all_vac = false;
2549 }
2550 }
2551 for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs);
2552 ai != ae && lhs_all_vac; ++ai) {
2553 if (!is_special(*ai, *lhs)) {
2554 lhs_all_vac = false;
2555 }
2556 }
2557
2558 if (lhs_all_vac) {
2559 /* lhs is completely vacuous --> no prefix needed */
2560 add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
2561 } else {
2562 assert(delay == lit.length());
2563 setReportOnHaigPrefix(*ng.rose, *lhs);
2564 vector<vector<CharReach> > prefix_triggers; /* empty for prefix */
2565 assert(lhs->kind == NFA_PREFIX);
2566 shared_ptr<raw_som_dfa> l_haig
2567 = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(),
2568 prefix_triggers, ng.cc.grey);
2569 if (!l_haig) {
2570 DEBUG_PRINTF("failed to haig\n");
2571 return false;
2572 }
2573 DEBUG_PRINTF("lhs haig %p\n", l_haig.get());
2574
2575 add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig);
2576 }
2577
2578 if (!edge(rhs->start, rhs->accept, *rhs).second) {
2579 assert(rhs->kind == NFA_SUFFIX);
2580
2581 vector<vector<CharReach> > triggers;
2582 triggers.push_back(as_cr_seq(lit));
2583
2584 ue2_literal lit2;
2585 if (getTrailingLiteral(g, &lit2)
2586 && lit2.length() >= ng.cc.grey.minRoseLiteralLength
2587 && minStringPeriod(lit2) >= 2) {
2588
2589 /* TODO: handle delay */
2590 size_t overlap = maxOverlap(lit, lit2, 0);
2591 u32 delay2 = min((size_t)max_delay, lit2.length() - overlap);
2592 delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2);
2593 rhs->kind = NFA_INFIX;
2594 assert(delay2 <= lit2.length());
2595 setReportOnHaigPrefix(*ng.rose, *rhs);
2596
2597 shared_ptr<raw_som_dfa> m_haig
2598 = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
2599 triggers, ng.cc.grey, true);
2600 DEBUG_PRINTF("mhs haig %p\n", m_haig.get());
2601 if (!m_haig) {
2602 DEBUG_PRINTF("failed to haig\n");
2603 return false;
2604 }
2605
2606 RoseInVertex w
2607 = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
2608 add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig);
2609
2610 NFAVertex reporter = getSoleSourceVertex(g, g.accept);
2611 assert(reporter);
2612 const auto &reports = g[reporter].reports;
2613 RoseInVertex a =
2614 add_vertex(RoseInVertexProps::makeAccept(reports), ig);
2615 add_edge(w, a, RoseInEdgeProps(0U, 0U), ig);
2616 } else {
2617 /* TODO: analysis to see if som is in fact always increasing */
2618 shared_ptr<raw_som_dfa> r_haig
2619 = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
2620 triggers, ng.cc.grey, true);
2621 DEBUG_PRINTF("rhs haig %p\n", r_haig.get());
2622 if (!r_haig) {
2623 DEBUG_PRINTF("failed to haig\n");
2624 return false;
2625 }
2626 RoseInVertex a
2627 = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()),
2628 ig);
2629 add_edge(v, a, RoseInEdgeProps(r_haig), ig);
2630 }
2631 } else {
2632 DEBUG_PRINTF("has start->accept edge\n");
2633 if (in_degree(g.acceptEod, g) > 1) {
2634 DEBUG_PRINTF("also has a path to EOD\n");
2635 return false;
2636 }
2637 NFAVertex reporter = getSoleSourceVertex(g, g.accept);
2638 if (!reporter) {
2639 return false; /* TODO: later */
2640 }
2641 const auto &reports = g[reporter].reports;
2642 assert(!reports.empty());
2643 RoseInVertex a =
2644 add_vertex(RoseInVertexProps::makeAccept(reports), ig);
2645 add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
2646 }
2647
2648 calcVertexOffsets(ig);
2649
2650 return ng.rose->addSombeRose(ig);
2651}
2652
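/**
 * As doLitHaigSom, but allows a set of leading literals rather than a single
 * one; all of the literals trigger the same Haig suffix, which must therefore
 * cope with unordered SOM triggers.
 */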
2653static
2654bool doMultiLitHaigSom(NG &ng, NGHolder &g, som_type som) {
2655 set<ue2_literal> lits;
2656 shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
2657 if (!ng.cc.grey.allowLitHaig) {
2658 return false;
2659 }
2660
2661 dumpHolder(g, 90, "lithaig_full", ng.cc.grey);
2662
2663 if (!splitOffLeadingLiterals(g, &lits, &*rhs)) {
2664 DEBUG_PRINTF("no literal\n");
2665 return false;
2666 }
2667
2668 makeReportsSomPass(ng.rm, *rhs);
2669
2670 dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);
2671
2672 vector<vector<CharReach>> triggers;
2673 for (const auto &lit : lits) {
2674 if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
2675 DEBUG_PRINTF("lit too short\n");
2676 return false;
2677 }
2678
2679 assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
2680 triggers.push_back(as_cr_seq(lit));
2681 }
2682
2683 bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that
2684 * we can promise ordering */
2685
2686 assert(rhs->kind == NFA_SUFFIX);
2687 shared_ptr<raw_som_dfa> haig
2688 = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
2689 ng.cc.grey, unordered_som_triggers);
2690 if (!haig) {
2691 DEBUG_PRINTF("failed to haig\n");
2692 return false;
2693 }
2694 DEBUG_PRINTF("haig %p\n", haig.get());
2695
2696 RoseInGraph ig;
2697 RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
2698
2699 RoseInVertex a
2700 = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
2701
2702 for (const auto &lit : lits) {
2703 RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
2704 add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
2705 add_edge(v, a, RoseInEdgeProps(haig), ig);
2706 }
2707
2708 calcVertexOffsets(ig);
2709
2710 return ng.rose->addSombeRose(ig);
2711}
2712
2713static
2714bool trySombe(NG &ng, NGHolder &g, som_type som) {
2715 if (doLitHaigSom(ng, g, som)) {
2716 return true;
2717 }
2718
2719 auto regions = assignRegions(g);
2720
2721 if (doHaigLitHaigSom(ng, g, regions, som)) {
2722 return true;
2723 }
2724
2725 if (doMultiLitHaigSom(ng, g, som)) {
2726 return true;
2727 }
2728
2729 return false;
2730}
2731
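/**
 * Pick the last region (working from the start of the graph) whose exits can
 * still establish SOM; this is the natural initial cut point for SOM
 * planning. Returns info.end() if no region qualifies.
 */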
2732static
2733map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g,
2734 const unordered_map<NFAVertex, u32> &regions,
2735 const map<u32, region_info> &info,
2736 const vector<DepthMinMax> &depths) {
2737 map<u32, region_info>::const_iterator picked = info.end();
2738 for (map<u32, region_info>::const_iterator it = info.begin();
2739 it != info.end(); ++it) {
2740 if (it->second.exits.empty()) {
2741 assert(it == info.begin());
2742 continue;
2743 }
2744
2745 if (!regionCanEstablishSom(g, regions, it->first, it->second.exits,
2746 depths)) {
2747 /* last region is as far as we can go */
2748 DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first);
2749 break;
2750 }
2751 picked = it;
2752 }
2753
2754 return picked;
2755}
2756
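/**
 * Look for a cut point later than the original pick whose prefix could
 * instead be implemented as a small reverse NFA: the candidate region must
 * have a single non-optional exit, no direct edges to the accepts, decent
 * literals leading into it, and a reversed prefix of bounded depth and size.
 */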
2757static
2758map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g,
2759 const unordered_map<NFAVertex, u32> &regions,
2760 const map<u32, region_info> &info,
2761 const vector<DepthMinMax> &depths,
2762 const map<u32, region_info>::const_iterator &orig,
2763 const CompileContext &cc) {
2764 DEBUG_PRINTF("trying for later rev nfa cut\n");
2765 assert(orig != info.end());
2766
2767 vector<map<u32, region_info>::const_iterator> cands;
2768
2769 map<u32, region_info>::const_iterator it = orig;
2770 ++it;
2771 for (; it != info.end(); ++it) {
2772 /* for simplicity */
2773 if (it->second.exits.size() != 1 || it->second.optional) {
2774 continue;
2775 }
2776 NFAVertex v = *it->second.exits.begin();
2777
2778 if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
            continue; /* skipped for simplicity; handling these would also
                       * require external som nfa reports. */
2781 }
2782
2783 const depth &max_depth = depths[g[v].index].max;
2784 if (max_depth >
2785 depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */
2786 continue;
2787 }
2788
2789 if (max_depth > depth(MAX_REV_NFA_PREFIX)) {
2790 /* probably not a good idea, anyway */
2791 continue;
2792 }
2793
2794 cands.push_back(it);
2795 }
2796
2797 while (!cands.empty()) {
2798 map<u32, region_info>::const_iterator rv = cands.back();
2799 cands.pop_back();
2800
2801 NFAVertex v = *rv->second.exits.begin();
2802
2803 set<ue2_literal> lits = getLiteralSet(g, v);
2804 compressAndScore(lits);
2805 if (lits.empty()) {
2806 next_region:
2807 continue;
2808 }
2809 for (const auto &lit : lits) {
2810 if (lit.length() <= 3 || minStringPeriod(lit) < 2) {
2811 goto next_region;
2812 }
2813 }
2814
2815 if (rv->second.enters.empty()
2816 || find(rv->second.full.begin(), rv->second.full.end(), g.startDs)
2817 != rv->second.full.end()) {
2818 continue;
2819 }
2820
2821 if (!isMandRegionBetween(info.begin(), rv)
2822 && info.begin()->second.optional) {
2823 continue;
2824 }
2825
2826 /* check to see if it is a reasonable size */
2827 auto prefix =
2828 makePrefix(g, regions, rv->second, next(rv)->second, false);
2829
2830 NGHolder g_rev;
2831 reverseHolder(*prefix, g_rev);
2832 anchorStarts(g_rev);
2833
2834 renumber_vertices(g_rev);
2835 g_rev.kind = NFA_REV_PREFIX;
2836 reduceGraphEquivalences(g_rev, cc);
2837 removeRedundancy(g_rev, SOM_NONE);
2838
2839 if (num_vertices(g_rev) > 128) { /* too big */
2840 continue;
2841 }
2842
2843 return rv;
2844 }
2845
2846 return info.end();
2847}
2848
2849static
2850unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
2851 const unordered_map<NFAVertex, u32> &regions,
2852 const map<u32, region_info> &info,
2853 const map<u32, region_info>::const_iterator &picked,
2854 vector<DepthMinMax> *depths, bool prefix_by_rev,
2855 ReportManager &rm) {
2856 DEBUG_PRINTF("making prefix for chain attempt\n");
2857 auto prefix =
2858 makePrefix(g, regions, picked->second, next(picked)->second, false);
2859
2860 /* For the root SOM plan, we use a temporary SOM slot to start with so that
2861 * we don't have to do any complicated rollback operations if the call to
2862 * doSomPlanning() below fails. The temporary SOM slot is replaced with a
2863 * real one afterwards. */
2864 const u32 temp_som_loc = UINT32_MAX;
2865 setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE,
2866 temp_som_loc, *depths, prefix_by_rev);
2867
2868 /* handle direct edge to accepts from region */
2869 if (edge(picked->second.exits.front(), g.accept, g).second
2870 || edge(picked->second.exits.front(), g.acceptEod, g).second) {
2871 map<u32, region_info>::const_iterator it = picked;
2872 do {
2873 makeSomRelReports(rm, g, it->second.exits, *depths);
2874 } while (it != info.begin() && it->second.optional && (it--)->first);
2875 }
2876
2877 depths->clear(); /* renumbering invalidates depths */
2878 renumber_vertices(*prefix);
2879
2880 DEBUG_PRINTF("done\n");
2881 return prefix;
2882}
2883
2884sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
2885 som_type som) {
2886 assert(som);
2887 DEBUG_PRINTF("som hello\n");
2888 ReportManager &rm = ng.rm;
2889 SomSlotManager &ssm = ng.ssm;
2890 const CompileContext &cc = ng.cc;
2891
2892 // Special case: if g is completely anchored or begins with a dot-star, we
2893 // know that we have an absolute SOM of zero all the time.
2894 if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) {
2895 makeSomAbsReports(rm, g, g.accept);
2896 makeSomAbsReports(rm, g, g.acceptEod);
2897 return SOMBE_HANDLED_INTERNAL;
2898 }
2899
2900 if (!cc.grey.allowSomChain) {
2901 return SOMBE_FAIL;
2902 }
2903
2904 // A pristine copy of the input graph, which must be restored to in paths
2905 // that return false. Also used as the forward graph for som rev nfa
2906 // construction.
2907 NGHolder g_pristine;
2908 cloneHolder(g_pristine, g);
2909
2910 vector<DepthMinMax> depths = getDistancesFromSOM(g);
2911
2912 // try a redundancy pass.
2913 if (addSomRedundancy(g, depths)) {
2914 depths = getDistancesFromSOM(g); // recalc
2915 }
2916
2917 auto regions = assignRegions(g);
2918
2919 dumpHolder(g, regions, 11, "som_explode", cc.grey);
2920
2921 map<u32, region_info> info;
2922 buildRegionMapping(g, regions, info);
2923
2924 map<u32, region_info>::const_iterator picked
2925 = pickInitialSomCut(g, regions, info, depths);
    if (picked == info.end() || picked->second.exits.empty()) {
        DEBUG_PRINTF("no regions/no progress possible\n");
        clear_graph(g);
        cloneHolder(g, g_pristine);
        if (doSomRevNfa(ng, g, cc)) {
            return SOMBE_HANDLED_INTERNAL;
        } else {
            return SOMBE_FAIL;
        }
    }

    // Only dereference 'picked' once we know it is valid.
    DEBUG_PRINTF("picked %u\n", picked->first);

    if (finalRegion(g, regions, picked->second.exits[0])) {
2939 makeSomRelReports(rm, g, g.accept, depths);
2940 makeSomRelReports(rm, g, g.acceptEod, depths);
2941 return SOMBE_HANDLED_INTERNAL;
2942 }
2943
2944 if (doSomRevNfa(ng, g_pristine, cc)) {
2945 clear_graph(g);
2946 cloneHolder(g, g_pristine);
2947 return SOMBE_HANDLED_INTERNAL;
2948 }
2949
2950 bool prefix_by_rev = false;
2951 map<u32, region_info>::const_iterator picked_old = picked;
2952 map<u32, region_info>::const_iterator rev_pick
2953 = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc);
2954 if (rev_pick != info.end()) {
2955 DEBUG_PRINTF("found later rev prefix cut point\n");
2956 assert(rev_pick != picked);
2957 picked = rev_pick;
2958 prefix_by_rev = true;
2959 } else {
        /* sanity checks for the picked region; these checks have already been
         * done if we are using a prefix reverse nfa. */
2962 if (picked->second.enters.empty()
2963 || find(picked->second.full.begin(), picked->second.full.end(),
2964 g.startDs) != picked->second.full.end()) {
2965 clear_graph(g);
2966 cloneHolder(g, g_pristine);
2967 return SOMBE_FAIL;
2968 }
2969
2970 if (!isMandRegionBetween(info.begin(), picked)
2971 && info.begin()->second.optional) {
2972 clear_graph(g);
2973 cloneHolder(g, g_pristine);
2974 return SOMBE_FAIL;
2975 }
2976 }
2977
2978 DEBUG_PRINTF("region %u is the final\n", picked->first);
2979
2980 shared_ptr<NGHolder> prefix = makePrefixForChain(
2981 g, regions, info, picked, &depths, prefix_by_rev, rm);
2982 /* note depths cleared as we have renumbered */
2983
2984 CharReach escapes;
2985 bool stuck = isPossibleLock(g, picked, info, &escapes);
2986 if (stuck) {
2987 DEBUG_PRINTF("investigating potential lock\n");
2988
2989 NGHolder gg;
2990 fillHolderForLockCheck(&gg, g, info, picked);
2991
2992 stuck = firstMatchIsFirst(gg);
2993 }
2994
2995 if (stuck && escapes.none()) {
2996 /* leads directly to .* --> woot */
2997 DEBUG_PRINTF("initial slot is full lock\n");
2998 u32 som_loc = ssm.getSomSlot(*prefix, escapes, false,
2999 SomSlotManager::NO_PARENT);
3000 replaceTempSomSlot(rm, *prefix, som_loc);
3001
3002 /* update all reports on g to report the som_loc's som */
3003 updateReportToUseRecordedSom(rm, g, som_loc);
3004
3005 /* create prefix to set the som_loc */
3006 updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
3007 if (prefix_by_rev) {
3008 u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
3009 updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
3010 }
3011 renumber_vertices(*prefix);
3012 if (!ng.addHolder(*prefix)) {
3013 DEBUG_PRINTF("failed to add holder\n");
3014 clear_graph(g);
3015 cloneHolder(g, g_pristine);
3016 return SOMBE_FAIL;
3017 }
3018
3019 DEBUG_PRINTF("ok found initial lock\n");
3020 return SOMBE_HANDLED_INTERNAL;
3021 }
3022
3023 vector<som_plan> plan;
3024 retry:
3025 // Note: no-one should ever pay attention to the root plan's parent.
3026 plan.push_back(som_plan(prefix, escapes, false, 0));
3027 dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey);
3028 if (!prefix_by_rev) {
3029 if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) {
3030 DEBUG_PRINTF("failed\n");
3031 clear_graph(g);
3032 cloneHolder(g, g_pristine);
3033 return SOMBE_FAIL;
3034 }
3035 } else {
3036 DEBUG_PRINTF("trying for som plan\n");
3037 if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey,
3038 DISALLOW_MODIFY_HOLDER)) {
            /* Note: the larger prefixes generated by reverse nfas may not
             * advance as far as the original prefix - so we should retry
             * with a smaller prefix. */
3042
3043 prefix_by_rev = false;
3044 stuck = false; /* if we reached a lock, then prefix_by_rev would not
3045 * have advanced. */
3046 picked = picked_old;
3047 plan.clear();
3048 depths = getDistancesFromSOM(g); /* due to renumbering, need to
3049 * regenerate */
3050 prefix = makePrefixForChain(g, regions, info, picked, &depths,
3051 prefix_by_rev, rm);
3052 escapes.clear();
3053 DEBUG_PRINTF("retrying\n");
3054 goto retry;
3055 }
3056 }
3057 DEBUG_PRINTF("som planning ok\n");
3058
    /* if the initial prefix is weak, see if sombe approaches do better */
3060 if (findMinWidth(*prefix) <= depth(2)) {
3061 DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n");
3062 NGHolder g2;
3063 cloneHolder(g2, g_pristine);
3064 if (trySombe(ng, g2, som)) {
3065 return SOMBE_HANDLED_ALL;
3066 }
3067 }
3068
3069 /* From this point we know that we are going to succeed or die horribly with
3070 * a pattern too large. Anything done past this point can be considered
3071 * committed to the compile. */
3072
3073 regions = assignRegions(g); // Update as g may have changed.
3074
3075 DEBUG_PRINTF("-- get slot for initial plan\n");
3076 u32 som_loc;
3077 if (plan[0].is_reset) {
3078 som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions,
3079 picked->first, &plan[0].no_implement);
3080 } else {
3081 som_loc = ssm.getSomSlot(*prefix, escapes, false,
3082 SomSlotManager::NO_PARENT);
3083 }
3084
3085 replaceTempSomSlot(rm, *prefix, som_loc);
3086
3087 if (plan.front().is_reset) {
3088 updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
3089 }
3090 if (prefix_by_rev && !plan.front().no_implement) {
3091 u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
3092 updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
3093 }
3094
3095 implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
3096
3097 DEBUG_PRINTF("success\n");
3098 return SOMBE_HANDLED_INTERNAL;
3099}
3100
3101sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
3102 u32 comp_id, som_type som) {
3103 assert(som);
3104
3105 DEBUG_PRINTF("som+haig hello\n");
3106
3107 // A pristine copy of the input graph, which must be restored to in paths
3108 // that return false. Also used as the forward graph for som rev nfa
3109 // construction.
3110 NGHolder g_pristine;
3111 cloneHolder(g_pristine, g);
3112
3113 if (trySombe(ng, g, som)) {
3114 return SOMBE_HANDLED_ALL;
3115 }
3116
3117 if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) {
3118 return SOMBE_FAIL;
3119 }
3120
3121 // know that we have an absolute SOM of zero all the time.
3122 assert(edge(g.startDs, g.startDs, g).second);
3123
3124 vector<DepthMinMax> depths = getDistancesFromSOM(g);
3125
3126 // try a redundancy pass.
3127 if (addSomRedundancy(g, depths)) {
3128 depths = getDistancesFromSOM(g);
3129 }
3130
3131 auto regions = assignRegions(g);
3132
3133 dumpHolder(g, regions, 21, "som_explode", ng.cc.grey);
3134
3135 map<u32, region_info> info;
3136 buildRegionMapping(g, regions, info, true);
3137
3138 sombe_rv rv =
3139 doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
3140 if (rv == SOMBE_FAIL) {
3141 clear_graph(g);
3142 cloneHolder(g, g_pristine);
3143 }
3144 return rv;
3145}
3146
3147} // namespace ue2
3148