1/*
2 * Copyright (c) 2015-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "rose_build_add_internal.h"
30#include "rose_build_impl.h"
31
32#include "ue2common.h"
33#include "grey.h"
34#include "rose_build_anchored.h"
35#include "rose_in_util.h"
36#include "hwlm/hwlm_literal.h"
37#include "nfa/goughcompile.h"
38#include "nfa/nfa_api_queue.h"
39#include "nfagraph/ng_depth.h"
40#include "nfagraph/ng_dump.h"
41#include "nfagraph/ng_holder.h"
42#include "nfagraph/ng_limex.h"
43#include "nfagraph/ng_mcclellan.h"
44#include "nfagraph/ng_prefilter.h"
45#include "nfagraph/ng_prune.h"
46#include "nfagraph/ng_region.h"
47#include "nfagraph/ng_repeat.h"
48#include "nfagraph/ng_reports.h"
49#include "nfagraph/ng_util.h"
50#include "nfagraph/ng_width.h"
51#include "util/charreach.h"
52#include "util/charreach_util.h"
53#include "util/compare.h"
54#include "util/compile_context.h"
55#include "util/container.h"
56#include "util/dump_charclass.h"
57#include "util/graph_range.h"
58#include "util/insertion_ordered.h"
59#include "util/make_unique.h"
60#include "util/noncopyable.h"
61#include "util/order_check.h"
62#include "util/report_manager.h"
63#include "util/ue2string.h"
64#include "util/verify_types.h"
65
66#include <algorithm>
67#include <map>
68#include <set>
69#include <string>
70#include <vector>
71#include <utility>
72
73using namespace std;
74
75namespace ue2 {
76
77/**
78 * \brief Data used by most of the construction code in this file.
79 */
80struct RoseBuildData : noncopyable {
81 RoseBuildData(const RoseInGraph &ig_in, bool som_in)
82 : ig(ig_in), som(som_in) {}
83
84 /** Input rose graph. */
85 const RoseInGraph &ig;
86
87 /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
88 * require ANCH history to prevent overlap. */
89 unordered_set<RoseInEdge> anch_history_edges;
90
91 /** True if we're tracking Start of Match. */
92 bool som;
93};
94
95static
96ReportID findReportId(const NGHolder &g) {
97 /* prefix/infix always have an edge to accept and only 1 reportid initially
98 */
99 assert(in_degree(g.accept, g));
100 const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
101 assert(!rep.empty());
102 return *rep.begin();
103}
104
105static
106RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
107 u32 max_offset) {
108 RoseGraph &g = build->g;
109 // add to tree
110 RoseVertex v = add_vertex(g);
111 g[v].min_offset = min_offset;
112 g[v].max_offset = max_offset;
113
114 DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index,
115 literalId);
116 g[v].literals.insert(literalId);
117 build->literal_info[literalId].vertices.insert(v);
118
119 return v;
120}
121
122RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
123 u32 minBound, u32 maxBound, u32 literalId,
124 size_t literalLength,
125 const flat_set<ReportID> &reports) {
126 assert(parent != RoseGraph::null_vertex());
127
128 RoseGraph &g = build->g;
129 // add to tree (offsets set latter)
130 RoseVertex v = createVertex(build, literalId, 0U, 0U);
131
132 /* fill in report information */
133 g[v].reports.insert(reports.begin(), reports.end());
134
135 RoseEdge e = add_edge(parent, v, g);
136 DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
137
138 g[e].minBound = minBound;
139 g[e].maxBound = maxBound;
140 g[e].rose_top = 0;
141
142 u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
143 u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
144
145 /* take literal length into account for offsets */
146 const u32 lit_len = verify_u32(literalLength);
147 min_offset = add_rose_depth(min_offset, lit_len);
148 max_offset = add_rose_depth(max_offset, lit_len);
149
150 g[v].min_offset = min_offset;
151 g[v].max_offset = max_offset;
152
153 return v;
154}
155
156static
157RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
158 u32 min_offset, u32 max_offset) {
159 RoseGraph &g = build->g;
160 RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
161
162 DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index,
163 literalId);
164
165 RoseEdge e = add_edge(build->anchored_root, v, g);
166 g[e].minBound = min_offset;
167 g[e].maxBound = max_offset;
168
169 return v;
170}
171
172static
173RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
174 RoseGraph &g = build->g;
175 RoseVertex w = add_vertex(g[v], g);
176 DEBUG_PRINTF("added vertex %zu\n", g[w].index);
177
178 for (auto lit_id : g[w].literals) {
179 build->literal_info[lit_id].vertices.insert(w);
180 }
181
182 for (const auto &e : in_edges_range(v, g)) {
183 RoseVertex s = source(e, g);
184 add_edge(s, w, g[e], g);
185 DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index);
186 }
187
188 return w;
189}
190
191namespace {
192struct created_key {
193 explicit created_key(const RoseInEdgeProps &trep)
194 : prefix(trep.graph.get()), lag(trep.graph_lag) {
195 }
196 bool operator<(const created_key &b) const {
197 const created_key &a = *this;
198 ORDER_CHECK(prefix);
199 ORDER_CHECK(lag);
200 return false;
201 }
202 NGHolder *prefix;
203 u32 lag;
204};
205}
206
207static
208bool isPureAnchored(const NGHolder &h) {
209 return !proper_out_degree(h.startDs, h);
210}
211
212static
213RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
214 const RoseInEdge &rose_edge, const RoseEdge &e) {
215 const RoseGraph &g = tbi.g;
216 const RoseVertex u = source(e, g), v = target(e, g);
217 const bool fixed_offset_src = g[u].fixedOffset();
218 const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
219
220 DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
221 g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
222 (int)g[u].fixedOffset(), (int)g[v].left);
223
224 if (g[v].left) {
225 // Roles with prefix engines have their history handled by that prefix.
226 assert(!contains(bd.anch_history_edges, rose_edge));
227 return ROSE_ROLE_HISTORY_NONE;
228 }
229
230 if (contains(bd.anch_history_edges, rose_edge)) {
231 DEBUG_PRINTF("needs anch history\n");
232 return ROSE_ROLE_HISTORY_ANCH;
233 }
234
235 if (fixed_offset_src && has_bounds) {
236 DEBUG_PRINTF("needs anch history\n");
237 return ROSE_ROLE_HISTORY_ANCH;
238 }
239
240 return ROSE_ROLE_HISTORY_NONE;
241}
242
243static
244bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
245 for (auto v : adjacent_vertices_range(iv, ig)) {
246 if (ig[v].type != RIV_ACCEPT) {
247 return true;
248 }
249 }
250 return false;
251}
252
253static
254void createVertices(RoseBuildImpl *tbi,
255 map<RoseInVertex, vector<RoseVertex> > &vertex_map,
256 const vector<pair<RoseVertex, RoseInEdge> > &parents,
257 RoseInVertex iv, u32 min_offset, u32 max_offset,
258 u32 literalId, u32 delay, const RoseBuildData &bd) {
259 RoseGraph &g = tbi->g;
260
261 DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
262
263 map<created_key, RoseVertex> created;
264
265 for (const auto &pv : parents) {
266 RoseVertex w;
267 const RoseInEdgeProps &edge_props = bd.ig[pv.second];
268 shared_ptr<NGHolder> prefix_graph = edge_props.graph;
269 u32 prefix_lag = edge_props.graph_lag;
270
271 created_key key(edge_props);
272
273 if (!contains(created, key)) {
274 assert(prefix_graph || !edge_props.haig);
275 w = createVertex(tbi, literalId, min_offset, max_offset);
276 created[key] = w;
277
278 if (prefix_graph) {
279 g[w].left.graph = prefix_graph;
280 if (edge_props.dfa) {
281 g[w].left.dfa = edge_props.dfa;
282 }
283 g[w].left.haig = edge_props.haig;
284 g[w].left.lag = prefix_lag;
285
286 // The graph already has its report id allocated - find it.
287 g[w].left.leftfix_report = findReportId(*prefix_graph);
288
289 if (g[w].left.dfa || g[w].left.haig) {
290 assert(prefix_graph);
291 g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
292 g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
293 }
294 }
295
296 if (bd.som && !g[w].left.haig) {
297 /* no prefix - som based on literal start */
298 assert(!prefix_graph);
299 g[w].som_adjust = tbi->literals.at(literalId).elength();
300 DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
301 }
302
303 DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index);
304 vertex_map[iv].push_back(w);
305 } else {
306 w = created[key];
307 }
308
309 RoseVertex p = pv.first;
310
311 RoseEdge e = add_edge(p, w, g);
312 DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
313 edge_props.maxBound);
314 g[e].minBound = edge_props.minBound;
315 if (p != tbi->root && g[w].left.graph
316 && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
317 depth mw = findMaxWidth(*g[w].left.graph);
318 if (mw.is_infinite()) {
319 g[e].maxBound = ROSE_BOUND_INF;
320 } else {
321 DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
322 prefix_lag);
323 g[e].maxBound = prefix_lag + mw;
324 }
325 } else {
326 g[e].maxBound = edge_props.maxBound;
327 }
328 g[e].rose_top = 0;
329 g[e].history = selectHistory(*tbi, bd, pv.second, e);
330 }
331
332 if (delay && hasSuccessorLiterals(iv, bd.ig)) {
333 // Add an undelayed "ghost" vertex for this literal.
334 u32 ghostId = tbi->literal_info[literalId].undelayed_id;
335 DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
336 assert(ghostId != literalId);
337 assert(tbi->literals.at(ghostId).delay == 0);
338
339 // Adjust offsets, removing delay.
340 u32 ghost_min = min_offset, ghost_max = max_offset;
341 assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
342 ghost_min -= delay;
343 ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
344
345 RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
346
347 for (const auto &pv : parents) {
348 const RoseInEdgeProps &edge_props = bd.ig[pv.second];
349 RoseEdge e = add_edge(pv.first, g_v, tbi->g);
350 g[e].minBound = edge_props.minBound;
351 g[e].maxBound = edge_props.maxBound;
352 g[e].history = selectHistory(*tbi, bd, pv.second, e);
353 DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
354 edge_props.minBound, edge_props.maxBound);
355 }
356
357 for (auto &m : created) {
358 tbi->ghost[m.second] = g_v;
359 }
360 }
361}
362
363/* ensure the holder does not accept any paths which do not end with lit */
364static
365void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
366 DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str());
367 set<NFAVertex> curr, next;
368 curr.insert(g.accept);
369 curr.insert(g.acceptEod);
370
371 for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
372 next.clear();
373 for (auto curr_v : curr) {
374 DEBUG_PRINTF("handling %zu\n", g[curr_v].index);
375 vector<NFAVertex> next_cand;
376 insert(&next_cand, next_cand.end(),
377 inv_adjacent_vertices(curr_v, g));
378 clear_in_edges(curr_v, g);
379 if (curr_v == g.acceptEod) {
380 add_edge(g.accept, g.acceptEod, g);
381 }
382
383 for (auto v : next_cand) {
384 assert(v != g.startDs);
385 if (v == g.start || v == g.startDs || v == g.accept) {
386 continue;
387 }
388
389 const CharReach &cr = g[v].char_reach;
390
391 if (!overlaps(*it, cr)) {
392 DEBUG_PRINTF("false edge %zu\n", g[v].index);
393 continue;
394 }
395
396 NFAVertex v2 = clone_vertex(g, v);
397 clone_in_edges(g, v, v2);
398 add_edge(v2, curr_v, g);
399 g[v2].char_reach &= *it;
400 DEBUG_PRINTF("next <- %zu\n", g[v2].index);
401 next.insert(v2);
402 }
403 }
404
405 curr.swap(next);
406 }
407
408 pruneUseless(g);
409 clearReports(g);
410 assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
411 assert(allMatchStatesHaveReports(g));
412
413 DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
414}
415
416static
417RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
418 const RoseInVertexProps &iv_info,
419 const RoseInEdgeProps &ep) {
420 if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
421 DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
422 return RoseGraph::null_vertex(); /* TODO: better */
423 }
424
425 const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
426 depth min_width(0), max_width(0);
427
428 if (ep.graph.get()) {
429 const depth graph_lag(ep.graph_lag);
430 max_width = findMaxWidth(*ep.graph) + graph_lag;
431 min_width = findMinWidth(*ep.graph) + graph_lag;
432 if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
433 max_width = depth::infinity();
434 }
435 }
436
437 DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
438
439 NGHolder h;
440
441 if (ep.graph.get() && max_width <= anchored_max_depth) {
442 cloneHolder(h, *ep.graph);
443
444 /* add literal/dots */
445 if (ep.graph_lag) {
446 assert(ep.graph_lag == iv_info.s.length());
447 appendLiteral(h, iv_info.s);
448 } else {
449 removeFalsePaths(h, iv_info.s);
450 }
451 } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
452 && iv_info.s.length() + ep.maxBound
453 <= tbi->cc.grey.maxAnchoredRegion) {
454 if (ep.maxBound || ep.minBound) {
455 /* TODO: handle, however these cases are not generated currently by
456 ng_violet */
457 return RoseGraph::null_vertex();
458 }
459 max_width = depth(ep.maxBound + iv_info.s.length());
460 min_width = depth(ep.minBound + iv_info.s.length());
461 add_edge(h.start, h.accept, h);
462 appendLiteral(h, iv_info.s);
463 } else {
464 return RoseGraph::null_vertex();
465 }
466
467 u32 anchored_exit_id = tbi->getNewLiteralId();
468 u32 remap_id = 0;
469 DEBUG_PRINTF(" trying to add dfa stuff\n");
470 int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
471
472 if (rv == ANCHORED_FAIL) {
473 return RoseGraph::null_vertex();
474 } else if (rv == ANCHORED_REMAP) {
475 anchored_exit_id = remap_id;
476 } else {
477 assert(rv == ANCHORED_SUCCESS);
478 }
479
480 // Store the literal itself in a side structure so that we can use it for
481 // overlap calculations later. This may be obsolete when the old Rose
482 // construction path (and its history selection code) goes away.
483 rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
484 tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
485
486 assert(min_width <= anchored_max_depth);
487 assert(max_width <= anchored_max_depth);
488 assert(min_width <= max_width);
489
490 /* Note: bounds are end-to-end as anchored lits are considered
491 * to have 0 length. */
492 RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
493 max_width);
494 return v;
495}
496
497static
498u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
499 const RoseInVertexProps &succ_vp) {
500 /* we need to ensure there is enough history to find the successor literal
501 * when we enable its group.
502 */
503
504 if (!ep.graph.get()) {
505 return 0; /* non overlapping */
506 }
507 depth graph_min_width = findMinWidth(*ep.graph);
508 u32 min_width = ep.graph_lag + graph_min_width;
509 u32 s_len = succ_vp.s.length();
510
511 if (s_len <= min_width) {
512 return 0; /* no overlap */
513 }
514
515 u32 overlap = s_len - min_width;
516 DEBUG_PRINTF("found overlap of %u\n", overlap);
517 return overlap;
518}
519
520static
521void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
522 vector<u8> &cmp) {
523 if (lag >= HWLM_MASKLEN) {
524 msk.clear(); cmp.clear();
525 return;
526 }
527
528 assert(in_degree(h.acceptEod, h) == 1); // no eod reports
529
530 // Start with the set of reporter vertices for this rose.
531 set<NFAVertex> curr, next;
532 insert(&curr, inv_adjacent_vertices(h.accept, h));
533 assert(!curr.empty());
534
535 msk.assign(HWLM_MASKLEN, 0);
536 cmp.assign(HWLM_MASKLEN, 0);
537 size_t i = HWLM_MASKLEN - lag - 1;
538 do {
539 if (curr.empty() || contains(curr, h.start) ||
540 contains(curr, h.startDs)) {
541 DEBUG_PRINTF("end of the road\n");
542 break;
543 }
544
545 next.clear();
546 CharReach cr;
547 for (auto v : curr) {
548 DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index,
549 describeClass(h[v].char_reach).c_str());
550 cr |= h[v].char_reach;
551 insert(&next, inv_adjacent_vertices(v, h));
552 }
553 make_and_cmp_mask(cr, &msk[i], &cmp[i]);
554 DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
555 describeClass(cr).c_str(), msk.at(i), cmp.at(i));
556 curr.swap(next);
557 } while (i-- > 0);
558}
559
560static
561void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
562 map<RoseInVertex, vector<RoseVertex> > &vertex_map,
563 const vector<pair<RoseVertex, RoseInEdge> > &parents,
564 RoseInVertex iv, const RoseBuildData &bd) {
565 const RoseInGraph &ig = bd.ig;
566 const RoseInVertexProps &iv_info = ig[iv];
567 assert(iv_info.type == RIV_LITERAL);
568 assert(!parents.empty()); /* start vertices should not be here */
569
570 // ng_violet should have ensured that mixed-sensitivity literals are no
571 // longer than the benefits max width.
572 assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
573 !mixed_sensitivity(iv_info.s));
574
575 // Rose graph construction process should have given us a min_offset.
576 assert(iv_info.min_offset > 0);
577
578 if (use_eod_table) {
579 goto floating;
580 }
581
582 DEBUG_PRINTF("rose find vertex\n");
583 if (parents.size() == 1) {
584 const RoseVertex u = parents.front().first;
585 const RoseInEdgeProps &ep = ig[parents.front().second];
586
587 if (!tbi->isAnyStart(u)) {
588 goto floating;
589 }
590
591 if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
592 goto floating;
593 }
594 if (ep.graph && !isAnchored(*ep.graph)) {
595 goto floating;
596 }
597
598 DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
599 ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
600
601 /* need to check if putting iv into the anchored table would create
602 * any bad_overlap relationships with its successor literals */
603 for (const auto &e : out_edges_range(iv, ig)) {
604 RoseInVertex t = target(e, ig);
605 u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
606 DEBUG_PRINTF("found overlap of %u\n", overlap);
607 if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
608 goto floating;
609 }
610 }
611
612 RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
613 if (v != RoseGraph::null_vertex()) {
614 DEBUG_PRINTF("add anchored literal vertex\n");
615 vertex_map[iv].push_back(v);
616 return;
617 }
618 }
619
620floating:
621 vector<u8> msk, cmp;
622 if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
623 RoseInEdge e = *in_edges(iv, ig).first;
624 if (ig[e].graph) {
625 findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
626 }
627 }
628
629 u32 delay = iv_info.delay;
630 rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
631
632 u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
633
634 DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
635 literalId, iv_info.s.length(), delay, iv_info.min_offset,
636 iv_info.max_offset, dumpString(iv_info.s).c_str());
637
638 createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
639 iv_info.max_offset, literalId, delay, bd);
640}
641
642static
643unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build,
644 map<flat_set<ReportID>, ReportID> &remap) {
645 assert(generates_callbacks(h));
646 assert(!in_degree(h.accept, h));
647 auto gg = cloneHolder(h);
648 NGHolder &g = *gg;
649 g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX;
650
651 // Move acceptEod edges over to accept.
652 vector<NFAEdge> dead;
653 for (const auto &e : in_edges_range(g.acceptEod, g)) {
654 NFAVertex u = source(e, g);
655 if (u == g.accept) {
656 continue;
657 }
658 add_edge_if_not_present(u, g.accept, g);
659 dead.push_back(e);
660
661 if (!contains(remap, g[u].reports)) {
662 remap[g[u].reports] = build.getNewNfaReport();
663 }
664
665 g[u].reports = { remap[g[u].reports] };
666 }
667
668 remove_edges(dead, g);
669 return gg;
670}
671
672static
673u32 getEodEventID(RoseBuildImpl &build) {
674 // Allocate the EOD event if it hasn't been already.
675 if (build.eod_event_literal_id == MO_INVALID_IDX) {
676 build.eod_event_literal_id = build.getLiteralId({}, 0, ROSE_EVENT);
677 }
678
679 return build.eod_event_literal_id;
680}
681
682static
683void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u,
684 const NGHolder &h) {
685 assert(!build.isInETable(u));
686
687 RoseGraph &g = build.g;
688 map<flat_set<ReportID>, ReportID> report_remap;
689 shared_ptr<NGHolder> eod_leftfix
690 = makeRoseEodPrefix(h, build, report_remap);
691
692 u32 eod_event = getEodEventID(build);
693
694 for (const auto &report_mapping : report_remap) {
695 RoseVertex v = add_vertex(g);
696 g[v].literals.insert(eod_event);
697 build.literal_info[eod_event].vertices.insert(v);
698
699 g[v].left.graph = eod_leftfix;
700 g[v].left.leftfix_report = report_mapping.second;
701 g[v].left.lag = 0;
702 RoseEdge e1 = add_edge(u, v, g);
703 g[e1].minBound = 0;
704 g[e1].maxBound = ROSE_BOUND_INF;
705 g[v].min_offset = add_rose_depth(g[u].min_offset,
706 findMinWidth(*g[v].left.graph));
707 g[v].max_offset = ROSE_BOUND_INF;
708
709 depth max_width = findMaxWidth(*g[v].left.graph);
710 if (u != build.root && max_width.is_finite()
711 && (!build.isAnyStart(u) || isPureAnchored(*g[v].left.graph))) {
712 g[e1].maxBound = max_width;
713 g[v].max_offset = add_rose_depth(g[u].max_offset, max_width);
714 }
715
716 g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix
717 RoseVertex w = add_vertex(g);
718 g[w].eod_accept = true;
719 g[w].reports = report_mapping.first;
720 g[w].min_offset = g[v].min_offset;
721 g[w].max_offset = g[v].max_offset;
722 RoseEdge e = add_edge(v, w, g);
723 g[e].minBound = 0;
724 g[e].maxBound = 0;
725 /* No need to set history as the event is only delivered at the last
726 * byte anyway - no need to invalidate stale entries. */
727 g[e].history = ROSE_ROLE_HISTORY_NONE;
728 DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index);
729 }
730}
731
732static
733void doRoseAcceptVertex(RoseBuildImpl *tbi,
734 const vector<pair<RoseVertex, RoseInEdge> > &parents,
735 RoseInVertex iv, const RoseBuildData &bd) {
736 const RoseInGraph &ig = bd.ig;
737 assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
738
739 RoseGraph &g = tbi->g;
740
741 for (const auto &pv : parents) {
742 RoseVertex u = pv.first;
743 const RoseInEdgeProps &edge_props = bd.ig[pv.second];
744
745 /* We need to duplicate the parent vertices if:
746 *
747 * 1) It already has a suffix, etc as we are going to add the specified
748 * suffix, etc to the parents and we do not want to overwrite the
749 * existing information.
750 *
751 * 2) We are making the an EOD accept and the vertex already has other
752 * out-edges - The LAST_BYTE history used for EOD accepts is
753 * incompatible with normal successors. As accepts are processed last we
754 * do not need to worry about other normal successors being added later.
755 */
756 if (g[u].suffix || !g[u].reports.empty()
757 || (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g)
758 && !edge_props.graph)
759 || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
760 DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index);
761 assert(!tbi->isAnyStart(u));
762 u = duplicate(tbi, u);
763 g[u].suffix.reset();
764 g[u].eod_accept = false;
765 }
766
767 assert(!g[u].suffix);
768 if (ig[iv].type == RIV_ACCEPT) {
769 assert(!tbi->isAnyStart(u));
770 if (edge_props.dfa) {
771 DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index);
772 g[u].suffix.rdfa = edge_props.dfa;
773 g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
774 g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
775 } else if (edge_props.graph) {
776 DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
777 g[u].suffix.graph = edge_props.graph;
778 assert(g[u].suffix.graph->kind == NFA_SUFFIX);
779 /* TODO: set dfa_(min|max)_width */
780 } else if (edge_props.haig) {
781 DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index);
782 g[u].suffix.haig = edge_props.haig;
783 } else {
784 DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index);
785 assert(!g[u].eod_accept);
786 g[u].reports = ig[iv].reports;
787 }
788 } else {
789 assert(ig[iv].type == RIV_ACCEPT_EOD);
790 assert(!edge_props.haig);
791
792 if (!edge_props.graph) {
793 RoseVertex w = add_vertex(g);
794 g[w].eod_accept = true;
795 g[w].reports = ig[iv].reports;
796 g[w].min_offset = g[u].min_offset;
797 g[w].max_offset = g[u].max_offset;
798 RoseEdge e = add_edge(u, w, g);
799 g[e].minBound = 0;
800 g[e].maxBound = 0;
801 g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
802 DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index);
803 continue;
804 }
805
806 const NGHolder &h = *edge_props.graph;
807 assert(!in_degree(h.accept, h));
808 assert(generates_callbacks(h));
809
810 if (tbi->isInETable(u)) {
811 assert(h.kind == NFA_SUFFIX);
812 assert(!tbi->isAnyStart(u));
813 /* etable can't/shouldn't use eod event */
814 DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
815 g[u].suffix.graph = edge_props.graph;
816 continue;
817 }
818
819 makeEodEventLeftfix(*tbi, u, h);
820 }
821 }
822}
823
824static
825bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
826 u32 *max_len, const CompileContext &cc) {
827 map<RoseInVertex, u32> max_depth_from_eod;
828 *max_len = 0;
829
830 reverse(topo.begin(), topo.end()); /* we want to start at accept end */
831
832 for (auto v : topo) {
833 u32 v_depth = 0;
834
835 if (ig[v].type == RIV_ACCEPT) {
836 DEBUG_PRINTF("[ACCEPT]\n");
837 for (const auto &e : in_edges_range(v, ig)) {
838 if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
839 DEBUG_PRINTF("floating accept\n");
840 return false;
841 }
842 }
843 }
844
845 switch (ig[v].type) {
846 case RIV_LITERAL:
847 DEBUG_PRINTF("[LITERAL]\n");
848 break;
849 case RIV_START:
850 DEBUG_PRINTF("[START]\n");
851 break;
852 case RIV_ANCHORED_START:
853 DEBUG_PRINTF("[ANCHOR]\n");
854 break;
855 case RIV_ACCEPT:
856 break;
857 case RIV_ACCEPT_EOD:
858 DEBUG_PRINTF("[EOD]\n");
859 break;
860 default:
861 assert(0);
862 DEBUG_PRINTF("????\n");
863 return false;
864 }
865
866 for (const auto &e : out_edges_range(v, ig)) {
867 RoseInVertex t = target(e, ig);
868
869 assert(contains(max_depth_from_eod, t));
870 u64a max_width;
871
872 if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
873 /* start itself doesn't need to be in history buffer
874 * just need to make sure all succ literals are ok */
875 if (ig[t].type == RIV_LITERAL) {
876 max_width = ig[t].s.length();
877 } else {
878 max_width = 0;
879 }
880 if (ig[e].graph) {
881 depth graph_max_width = findMaxWidth(*ig[e].graph);
882 DEBUG_PRINTF("graph max width %s, lag %u\n",
883 graph_max_width.str().c_str(),
884 ig[e].graph_lag);
885 if (!graph_max_width.is_finite()) {
886 DEBUG_PRINTF("fail due to graph with inf max width\n");
887 return false;
888 }
889 max_width += graph_max_width;
890 }
891 } else if (ig[e].haig) {
892 DEBUG_PRINTF("fail due to haig\n");
893 return false;
894 } else if (ig[e].graph) {
895 depth graph_max_width = findMaxWidth(*ig[e].graph);
896 DEBUG_PRINTF("graph max width %s, lag %u\n",
897 graph_max_width.str().c_str(), ig[e].graph_lag);
898 if (!graph_max_width.is_finite()) {
899 DEBUG_PRINTF("fail due to graph with inf max width\n");
900 return false;
901 }
902 max_width = ig[e].graph_lag + graph_max_width;
903 } else {
904 max_width = ig[e].maxBound;
905 if (ig[t].type == RIV_LITERAL) {
906 max_width += ig[t].s.length();
907 }
908 }
909
910 max_width += max_depth_from_eod[t];
911 if (max_width > ROSE_BOUND_INF) {
912 max_width = ROSE_BOUND_INF;
913 }
914
915 DEBUG_PRINTF("max_width=%llu\n", max_width);
916
917 ENSURE_AT_LEAST(&v_depth, (u32)max_width);
918 }
919
920 if (v_depth == ROSE_BOUND_INF
921 || v_depth > cc.grey.maxHistoryAvailable) {
922 DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
923 return false;
924 }
925
926 max_depth_from_eod[v] = v_depth;
927 ENSURE_AT_LEAST(max_len, v_depth);
928 }
929
930 DEBUG_PRINTF("to the eod table and beyond\n");
931 return true;
932}
933
934static
935void shift_accepts_to_end(const RoseInGraph &ig,
936 vector<RoseInVertex> &topo_order) {
937 stable_partition(begin(topo_order), end(topo_order),
938 [&](RoseInVertex v){ return !is_any_accept(v, ig); });
939}
940
941static
942void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
943 const RoseInGraph &ig = bd.ig;
944
945 /* add the pattern in to the main rose graph */
946 DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
947
948 /* Note: an input vertex may need to create several rose vertices. This is
949 * primarily because a RoseVertex can only have 1 one leftfix */
950 map<RoseInVertex, vector<RoseVertex> > vertex_map;
951
952 vector<RoseInVertex> v_order = topo_order(ig);
953 shift_accepts_to_end(ig, v_order);
954
955 u32 eod_space_required;
956 bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
957 tbi->cc);
958 if (use_eod_table) {
959 ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
960 }
961
962 assert(ig[v_order.front()].type == RIV_START
963 || ig[v_order.front()].type == RIV_ANCHORED_START);
964
965 for (RoseInVertex iv : v_order) {
966 DEBUG_PRINTF("vertex %zu\n", ig[iv].index);
967
968 if (ig[iv].type == RIV_START) {
969 DEBUG_PRINTF("is root\n");
970 vertex_map[iv].push_back(tbi->root);
971 continue;
972 } else if (ig[iv].type == RIV_ANCHORED_START) {
973 DEBUG_PRINTF("is anchored root\n");
974 vertex_map[iv].push_back(tbi->anchored_root);
975 continue;
976 }
977
978 vector<pair<RoseVertex, RoseInEdge> > parents;
979 for (const auto &e : in_edges_range(iv, ig)) {
980 RoseInVertex u = source(e, ig);
981 assert(contains(vertex_map, u));
982 const vector<RoseVertex> &images = vertex_map[u];
983
984 // We should have no dupes.
985 assert(set<RoseVertex>(images.begin(), images.end()).size()
986 == images.size());
987
988 for (auto v_image : images) {
989 // v_image should NOT already be in our parents list.
990 assert(find_if(parents.begin(), parents.end(),
991 [&v_image](const pair<RoseVertex, RoseInEdge> &p) {
992 return p.first == v_image;
993 }) == parents.end());
994
995 parents.emplace_back(v_image, e);
996
997 if (tbi->isAnchored(v_image)) {
998 assert(!use_eod_table);
999 u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
1000 assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
1001 ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
1002 overlap);
1003 }
1004 }
1005 }
1006
1007 if (ig[iv].type == RIV_LITERAL) {
1008 DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
1009 assert(!isLeafNode(iv, ig));
1010 doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
1011 bd);
1012 } else {
1013 if (ig[iv].type == RIV_ACCEPT) {
1014 DEBUG_PRINTF("ACCEPT\n");
1015 } else {
1016 assert(ig[iv].type == RIV_ACCEPT_EOD);
1017 DEBUG_PRINTF("ACCEPT_EOD\n");
1018 }
1019 assert(isLeafNode(iv, ig)); /* accepts are final */
1020 doRoseAcceptVertex(tbi, parents, iv, bd);
1021 }
1022 }
1023 DEBUG_PRINTF("done\n");
1024}
1025
1026template<typename GraphT>
1027static
1028bool empty(const GraphT &g) {
1029 typename GraphT::vertex_iterator vi, ve;
1030 tie(vi, ve) = vertices(g);
1031 return vi == ve;
1032}
1033
1034static
1035bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm,
1036 const CompileContext &cc) {
1037 if (isImplementableNFA(h, &rm, cc)) {
1038 return true;
1039 }
1040
1041 if (prefilter && cc.grey.prefilterReductions) {
1042 // If we're prefiltering, we can have another go with a reduced graph.
1043 UNUSED size_t numBefore = num_vertices(h);
1044 prefilterReductions(h, cc);
1045 UNUSED size_t numAfter = num_vertices(h);
1046 DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
1047
1048 if (isImplementableNFA(h, &rm, cc)) {
1049 return true;
1050 }
1051 }
1052
1053 DEBUG_PRINTF("unable to build engine\n");
1054 return false;
1055}
1056
1057static
1058bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
1059 assert(in_degree(v, ig));
1060
1061 for (const auto &e : in_edges_range(v, ig)) {
1062 if (ig[e].graph || ig[e].haig) {
1063 DEBUG_PRINTF("edge graph\n");
1064 return true;
1065 }
1066 if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
1067 DEBUG_PRINTF("edge bounds\n");
1068 return true;
1069 }
1070
1071 RoseInVertex u = source(e, ig);
1072 if (ig[u].type == RIV_START) {
1073 continue;
1074 }
1075 if (ig[u].type != RIV_LITERAL) {
1076 DEBUG_PRINTF("unsafe pred vertex\n");
1077 return true;
1078 }
1079 if (ig[u].delay) {
1080 DEBUG_PRINTF("pred has delay\n");
1081 return true;
1082 }
1083 }
1084
1085 return false;
1086}
1087
1088static
1089u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
1090 /* overly conservative if only part of the string is nocase */
1091 string pred = pred_key.get_string();
1092 string lit = lit_key.get_string();
1093
1094 if (pred_key.any_nocase() || lit_key.any_nocase()) {
1095 upperString(pred);
1096 upperString(lit);
1097 }
1098
1099 string::size_type last = pred.rfind(lit);
1100 if (last == string::npos) {
1101 return MAX_DELAY;
1102 }
1103
1104 u32 raw = pred.size() - last - 1;
1105 return MIN(raw, MAX_DELAY);
1106}
1107
1108static
1109u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) {
1110 // First, check the overlap constraints on (u,v).
1111 size_t max_delay;
1112 if (ig[v].type == RIV_LITERAL) {
1113 DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
1114 escapeString(ig[u].s).c_str(),
1115 escapeString(ig[v].s).c_str());
1116 max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
1117 } else if (ig[v].type == RIV_ACCEPT) {
1118 DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
1119 escapeString(ig[u].s).c_str());
1120 max_delay = MAX_DELAY;
1121 } else {
1122 assert(0);
1123 return 0;
1124 }
1125
1126 DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
1127
1128 // Now consider the predecessors of u.
1129 for (const auto &e : in_edges_range(u, ig)) {
1130 RoseInVertex w = source(e, ig);
1131 if (ig[w].type == RIV_START) {
1132 continue;
1133 }
1134 assert(ig[w].type == RIV_LITERAL);
1135 assert(ig[w].delay == 0);
1136
1137 DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
1138 escapeString(ig[w].s).c_str(),
1139 escapeString(ig[u].s).c_str());
1140
1141 // We cannot delay the literal on u so much that a predecessor literal
1142 // could occur in the delayed region. For example, consider
1143 // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
1144 // 'barman' could occur in the input string and race with 'foobar', as
1145 // in 'foobarman'.
1146
1147 const size_t pred_len = ig[w].s.length();
1148 size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
1149 DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
1150 assert(overlap <= pred_len);
1151 size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
1152 DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
1153 max_lit_delay);
1154 max_delay = min(max_delay, max_lit_delay);
1155 }
1156
1157 DEBUG_PRINTF("max_delay=%zu\n", max_delay);
1158 assert(max_delay <= MAX_DELAY);
1159 return max_delay;
1160}
1161
1162static
1163bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
1164 const CompileContext &cc, u32 *delay_out) {
1165 const u32 max_history =
1166 cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
1167
1168 const RoseInVertex u = source(e, ig), v = target(e, ig);
1169 const u32 graph_lag = ig[e].graph_lag;
1170
1171 // Clone a copy of the graph, as we need to be able to roll back this
1172 // operation.
1173 NGHolder h;
1174 cloneHolder(h, *ig[e].graph);
1175
1176 DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
1177 DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
1178 graph_lag);
1179
1180 assert(graph_lag <= ig[v].s.length());
1181 if (graph_lag < ig[v].s.length()) {
1182 size_t len = ig[v].s.length() - graph_lag;
1183 ue2_literal lit(ig[v].s.substr(0, len));
1184 DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
1185 u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
1186 if (delay2 == MO_INVALID_IDX) {
1187 DEBUG_PRINTF("couldn't remove trailing literal\n");
1188 return false;
1189 }
1190 if (delay2 != len) {
1191 DEBUG_PRINTF("couldn't remove entire trailing literal\n");
1192 return false;
1193 }
1194 }
1195
1196 PureRepeat repeat;
1197 if (!isPureRepeat(h, repeat)) {
1198 DEBUG_PRINTF("graph is not repeat\n");
1199 return false;
1200 }
1201 DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
1202 if (!repeat.bounds.max.is_infinite()) {
1203 DEBUG_PRINTF("not inf\n");
1204 return false;
1205 }
1206
1207 if (!repeat.reach.all()) {
1208 DEBUG_PRINTF("non-dot reach\n");
1209 return false;
1210 }
1211
1212 u32 delay = ig[v].s.length() + repeat.bounds.min;
1213 if (delay > MAX_DELAY) {
1214 DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
1215 return false;
1216 }
1217
1218 if (delay + ig[u].s.length() - 1 > max_history) {
1219 DEBUG_PRINTF("delay too large for history\n");
1220 return false;
1221 }
1222
1223 *delay_out = delay;
1224 return true;
1225}
1226
1227static
1228void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
1229 if (!cc.grey.roseTransformDelay) {
1230 return;
1231 }
1232
1233 for (auto u : vertices_range(ig)) {
1234 if (ig[u].type != RIV_LITERAL) {
1235 continue;
1236 }
1237 if (out_degree(u, ig) != 1) {
1238 continue;
1239 }
1240
1241 RoseInEdge e = *out_edges(u, ig).first;
1242 RoseInVertex v = target(e, ig);
1243 if (ig[v].type != RIV_LITERAL) {
1244 continue;
1245 }
1246 if (ig[e].haig) {
1247 continue;
1248 }
1249 if (!ig[e].graph) {
1250 continue;
1251 }
1252
1253 if (predsAreDelaySensitive(ig, u)) {
1254 DEBUG_PRINTF("preds are delay sensitive\n");
1255 continue;
1256 }
1257
1258 u32 max_delay = findMaxSafeDelay(ig, u, v);
1259
1260 DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
1261 escapeString(ig[u].s).c_str(),
1262 escapeString(ig[v].s).c_str());
1263
1264 u32 delay = 0;
1265 if (!transformInfixToDelay(ig, e, cc, &delay)) {
1266 continue;
1267 }
1268
1269 if (delay > max_delay) {
1270 DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
1271 continue;
1272 }
1273
1274 DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
1275 ig[u].delay = delay;
1276 ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
1277 ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
1278 ig[e].graph_lag = 0;
1279 ig[e].graph.reset();
1280 ig[e].minBound = 0;
1281 ig[e].maxBound = ROSE_BOUND_INF;
1282 }
1283}
1284
1285static
1286bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
1287 const CompileContext &cc, DepthMinMax *bounds) {
1288 const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
1289 : ROSE_BOUND_INF;
1290
1291 const RoseInVertex v = target(e, ig);
1292 const u32 graph_lag = ig[e].graph_lag;
1293
1294 // Clone a copy of the graph, as we need to be able to roll back this
1295 // operation.
1296 NGHolder h;
1297 cloneHolder(h, *ig[e].graph);
1298
1299 DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
1300 graph_lag);
1301
1302 assert(graph_lag <= ig[v].s.length());
1303 if (graph_lag < ig[v].s.length()) {
1304 size_t len = ig[v].s.length() - graph_lag;
1305 ue2_literal lit(ig[v].s.substr(0, len));
1306 DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
1307 u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
1308 if (delay2 == MO_INVALID_IDX) {
1309 DEBUG_PRINTF("couldn't remove trailing literal\n");
1310 return false;
1311 }
1312 if (delay2 != len) {
1313 DEBUG_PRINTF("couldn't remove entire trailing literal\n");
1314 return false;
1315 }
1316 }
1317
1318 PureRepeat repeat;
1319 if (!isPureRepeat(h, repeat)) {
1320 DEBUG_PRINTF("graph is not repeat\n");
1321 return false;
1322 }
1323 DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
1324 if (!repeat.bounds.max.is_infinite()) {
1325 DEBUG_PRINTF("not inf\n");
1326 return false;
1327 }
1328
1329 if (!repeat.reach.all()) {
1330 DEBUG_PRINTF("non-dot reach\n");
1331 return false;
1332 }
1333
1334 *bounds = repeat.bounds;
1335 return true;
1336}
1337
1338static
1339void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
1340 const CompileContext &cc) {
1341 if (!cc.grey.roseTransformDelay) {
1342 return;
1343 }
1344
1345 for (const auto &e : edges_range(ig)) {
1346 const RoseInVertex u = source(e, ig);
1347 const RoseInVertex v = target(e, ig);
1348
1349 if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
1350 continue;
1351 }
1352 if (ig[e].haig || !ig[e].graph) {
1353 continue;
1354 }
1355
1356 if (ig[u].min_offset != ig[u].max_offset) {
1357 DEBUG_PRINTF("u not fixed depth\n");
1358 continue;
1359 }
1360
1361 DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
1362 escapeString(ig[u].s).c_str(),
1363 escapeString(ig[v].s).c_str());
1364
1365 DepthMinMax bounds;
1366 if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
1367 continue;
1368 }
1369
1370 DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
1371 bounds.str().c_str());
1372 ig[e].graph_lag = 0;
1373 ig[e].graph.reset();
1374 ig[e].minBound = bounds.min;
1375 ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
1376 : ROSE_BOUND_INF;
1377 bd.anch_history_edges.insert(e);
1378 }
1379}
1380
1381/**
1382 * \brief Transform small trailing dot repeat suffixes into delay on the last
1383 * literal.
1384 *
1385 * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
1386 * rather than having a suffix to handle the dot.
1387 *
1388 * This transformation looks for literal->accept edges and transforms them if
1389 * appropriate. It doesn't handle complex cases where the literal has more than
1390 * one successor.
1391 */
1392static
1393void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
1394 if (!cc.grey.roseTransformDelay) {
1395 return;
1396 }
1397
1398 const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
1399 : ROSE_BOUND_INF;
1400
1401 set<RoseInVertex> modified_accepts; // may be dead after transform
1402
1403 for (auto u : vertices_range(ig)) {
1404 if (ig[u].type != RIV_LITERAL) {
1405 continue;
1406 }
1407 if (out_degree(u, ig) != 1) {
1408 continue;
1409 }
1410
1411 RoseInEdge e = *out_edges(u, ig).first;
1412 RoseInVertex v = target(e, ig);
1413 if (ig[v].type != RIV_ACCEPT) {
1414 continue;
1415 }
1416 if (ig[e].haig) {
1417 continue;
1418 }
1419 if (!ig[e].graph) {
1420 continue;
1421 }
1422
1423 if (predsAreDelaySensitive(ig, u)) {
1424 DEBUG_PRINTF("preds are delay sensitive\n");
1425 continue;
1426 }
1427
1428 DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
1429 escapeString(ig[u].s).c_str());
1430
1431 const NGHolder &h = *ig[e].graph;
1432 const set<ReportID> reports = all_reports(h);
1433 if (reports.size() != 1) {
1434 DEBUG_PRINTF("too many reports\n");
1435 continue;
1436 }
1437
1438 PureRepeat repeat;
1439 if (!isPureRepeat(h, repeat)) {
1440 DEBUG_PRINTF("suffix graph is not repeat\n");
1441 continue;
1442 }
1443 DEBUG_PRINTF("suffix graph is %s repeat\n",
1444 repeat.bounds.str().c_str());
1445
1446 if (!repeat.reach.all()) {
1447 DEBUG_PRINTF("non-dot reach\n");
1448 continue;
1449 }
1450
1451 if (repeat.bounds.min != repeat.bounds.max ||
1452 repeat.bounds.min > depth(MAX_DELAY)) {
1453 DEBUG_PRINTF("repeat is variable or too large\n");
1454 continue;
1455 }
1456
1457 u32 max_delay = findMaxSafeDelay(ig, u, v);
1458
1459 u32 delay = repeat.bounds.min;
1460 if (delay > max_delay) {
1461 DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
1462 continue;
1463 }
1464
1465 if (delay + ig[u].s.length() - 1 > max_history) {
1466 DEBUG_PRINTF("delay too large for history\n");
1467 continue;
1468 }
1469
1470 DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
1471 ig[u].delay = delay;
1472 ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
1473 ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
1474
1475 // Construct a new accept vertex for this report and remove edge e.
1476 // (This allows us to cope if v has more than one in-edge).
1477 RoseInVertex v2 =
1478 add_vertex(RoseInVertexProps::makeAccept(reports), ig);
1479 add_edge(u, v2, ig);
1480 remove_edge(e, ig);
1481 modified_accepts.insert(v);
1482 }
1483
1484 DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
1485
1486 for (auto v : modified_accepts) {
1487 if (in_degree(v, ig) == 0) {
1488 DEBUG_PRINTF("removing accept vertex with no preds\n");
1489 remove_vertex(v, ig);
1490 }
1491 }
1492}
1493
1494#ifndef NDEBUG
1495static
1496bool validateKinds(const RoseInGraph &g) {
1497 for (const auto &e : edges_range(g)) {
1498 if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
1499 return false;
1500 }
1501 }
1502
1503 return true;
1504}
1505#endif
1506
1507bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) {
1508 DEBUG_PRINTF("trying to rose\n");
1509 assert(validateKinds(ig));
1510 assert(hasCorrectlyNumberedVertices(ig));
1511
1512 if (::ue2::empty(ig)) {
1513 assert(0);
1514 return false;
1515 }
1516
1517 const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
1518 RoseInGraph &in = *in_ptr;
1519
1520 RoseBuildData bd(in, false);
1521
1522 transformLiteralDelay(in, cc);
1523 transformAnchoredLiteralOverlap(in, bd, cc);
1524 transformSuffixDelay(in, cc);
1525
1526 renumber_vertices(in);
1527 assert(validateKinds(in));
1528
1529 insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs;
1530
1531 for (const auto &e : edges_range(in)) {
1532 if (!in[e].graph) {
1533 assert(!in[e].dfa);
1534 assert(!in[e].haig);
1535 continue; // no graph
1536 }
1537
1538 if (in[e].haig || in[e].dfa) {
1539 /* Early DFAs/Haigs are always implementable (we've already built
1540 * the raw DFA). */
1541 continue;
1542 }
1543
1544 NGHolder *h = in[e].graph.get();
1545
1546 assert(isCorrectlyTopped(*h));
1547 graphs[h].push_back(e);
1548 }
1549
1550 vector<RoseInEdge> graph_edges;
1551
1552 for (const auto &m : graphs) {
1553 NGHolder *h = m.first;
1554 if (!canImplementGraph(*h, prefilter, rm, cc)) {
1555 return false;
1556 }
1557 insert(&graph_edges, graph_edges.end(), m.second);
1558 }
1559
1560 /* we are now past the point of no return. We can start making irreversible
1561 changes to the rose graph, etc */
1562
1563 for (const auto &e : graph_edges) {
1564 assert(in[e].graph);
1565 assert(!in[e].haig);
1566 NGHolder &h = *in[e].graph;
1567 DEBUG_PRINTF("handling %p\n", &h);
1568 assert(allMatchStatesHaveReports(h));
1569
1570 if (!generates_callbacks(whatRoseIsThis(in, e))
1571 && in[target(e, in)].type != RIV_ACCEPT_EOD) {
1572 set_report(h, getNewNfaReport());
1573 }
1574 }
1575
1576 populateRoseGraph(this, bd);
1577
1578 return true;
1579}
1580
1581bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
1582 DEBUG_PRINTF("rose is trying to consume a sombe\n");
1583 assert(validateKinds(ig));
1584
1585 if (::ue2::empty(ig)) {
1586 assert(0);
1587 return false;
1588 }
1589
1590 RoseBuildData bd(ig, true);
1591
1592 for (const auto &e : edges_range(ig)) {
1593 if (!ig[e].graph) {
1594 continue; // no graph
1595 }
1596 DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
1597 assert(allMatchStatesHaveReports(*ig[e].graph));
1598 assert(ig[e].haig);
1599 }
1600
1601 populateRoseGraph(this, bd);
1602
1603 return true;
1604}
1605
1606bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
1607 const ReportManager &rm, const CompileContext &cc) {
1608 assert(validateKinds(ig));
1609
1610 if (::ue2::empty(ig)) {
1611 assert(0);
1612 return false;
1613 }
1614
1615 vector<NGHolder *> graphs;
1616
1617 for (const auto &e : edges_range(ig)) {
1618 if (!ig[e].graph) {
1619 continue; // no graph
1620 }
1621
1622 if (ig[e].haig) {
1623 // Haigs are always implementable (we've already built the raw DFA).
1624 continue;
1625 }
1626
1627 graphs.push_back(ig[e].graph.get());
1628 }
1629
1630 for (const auto &g : graphs) {
1631 if (!canImplementGraph(*g, prefilter, rm, cc)) {
1632 return false;
1633 }
1634 }
1635
1636 return true;
1637}
1638
1639void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
1640 const flat_set<ReportID> &reports) {
1641 assert(!reports.empty());
1642
1643 if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
1644 lit.length() > 3) {
1645 DEBUG_PRINTF("adding as puffette\n");
1646 const CharReach &cr = *lit.begin();
1647 for (const auto &report : reports) {
1648 addOutfix(raw_puff(lit.length(), true, report, cr, true));
1649 }
1650
1651 return;
1652 }
1653
1654 RoseInGraph ig;
1655 RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
1656 RoseInVertex accept = add_vertex(
1657 eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
1658 : RoseInVertexProps::makeAccept(set<ReportID>()), ig);
1659 RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
1660
1661 add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
1662 add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
1663
1664 calcVertexOffsets(ig);
1665
1666 ig[accept].reports.insert(reports.begin(), reports.end());
1667
1668 addRose(ig, false);
1669}
1670
1671static
1672u32 findMaxBAWidth(const NGHolder &h) {
1673 // Must be bi-anchored: no out-edges from startDs (other than its
1674 // self-loop), no in-edges to accept.
1675 if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) {
1676 return ROSE_BOUND_INF;
1677 }
1678 depth d = findMaxWidth(h);
1679 assert(d.is_reachable());
1680
1681 if (!d.is_finite()) {
1682 return ROSE_BOUND_INF;
1683 }
1684 return d;
1685}
1686
1687static
1688void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
1689 const RoseBuildImpl &tbi) {
1690 outfix.maxBAWidth = findMaxBAWidth(h);
1691 outfix.minWidth = findMinWidth(h);
1692 outfix.maxWidth = findMaxWidth(h);
1693 outfix.maxOffset = findMaxOffset(h, tbi.rm);
1694 populateReverseAccelerationInfo(outfix.rev_info, h);
1695}
1696
1697static
1698bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
1699 map<flat_set<ReportID>, ReportID> report_remap;
1700 shared_ptr<NGHolder> eod_leftfix
1701 = makeRoseEodPrefix(h, build, report_remap);
1702
1703 bool nfa_ok = isImplementableNFA(h, &build.rm, build.cc);
1704
1705 /* TODO: check if early dfa is possible */
1706
1707 if (!nfa_ok) {
1708 DEBUG_PRINTF("could not build as NFA\n");
1709 return false;
1710 }
1711
1712 u32 eod_event = getEodEventID(build);
1713
1714 auto &g = build.g;
1715 for (const auto &report_mapping : report_remap) {
1716 RoseVertex v = add_vertex(g);
1717 g[v].literals.insert(eod_event);
1718 build.literal_info[eod_event].vertices.insert(v);
1719
1720 g[v].left.graph = eod_leftfix;
1721 g[v].left.leftfix_report = report_mapping.second;
1722 g[v].left.lag = 0;
1723 RoseEdge e1 = add_edge(build.anchored_root, v, g);
1724 g[e1].minBound = 0;
1725 g[e1].maxBound = ROSE_BOUND_INF;
1726 g[v].min_offset = findMinWidth(*eod_leftfix);
1727 g[v].max_offset = ROSE_BOUND_INF;
1728
1729 depth max_width = findMaxWidth(*g[v].left.graph);
1730 if (max_width.is_finite() && isPureAnchored(*eod_leftfix)) {
1731 g[e1].maxBound = max_width;
1732 g[v].max_offset = max_width;
1733 }
1734
1735 g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix
1736 RoseVertex w = add_vertex(g);
1737 g[w].eod_accept = true;
1738 g[w].reports = report_mapping.first;
1739 g[w].min_offset = g[v].min_offset;
1740 g[w].max_offset = g[v].max_offset;
1741 RoseEdge e = add_edge(v, w, g);
1742 g[e].minBound = 0;
1743 g[e].maxBound = 0;
1744 g[e].history = ROSE_ROLE_HISTORY_NONE;
1745 DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index);
1746 }
1747
1748 return true;
1749}
1750
1751bool RoseBuildImpl::addOutfix(const NGHolder &h) {
1752 DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
1753
1754 /* TODO: handle more than one report */
1755 if (!in_degree(h.accept, h)
1756 && all_reports(h).size() == 1
1757 && addEodOutfix(*this, h)) {
1758 return true;
1759 }
1760
1761 const u32 nfa_states = isImplementableNFA(h, &rm, cc);
1762 if (nfa_states) {
1763 DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
1764 } else {
1765 DEBUG_PRINTF("not implementable as an NFA\n");
1766 }
1767
1768 bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
1769 || can_exhaust(h, rm); /* can be pruned */
1770
1771 unique_ptr<raw_dfa> rdfa;
1772
1773 if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
1774 (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
1775 rdfa = buildMcClellan(h, &rm, cc.grey);
1776 }
1777
1778 if (!nfa_states && !rdfa) {
1779 DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
1780 return false;
1781 }
1782
1783 if (rdfa) {
1784 outfixes.push_back(OutfixInfo(move(rdfa)));
1785 } else {
1786 outfixes.push_back(OutfixInfo(cloneHolder(h)));
1787 }
1788
1789 populateOutfixInfo(outfixes.back(), h, *this);
1790
1791 return true;
1792}
1793
1794bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
1795 DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
1796
1797 outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig)));
1798 populateOutfixInfo(outfixes.back(), h, *this);
1799
1800 return true; /* failure is not yet an option */
1801}
1802
1803bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
1804 if (!mpv_outfix) {
1805 mpv_outfix = make_unique<OutfixInfo>(MpvProto());
1806 }
1807
1808 auto *mpv = mpv_outfix->mpv();
1809 assert(mpv);
1810 mpv->puffettes.push_back(rp);
1811
1812 mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
1813 mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
1814 mpv_outfix->maxWidth = rp.unbounded
1815 ? depth::infinity()
1816 : max(mpv_outfix->maxWidth, depth(rp.repeats));
1817
1818 if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
1819 mpv_outfix->maxOffset = ROSE_BOUND_INF;
1820 } else {
1821 mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
1822 }
1823
1824 return true; /* failure is not yet an option */
1825}
1826
1827bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
1828 u32 *event_out) {
1829 if (!mpv_outfix) {
1830 mpv_outfix = make_unique<OutfixInfo>(MpvProto());
1831 }
1832
1833 auto *mpv = mpv_outfix->mpv();
1834 assert(mpv);
1835 mpv->triggered_puffettes.push_back(rp);
1836
1837 mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
1838 mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
1839 mpv_outfix->maxWidth = rp.unbounded
1840 ? depth::infinity()
1841 : max(mpv_outfix->maxWidth, depth(rp.repeats));
1842
1843 mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
1844 * the caller */
1845
1846 *queue_out = mpv_outfix->get_queue(qif);
1847 *event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1;
1848
1849 return true; /* failure is not yet an option */
1850}
1851
1852static
1853bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
1854 NFAVertex u,
1855 const vector<DepthMinMax> &vertexDepths,
1856 map<u32, DepthMinMax> &depthMap,
1857 map<NFAVertex, set<u32>> &reportMap,
1858 map<ReportID, u32> &allocated_reports,
1859 flat_set<u32> &added_lit_ids) {
1860 const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
1861 const size_t index = w[u].index;
1862 assert(index < vertexDepths.size());
1863 const DepthMinMax &d = vertexDepths.at(index);
1864
1865 for (const auto &int_report : w[u].reports) {
1866 assert(int_report != MO_INVALID_IDX);
1867
1868 u32 lit_id;
1869 if (!contains(allocated_reports, int_report)) {
1870 lit_id = tbi.getNewLiteralId();
1871 added_lit_ids.insert(lit_id);
1872 allocated_reports[int_report] = lit_id;
1873 } else {
1874 lit_id = allocated_reports[int_report];
1875 }
1876
1877 reportMap[u].insert(lit_id);
1878
1879 if (!contains(depthMap, lit_id)) {
1880 depthMap[lit_id] = d;
1881 } else {
1882 depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
1883 }
1884
1885 if (depthMap[lit_id].max > max_anchored_depth) {
1886 DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
1887 depthMap[lit_id].max.str().c_str(),
1888 tbi.cc.grey.maxAnchoredRegion);
1889 return false;
1890 }
1891 }
1892
1893 return true;
1894}
1895
1896// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
1897// added to support anchored NFAs. Assumes that they are a contiguous range at
1898// the end of the RoseBuildImpl::literal_info vector.
1899static
1900void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
1901 if (lit_ids.empty()) {
1902 return;
1903 }
1904
1905 DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size());
1906
1907 // lit_ids should be a contiguous range.
1908 assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
1909 assert(*lit_ids.rbegin() == tbi.literals.size() - 1);
1910
1911 assert(all_of_in(lit_ids, [&](u32 lit_id) {
1912 return lit_id < tbi.literal_info.size() &&
1913 tbi.literals.at(lit_id).table == ROSE_ANCHORED &&
1914 tbi.literal_info[lit_id].vertices.empty();
1915 }));
1916
1917 tbi.literals.erase_back(lit_ids.size());
1918 assert(tbi.literals.size() == *lit_ids.begin());
1919
1920 // lit_ids should be at the end of tbi.literal_info.
1921 assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
1922 tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
1923}
1924
1925bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
1926 auto vertexDepths = calcDepthsFrom(h, h.start);
1927
1928 map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
1929 map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
1930 map<ReportID, u32> allocated_reports; /* report -> literal id */
1931 flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
1932
1933 for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
1934 if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
1935 reportMap, allocated_reports,
1936 added_lit_ids)) {
1937 removeAddedLiterals(*this, added_lit_ids);
1938 return false;
1939 }
1940 }
1941
1942 map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
1943
1944 for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
1945 if (v == h.accept) {
1946 continue;
1947 }
1948 if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
1949 reportMap, allocated_reports_eod,
1950 added_lit_ids)) {
1951 removeAddedLiterals(*this, added_lit_ids);
1952 return false;
1953 }
1954 }
1955
1956 assert(!reportMap.empty());
1957
1958 int rv = addAnchoredNFA(*this, h, reportMap);
1959 if (rv != ANCHORED_FAIL) {
1960 assert(rv != ANCHORED_REMAP);
1961 DEBUG_PRINTF("added anchored nfa\n");
1962 /* add edges to the rose graph to bubble the match up */
1963 for (const auto &m : allocated_reports) {
1964 const ReportID &report = m.first;
1965 const u32 &lit_id = m.second;
1966 assert(depthMap[lit_id].max.is_finite());
1967 u32 minBound = depthMap[lit_id].min;
1968 u32 maxBound = depthMap[lit_id].max;
1969 RoseVertex v
1970 = createAnchoredVertex(this, lit_id, minBound, maxBound);
1971 g[v].reports.insert(report);
1972 }
1973
1974 for (const auto &m : allocated_reports_eod) {
1975 const ReportID &report = m.first;
1976 const u32 &lit_id = m.second;
1977 assert(depthMap[lit_id].max.is_finite());
1978 u32 minBound = depthMap[lit_id].min;
1979 u32 maxBound = depthMap[lit_id].max;
1980 RoseVertex v
1981 = createAnchoredVertex(this, lit_id, minBound, maxBound);
1982 RoseVertex eod = add_vertex(g);
1983 g[eod].eod_accept = true;
1984 g[eod].reports.insert(report);
1985 g[eod].min_offset = g[v].min_offset;
1986 g[eod].max_offset = g[v].max_offset;
1987 add_edge(v, eod, g);
1988 }
1989
1990 return true;
1991 } else {
1992 DEBUG_PRINTF("failed to add anchored nfa\n");
1993 removeAddedLiterals(*this, added_lit_ids);
1994 return false;
1995 }
1996}
1997
1998} // namespace ue2
1999