1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "rose_build_convert.h"
30
31#include "grey.h"
32#include "rose_build.h"
33#include "rose_build_impl.h"
34#include "rose_build_util.h"
35#include "ue2common.h"
36#include "hwlm/hwlm_build.h"
37#include "nfa/castlecompile.h"
38#include "nfa/limex_limits.h"
39#include "nfagraph/ng_dump.h"
40#include "nfagraph/ng_limex.h"
41#include "nfagraph/ng_repeat.h"
42#include "nfagraph/ng_reports.h"
43#include "nfagraph/ng_split.h"
44#include "nfagraph/ng_util.h"
45#include "nfagraph/ng_width.h"
46#include "util/bitutils.h"
47#include "util/charreach.h"
48#include "util/charreach_util.h"
49#include "util/compile_context.h"
50#include "util/depth.h"
51#include "util/graph_range.h"
52#include "util/make_unique.h"
53#include "util/order_check.h"
54#include "util/ue2string.h"
55
56#include <algorithm>
57#include <map>
58#include <queue>
59#include <set>
60#include <string>
61#include <unordered_map>
62#include <utility>
63#include <vector>
64
65#include <boost/range/adaptor/map.hpp>
66
67using namespace std;
68using boost::adaptors::map_values;
69
70namespace ue2 {
71
72static
73NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) {
74 assert(cr.any());
75 NFAVertex v = add_vertex(out);
76 out[v].char_reach = cr;
77 return v;
78}
79
80static
81size_t suffixFloodLen(const ue2_literal &s) {
82 if (s.empty()) {
83 return 0;
84 }
85
86 const ue2_literal::elem &c = s.back();
87 auto it = find_if(s.rbegin(), s.rend(),
88 [&c](const ue2_literal::elem &e) { return e != c; });
89 return distance(s.rbegin(), it);
90}
91
92static
93unique_ptr<NGHolder> makeFloodProneSuffix(const ue2_literal &s, size_t len,
94 const flat_set<ReportID> &reports) {
95 assert(len < s.length());
96 assert(!reports.empty());
97
98 unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_SUFFIX);
99
100 NFAVertex u = h->start;
101 for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) {
102 NFAVertex v = addHolderVertex(*it, *h);
103 NFAEdge e = add_edge(u, v, *h);
104 if (u == h->start) {
105 (*h)[e].tops.insert(DEFAULT_TOP);
106 }
107 u = v;
108 }
109
110 (*h)[u].reports.insert(reports.begin(), reports.end());
111 add_edge(u, h->accept, *h);
112 return h;
113}
114
115static
116unique_ptr<NGHolder> makeRosePrefix(const ue2_literal &s) {
117 unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_PREFIX);
118
119 NFAVertex u = h->startDs;
120 for (const auto &c : s) {
121 NFAVertex v = addHolderVertex(c, *h);
122 add_edge(u, v, *h);
123 u = v;
124 }
125 add_edge(u, h->accept, *h);
126 return h;
127}
128
129static
130void replaceWithLitPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
131 const rose_literal_id &lit, size_t suffixlen,
132 size_t delay) {
133 assert(suffixlen < lit.s.length());
134
135 DEBUG_PRINTF("replacing '%s' with prefix, length=%zu, delay=%zu\n",
136 dumpString(lit.s).c_str(), lit.s.length() - suffixlen, delay);
137
138 RoseGraph &g = tbi.g;
139 ue2_literal new_lit = lit.s.substr(0, lit.s.length() - suffixlen);
140 u32 new_id = tbi.getLiteralId(new_lit, delay, ROSE_FLOATING);
141 rose_literal_info &old_info = tbi.literal_info.at(lit_id);
142 old_info.vertices.erase(v);
143 tbi.literal_info.at(new_id).vertices.insert(v);
144 g[v].literals.clear();
145 g[v].literals.insert(new_id);
146}
147
148static
149bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
150 const rose_literal_id &lit, size_t suffixlen) {
151 if (suffixlen > MAX_DELAY) {
152 DEBUG_PRINTF("delay too large\n");
153 return false;
154 }
155
156 if (!tbi.isDirectReport(lit_id)) {
157 DEBUG_PRINTF("literal is not direct report\n");
158 return false;
159 }
160
161 if (tbi.cc.streaming &&
162 lit.s.length() > tbi.cc.grey.maxHistoryAvailable + 1) {
163 DEBUG_PRINTF("insufficient history to delay literal of len %zu\n",
164 lit.s.length());
165 return false;
166 }
167
168 shared_ptr<NGHolder> h = makeRosePrefix(lit.s);
169 ReportID prefix_report = 0;
170 set_report(*h, prefix_report);
171
172 if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
173 DEBUG_PRINTF("prefix not implementable\n");
174 return false;
175 }
176
177 RoseGraph &g = tbi.g;
178 assert(!g[v].left);
179 g[v].left.graph = h;
180 g[v].left.lag = 0;
181 g[v].left.leftfix_report = prefix_report;
182
183 // Swap v's literal for a shorter one, delayed by suffix len.
184 replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, suffixlen);
185
186 return true;
187}
188
189static
190void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
191 const rose_literal_id &lit, size_t suffixlen) {
192 DEBUG_PRINTF("flood-prone leaf '%s'\n", dumpString(lit.s).c_str());
193 DEBUG_PRINTF("turning last %zu chars into a suffix NFA\n", suffixlen);
194 RoseGraph &g = tbi.g;
195 assert(!g[v].eod_accept);
196
197 // If we're a direct report literal, we may be able to convert this case
198 // into a delayed literal with a (very boring) transient prefix that
199 // handles our flood-prone suffix.
200 if (delayLiteralWithPrefix(tbi, v, lit_id, lit, suffixlen)) {
201 DEBUG_PRINTF("implemented as delayed literal with a rose prefix\n");
202 return;
203 }
204
205 // General case: create a suffix that implements the flood-prone portion.
206
207 // Create the NFA.
208 auto h = makeFloodProneSuffix(lit.s, suffixlen, g[v].reports);
209 if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
210 DEBUG_PRINTF("not implementable\n");
211 return;
212 }
213
214 // Apply the NFA.
215 assert(!g[v].suffix);
216 g[v].suffix.graph = move(h);
217 g[v].reports.clear();
218
219 // Swap v's literal for a shorter one.
220 replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, 0);
221
222 // It's possible that min_offset might be an underestimate, so we
223 // subtract min(min_offset, suffixlen) for safety.
224 g[v].min_offset -= min((size_t)g[v].min_offset, suffixlen);
225
226 if (g[v].max_offset < ROSE_BOUND_INF) {
227 assert(g[v].max_offset >= suffixlen);
228 g[v].max_offset -= suffixlen;
229 }
230}
231
232/**
233 * Collect an estimate of the number of literals in the floating table, and use
234 * this to estimate the flood prone suffix length.
235 */
236static
237size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) {
238 size_t numLiterals = 0;
239 for (const rose_literal_id &lit : tbi.literals) {
240 if (lit.delay) {
241 continue; // delay ids are virtual-ish
242 }
243 if (lit.table != ROSE_FLOATING) {
244 continue;
245 }
246
247 numLiterals++;
248 }
249
250 return hwlmFloodProneSuffixLen(numLiterals, tbi.cc);
251}
252
253/**
254 * \brief Convert flood-prone literal suffixes into suffix NFAs.
255 *
256 * For any trailing string in Rose (string cannot lead to more Rose roles or
257 * NFAs, etc) ending with a continuous run of a single character with more than
258 * 3 copies of that single character,
259 *
260 * If the result of removing all but 2 copies of that character yields a string
261 * that is greater than FLOOD_PRONE_LIT_MIN_LENGTH characters, remove those
262 * final characters from the literal and move them into a suffix NFA.
263 */
264void convertFloodProneSuffixes(RoseBuildImpl &tbi) {
265 static const size_t FLOOD_PRONE_LIT_MIN_LENGTH = 5;
266
267 if (!tbi.cc.grey.roseConvertFloodProneSuffixes) {
268 return;
269 }
270
271 const size_t floodProneLen = findFloodProneSuffixLen(tbi);
272 DEBUG_PRINTF("flood prone suffix len = %zu\n", floodProneLen);
273
274 RoseGraph &g = tbi.g;
275
276 for (auto v : vertices_range(g)) {
277 if (!isLeafNode(v, g)) {
278 continue;
279 }
280
281 if (g[v].reports.empty()) {
282 continue;
283 }
284
285 // TODO: currently only boring vertices.
286 if (!g[v].isBoring()) {
287 continue;
288 }
289
290 // Currently only handles vertices with a single literal (should always
291 // be the case this early in Rose construction).
292 if (g[v].literals.size() != 1) {
293 continue;
294 }
295
296 u32 lit_id = *g[v].literals.begin();
297 const rose_literal_id &lit = tbi.literals.at(lit_id);
298
299 // anchored or delayed literals need thought.
300 if (lit.table != ROSE_FLOATING || lit.delay) {
301 continue;
302 }
303
304 // don't do this to literals with msk/cmp.
305 if (!lit.msk.empty()) {
306 continue;
307 }
308
309 // Can't safely do this operation to vertices with delayed
310 // predecessors.
311 if (tbi.hasDelayPred(v)) {
312 DEBUG_PRINTF("delayed pred\n");
313 continue;
314 }
315
316 if (lit.s.length() <= FLOOD_PRONE_LIT_MIN_LENGTH) {
317 DEBUG_PRINTF("literal is short enough already\n");
318 continue;
319 }
320
321 size_t floodLen = suffixFloodLen(lit.s);
322 if (floodLen < floodProneLen) {
323 DEBUG_PRINTF("literal not flood-prone\n");
324 continue;
325 }
326
327 if (floodLen == lit.s.length()) {
328 DEBUG_PRINTF("whole literal is a flood\n");
329 // Removing the part of the flood from the end of the literal would
330 // leave us with a shorter, but still flood-prone, prefix. Better
331 // to leave it alone.
332 continue;
333 }
334
335 size_t suffixLen = floodLen - (floodProneLen - 1);
336 if (lit.s.length() - suffixLen < FLOOD_PRONE_LIT_MIN_LENGTH) {
337 DEBUG_PRINTF("removing flood would leave literal too short\n");
338 continue;
339 }
340
341 convertFloodProneSuffix(tbi, v, lit_id, lit, suffixLen);
342 }
343}
344
345static
346CharReach getReachOfNormalVertex(const NGHolder &g) {
347 for (auto v : vertices_range(g)) {
348 if (is_special(v, g)) {
349 continue;
350 }
351 return g[v].char_reach;
352 }
353 assert(0);
354 return CharReach();
355}
356
357/**
358 * \brief Set the edge bounds and appropriate history on the given edge in the
359 * Rose graph.
360 */
361static
362void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound,
363 u32 max_bound) {
364 assert(min_bound <= max_bound);
365 assert(max_bound <= ROSE_BOUND_INF);
366
367 g[e].minBound = min_bound;
368 g[e].maxBound = max_bound;
369
370 if (min_bound || max_bound < ROSE_BOUND_INF) {
371 g[e].history = ROSE_ROLE_HISTORY_ANCH;
372 } else {
373 g[e].history = ROSE_ROLE_HISTORY_NONE;
374 }
375}
376
377static
378bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
379 const RoseEdge &e_old, RoseVertex ar,
380 vector<RoseEdge> *to_delete) {
381 DEBUG_PRINTF("hi\n");
382
383 /* check for prefix cliches connected to start (^.{N,M}) */
384 if (!getReachOfNormalVertex(h).all()) {
385 DEBUG_PRINTF(":(\n");
386 return false;
387 }
388
389 PureRepeat repeat;
390 if (!isPureRepeat(h, repeat)) {
391 DEBUG_PRINTF(":(\n");
392 return false;
393 }
394
395 assert(repeat.bounds.min.is_finite());
396 assert(repeat.bounds.max.is_reachable());
397 assert(repeat.bounds.min <= repeat.bounds.max);
398
399 DEBUG_PRINTF("prefix is ^.{%s,%s}\n", repeat.bounds.min.str().c_str(),
400 repeat.bounds.max.str().c_str());
401
402 /* update bounds on edge */
403
404 // Convert to Rose graph bounds, which are not (yet?) depth classes.
405 u32 bound_min = repeat.bounds.min;
406 u32 bound_max =
407 repeat.bounds.max.is_finite() ? (u32)repeat.bounds.max : ROSE_BOUND_INF;
408
409 if (source(e_old, g) == ar) {
410 assert(g[e_old].minBound <= bound_min);
411 assert(g[e_old].maxBound >= bound_max);
412 setEdgeBounds(g, e_old, bound_min, bound_max);
413 } else {
414 RoseEdge e_new = add_edge(ar, v, g);
415 setEdgeBounds(g, e_new, bound_min, bound_max);
416 to_delete->push_back(e_old);
417 }
418
419 g[v].left.reset(); /* clear the prefix info */
420 return true;
421}
422
423static
424bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
425 const RoseEdge &e) {
426 DEBUG_PRINTF("hi\n");
427 /* check for prefix cliches connected to start-ds (.{N}, ^.{N,}) */
428 u32 repeatCount = 0;
429 NFAVertex hu = h.startDs;
430
431 auto start_succ = succs<set<NFAVertex>>(h.start, h);
432 auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
433
434 if (!is_subset_of(start_succ, startds_succ)) {
435 DEBUG_PRINTF("not a simple chain\n");
436 return false;
437 }
438
439 set<NFAVertex> seen;
440 do {
441 if (!h[hu].char_reach.all()) {
442 return false;
443 }
444 NFAVertex hv = getSoleDestVertex(h, hu);
445 if (!hv) {
446 return false;
447 }
448 if (contains(seen, hv)) {
449 assert(0);
450 return false;
451 }
452 hu = hv;
453 repeatCount++;
454 if (hu == h.accept) {
455 break;
456 }
457 } while(1);
458
459 assert(hu == h.accept);
460
461 repeatCount--; /* do not count accept as part of the chain */
462
463 DEBUG_PRINTF("prefix is ^.{%u,}\n", repeatCount);
464
465 /* update bounds on edge */
466 assert(g[e].minBound <= repeatCount);
467 setEdgeBounds(g, e, repeatCount, ROSE_BOUND_INF);
468
469 g[v].left.reset(); /* clear the prefix info */
470
471 return true;
472}
473
474static
475bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
476 const RoseEdge &e_old, RoseVertex ar,
477 vector<RoseEdge> *to_delete,
478 const CompileContext &cc) {
479 assert(in_degree(h.acceptEod, h) == 1);
480
481 bool anchored = !proper_out_degree(h.startDs, h);
482 NFAVertex key = NGHolder::null_vertex();
483 NFAVertex base = anchored ? h.start : h.startDs;
484
485 if (!anchored) {
486 auto start_succ = succs<set<NFAVertex>>(h.start, h);
487 auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
488
489 if (!is_subset_of(start_succ, startds_succ)) {
490 DEBUG_PRINTF("not a simple chain\n");
491 return false;
492 }
493 }
494
495 for (auto w : adjacent_vertices_range(base, h)) {
496 DEBUG_PRINTF("checking %zu\n", h[w].index);
497 if (!h[w].char_reach.all()) {
498 continue;
499 }
500
501 if (!is_special(w, h)) {
502 key = w;
503 break;
504 }
505 }
506
507 if (!key) {
508 return false;
509 }
510
511 vector<GraphRepeatInfo> repeats;
512 findRepeats(h, 2, &repeats);
513
514 vector<GraphRepeatInfo>::const_iterator it;
515 for (it = repeats.begin(); it != repeats.end(); ++it) {
516 DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
517 if (find(it->vertices.begin(), it->vertices.end(), key)
518 != it->vertices.end()) {
519 break;
520 }
521 }
522 if (it == repeats.end()) {
523 DEBUG_PRINTF("no repeat found\n");
524 return false;
525 }
526
527 GraphRepeatInfo ri = *it;
528
529 set<NFAVertex> exits_and_repeat_verts;
530 for (auto repeat_v : ri.vertices) {
531 DEBUG_PRINTF("repeat vertex %zu\n", h[repeat_v].index);
532 succ(h, repeat_v, &exits_and_repeat_verts);
533 exits_and_repeat_verts.insert(repeat_v);
534 }
535
536 DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
537 ri.repeatMax.str().c_str());
538
539 set<NFAVertex> rep_verts;
540 insert(&rep_verts, ri.vertices);
541
542 set<NFAVertex> exits;
543 exits = exits_and_repeat_verts;
544 erase_all(&exits, rep_verts);
545
546 auto base_succ = succs<set<NFAVertex>>(base, h);
547 base_succ.erase(h.startDs);
548
549 if (is_subset_of(base_succ, rep_verts)) {
550 /* all good: repeat dominates the rest of the pattern */
551 } else if (ri.repeatMin == depth(1)
552 && is_subset_of(exits, base_succ)
553 && is_subset_of(base_succ, exits_and_repeat_verts)) {
554 /* we have a jump edge */
555 ri.repeatMin = depth(0);
556 } else {
557 return false;
558 }
559
560 DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
561 ri.repeatMax.str().c_str());
562 DEBUG_PRINTF("woot?\n");
563
564 shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
565 unordered_map<NFAVertex, NFAVertex> rhs_map;
566 vector<NFAVertex> exits_vec;
567 insert(&exits_vec, exits_vec.end(), exits);
568 splitRHS(h, exits_vec, h_new.get(), &rhs_map);
569 h_new->kind = NFA_PREFIX;
570
571 if (num_vertices(*h_new) <= N_SPECIALS) {
572 DEBUG_PRINTF("not a hybrid??\n");
573 /* TODO: pick up these cases, unify code */
574 return false;
575 }
576
577 for (auto w : adjacent_vertices_range(h_new->start, *h_new)) {
578 if (w != h_new->startDs) {
579 add_edge(h_new->startDs, w, *h_new);
580 }
581 }
582 clear_out_edges(h_new->start, *h_new);
583 add_edge(h_new->start, h_new->startDs, *h_new);
584
585 depth width = findMinWidth(*h_new);
586 if (width != findMaxWidth(*h_new)) {
587 return false;
588 }
589
590 if (g[v].left.dfa) {
591 /* we were unable to implement initial graph as an nfa;
592 * we need to to check if we still need a dfa and, if so, rebuild. */
593 if (!isImplementableNFA(*h_new, nullptr, cc)) {
594 return false; /* TODO: handle rebuilding dfa */
595 }
596 }
597
598 if (anchored) {
599 if (ri.repeatMax.is_infinite()) {
600 return false; /* TODO */
601 }
602
603 if (source(e_old, g) == ar) {
604 setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width);
605 } else {
606 RoseEdge e_new = add_edge(ar, v, g);
607 setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width);
608 to_delete->push_back(e_old);
609 }
610
611 } else {
612 assert(g[e_old].minBound <= ri.repeatMin + width);
613 setEdgeBounds(g, e_old, ri.repeatMin + width, ROSE_BOUND_INF);
614 }
615
616 g[v].left.dfa.reset();
617 g[v].left.graph = h_new;
618
619 return true;
620}
621
622/* turns simple prefixes like /^.{30,} into bounds on the root roles */
623void convertPrefixToBounds(RoseBuildImpl &tbi) {
624 RoseGraph &g = tbi.g;
625
626 vector<RoseEdge> to_delete;
627 RoseVertex ar = tbi.anchored_root;
628
629 /* graphs with prefixes produced by rose are wired to tbi.root */
630
631 for (const auto &e : out_edges_range(tbi.root, g)) {
632 RoseVertex v = target(e, g);
633
634 if (in_degree(v, g) != 1) {
635 continue;
636 }
637
638 if (!g[v].left.graph) {
639 continue;
640 }
641
642 if (g[v].left.tracksSom()) {
643 continue;
644 }
645
646 const NGHolder &h = *g[v].left.graph;
647
648 if (g[v].left.lag != tbi.minLiteralLen(v)
649 || g[v].left.lag != tbi.maxLiteralLen(v)) {
650 continue;
651 }
652
653 if (all_reports(h).size() != 1) {
654 assert(0);
655 continue;
656 }
657
658 DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index);
659
660 if (!proper_out_degree(h.startDs, h)) {
661 if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
662 continue;
663 }
664 } else {
665 if (handleStartDsPrefixCliche(h, g, v, e)) {
666 continue;
667 }
668 }
669
670 /* prefix is not just a simple dot repeat. However, it is still
671 * possible that it consists of dot repeat and fixed width mask that we
672 * can handle. */
673 handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
674 }
675
676 for (const auto &e : out_edges_range(ar, g)) {
677 RoseVertex v = target(e, g);
678
679 /* note: vertices that we have rehomed will currently have an in-degree
680 * of 2 */
681 if (in_degree(v, g) != 1) {
682 continue;
683 }
684
685 if (!g[v].left.graph) {
686 continue;
687 }
688
689 if (g[v].left.tracksSom()) {
690 continue;
691 }
692
693 if (g[v].left.lag != tbi.minLiteralLen(v)
694 || g[v].left.lag != tbi.maxLiteralLen(v)) {
695 continue;
696 }
697
698 const NGHolder &h = *g[v].left.graph;
699 if (all_reports(h).size() != 1) {
700 assert(0);
701 continue;
702 }
703
704 DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index);
705
706 if (!proper_out_degree(h.startDs, h)) {
707 if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
708 continue;
709 }
710 } else {
711 if (handleStartDsPrefixCliche(h, g, v, e)) {
712 continue;
713 }
714 }
715
716 /* prefix is not just a simple dot repeat. However, it is still
717 * possible that it consists of dot repeat and fixed width mask that we
718 * can handle. */
719 handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
720 }
721
722 for (const auto &e : to_delete) {
723 remove_edge(e, g);
724 }
725}
726
727/**
728 * Identify dot-repeat infixes after fixed-depth literals and convert them to
729 * edges with ROSE_ROLE_HISTORY_ANCH history and equivalent bounds.
730 */
731void convertAnchPrefixToBounds(RoseBuildImpl &tbi) {
732 RoseGraph &g = tbi.g;
733
734 for (const auto v : vertices_range(g)) {
735 if (!g[v].left) {
736 continue;
737 }
738
739 DEBUG_PRINTF("vertex %zu\n", g[v].index);
740
741 // This pass runs after makeCastles, so we use the fact that bounded
742 // repeat detection has already been done for us.
743
744 if (!g[v].left.castle) {
745 DEBUG_PRINTF("not a castle\n");
746 continue;
747 }
748
749 const CastleProto &castle = *g[v].left.castle;
750
751 if (castle.repeats.size() != 1) {
752 DEBUG_PRINTF("too many repeats\n");
753 assert(0); // Castles should not have been merged yet.
754 continue;
755 }
756
757 if (!castle.reach().all()) {
758 DEBUG_PRINTF("not dot\n");
759 continue;
760 }
761
762 if (in_degree(v, g) != 1) {
763 DEBUG_PRINTF("too many in-edges\n");
764 continue;
765 }
766
767 RoseEdge e = *in_edges(v, g).first;
768 RoseVertex u = source(e, g);
769
770 if (g[e].history != ROSE_ROLE_HISTORY_NONE) {
771 DEBUG_PRINTF("history already set to something other than NONE?\n");
772 assert(0);
773 continue;
774 }
775
776 if (g[u].min_offset != g[u].max_offset) {
777 DEBUG_PRINTF("pred not fixed offset\n");
778 continue;
779 }
780 DEBUG_PRINTF("pred is fixed offset, at %u\n", g[u].min_offset);
781 assert(g[u].min_offset < ROSE_BOUND_INF);
782
783 size_t lit_length = tbi.minLiteralLen(v);
784 if (lit_length != tbi.maxLiteralLen(v)) {
785 assert(0);
786 DEBUG_PRINTF("variable literal lengths\n");
787 continue;
788 }
789
790 u32 lag = g[v].left.lag;
791 DEBUG_PRINTF("lit_length=%zu, lag=%u\n", lit_length, lag);
792 assert(lag <= lit_length);
793 depth delay_adj(lit_length - lag);
794
795 const PureRepeat &pr = castle.repeats.begin()->second;
796 DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str());
797 DEBUG_PRINTF("delay adj %u\n", (u32)delay_adj);
798
799 if (delay_adj >= pr.bounds.max) {
800 DEBUG_PRINTF("delay adj too large\n");
801 continue;
802 }
803
804 DepthMinMax bounds(pr.bounds); // copy
805 if (delay_adj > bounds.min) {
806 bounds.min = depth(0);
807 } else {
808 bounds.min -= delay_adj;
809 }
810 bounds.max -= delay_adj;
811 setEdgeBounds(g, e, bounds.min, bounds.max.is_finite()
812 ? (u32)bounds.max
813 : ROSE_BOUND_INF);
814 g[v].left.reset();
815 }
816}
817
818} // namespace ue2
819