/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Rose compile-time analysis for lookaround masks.
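 *
 * A lookaround is a short sequence of (signed offset, character reach)
 * checks, relative to the end of a role's literal match, that must all pass
 * before the role can fire. Computing these masks at compile time lets the
 * runtime confirm a match with a few byte probes instead of running an
 * engine.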
 */

#include "rose_build_lookaround.h"

#include "rose_build_impl.h"
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/rdfa.h"
#include "nfagraph/ng_repeat.h"
#include "nfagraph/ng_util.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/flat_containers.h"
#include "util/verify_types.h"

#include <algorithm>
#include <cstdlib>
#include <queue>
#include <sstream>
#include <utility>

using namespace std;

namespace ue2 {

/** \brief Max search distance for reachability in front of a role. */
static const u32 MAX_FWD_LEN = 64;

/** \brief Max search distance for reachability behind a role. */
static const u32 MAX_BACK_LEN = 64;

/** \brief Max lookaround entries for a role. */
static const u32 MAX_LOOKAROUND_ENTRIES = 16;

/** \brief We would rather have lookarounds with smaller reach than this. */
static const u32 LOOKAROUND_WIDE_REACH = 200;

#if defined(DEBUG) || defined(DUMP_SUPPORT)
static UNUSED
string dump(const map<s32, CharReach> &look) {
    ostringstream oss;
    for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
        if (it != look.begin()) {
            oss << ", ";
        }
        oss << "{" << it->first << ": " << describeClass(it->second) << "}";
    }
    return oss.str();
}
#endif

static
void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
    flat_set<NFAVertex> curr, next;

    // Consider only successors of start with the required top.
    for (const auto &e : out_edges_range(g.start, g)) {
        NFAVertex v = target(e, g);
        if (v == g.startDs) {
            continue;
        }
        if (contains(g[e].tops, top)) {
            curr.insert(v);
        }
    }

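    // Step forward through the graph, recording at each offset the union of
    // the reach of every vertex that may be active there. Stop as soon as an
    // accept is reachable, since no further offset is mandatory.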
    for (u32 i = 0; i < MAX_FWD_LEN; i++) {
        if (curr.empty() || contains(curr, g.accept) ||
            contains(curr, g.acceptEod)) {
            break;
        }

        next.clear();
        CharReach cr;

        for (auto v : curr) {
            assert(!is_special(v, g));
            cr |= g[v].char_reach;
            insert(&next, adjacent_vertices(v, g));
        }

        assert(cr.any());
        look[i] |= cr;
        curr.swap(next);
    }
}

static
void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
                      map<s32, CharReach> &look) {
    flat_set<NFAVertex> curr, next;

    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        if (contains(g[v].reports, report)) {
            curr.insert(v);
        }
    }

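    // Walk backwards from the vertices that fire this report. The leftfix
    // match site is 'lag' bytes behind the literal, so the first byte we can
    // constrain is at offset -(lag + 1).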
    for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
        if (curr.empty() || contains(curr, g.start) ||
            contains(curr, g.startDs)) {
            break;
        }

        next.clear();
        CharReach cr;

        for (auto v : curr) {
            assert(!is_special(v, g));
            cr |= g[v].char_reach;
            insert(&next, inv_adjacent_vertices(v, g));
        }

        assert(cr.any());
        look[0 - i] |= cr;
        curr.swap(next);
    }
}

static
void getForwardReach(const CastleProto &castle, u32 top,
                     map<s32, CharReach> &look) {
    depth len = castle.repeats.at(top).bounds.min;
    len = min(len, depth(MAX_FWD_LEN));
    assert(len.is_finite());

    const CharReach &cr = castle.reach();
    for (u32 i = 0; i < len; i++) {
        look[i] |= cr;
    }
}

static
void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
                      map<s32, CharReach> &look) {
    depth min_depth = depth::infinity();
    for (const auto &m : castle.repeats) {
        const PureRepeat &pr = m.second;
        if (contains(pr.reports, report)) {
            min_depth = min(min_depth, pr.bounds.min);
        }
    }

    if (!min_depth.is_finite()) {
        assert(0);
        return;
    }

    const CharReach &cr = castle.reach();
    for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
         i++) {
        look[0 - i] |= cr;
    }
}

static
void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
    if (rdfa.states.size() < 2) {
        return;
    }

    flat_set<dstate_id_t> curr, next;
    curr.insert(rdfa.start_anchored);

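    // Breadth-first walk over the DFA from the anchored start state, taking
    // the union of the reach of all live transitions at each step. If any
    // reachable state reports, give up entirely: reach beyond an accept
    // state is not mandatory.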
    for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
        next.clear();
        CharReach cr;

        for (const auto state_id : curr) {
            const dstate &ds = rdfa.states[state_id];

            if (!ds.reports.empty() || !ds.reports_eod.empty()) {
                return;
            }

            for (unsigned c = 0; c < N_CHARS; c++) {
                dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
                if (succ != DEAD_STATE) {
                    cr.set(c);
                    next.insert(succ);
                }
            }
        }

        assert(cr.any());
        look[i] |= cr;
        curr.swap(next);
    }
}

static
void getSuffixForwardReach(const suffix_id &suff, u32 top,
                           map<s32, CharReach> &look) {
    if (suff.graph()) {
        getForwardReach(*suff.graph(), top, look);
    } else if (suff.castle()) {
        getForwardReach(*suff.castle(), top, look);
    } else if (suff.dfa()) {
        assert(top == 0); // DFA isn't multi-top capable.
        getForwardReach(*suff.dfa(), look);
    } else if (suff.haig()) {
        assert(top == 0); // DFA isn't multi-top capable.
        getForwardReach(*suff.haig(), look);
    }
}

static
void getRoseForwardReach(const left_id &left, u32 top,
                         map<s32, CharReach> &look) {
    if (left.graph()) {
        getForwardReach(*left.graph(), top, look);
    } else if (left.castle()) {
        getForwardReach(*left.castle(), top, look);
    } else if (left.dfa()) {
        assert(top == 0); // DFA isn't multi-top capable.
        getForwardReach(*left.dfa(), look);
    } else if (left.haig()) {
        assert(top == 0); // DFA isn't multi-top capable.
        getForwardReach(*left.haig(), look);
    }
}

static
void combineForwardMasks(const vector<map<s32, CharReach>> &rose_look,
                         map<s32, CharReach> &look) {
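    // The combined mask must admit every character allowed by any engine. An
    // engine with no entry at an offset places no constraint there, so the
    // combined reach at that offset becomes "all characters".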
    for (u32 i = 0; i < MAX_FWD_LEN; i++) {
        for (const auto &rlook : rose_look) {
            if (contains(rlook, i)) {
                look[i] |= rlook.at(i);
            } else {
                look[i].setall();
            }
        }
    }
}

static
void findForwardReach(const RoseGraph &g, const RoseVertex v,
                      map<s32, CharReach> &look) {
    if (!g[v].reports.empty()) {
        DEBUG_PRINTF("acceptor\n");
        return;
    }

    // Non-leaf vertices can pick up a mask from each successor's prefix
    // (rose) engine.
    vector<map<s32, CharReach>> rose_look;
    for (const auto &e : out_edges_range(v, g)) {
        RoseVertex t = target(e, g);
        if (!g[t].left) {
            DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index);
            return;
        }
        rose_look.push_back(map<s32, CharReach>());
        getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
    }

    if (g[v].suffix) {
        DEBUG_PRINTF("suffix engine\n");
        rose_look.push_back(map<s32, CharReach>());
        getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
    }

    combineForwardMasks(rose_look, look);
}

static
void findBackwardReach(const RoseGraph &g, const RoseVertex v,
                       map<s32, CharReach> &look) {
    if (!g[v].left) {
        return;
    }

    DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
                 g[v].left.lag);

    if (g[v].left.graph) {
        getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
                         g[v].left.lag, look);
    } else if (g[v].left.castle) {
        getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
                         g[v].left.lag, look);
    }

    // TODO: implement DFA variants if necessary.
}

static
void normalise(map<s32, CharReach> &look) {
    // We can erase entries where the reach is "all characters".
    vector<s32> dead;
    for (const auto &m : look) {
        if (m.second.all()) {
            dead.push_back(m.first);
        }
    }
    erase_all(&look, dead);
}

namespace {

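/**
 * \brief Comparator for a priority_queue of lookaround offsets: surfaces the
 * entry with the widest reach at the top, breaking ties in favour of the
 * offset furthest from the match.
 */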
struct LookPriority {
    explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}

    bool operator()(s32 a, s32 b) const {
        const CharReach &a_reach = look.at(a);
        const CharReach &b_reach = look.at(b);
        if (a_reach.count() != b_reach.count()) {
            return a_reach.count() < b_reach.count();
        }
        return abs(a) < abs(b);
    }

private:
    const map<s32, CharReach> &look;
};

} // namespace

static
bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
    for (const auto &m : look) {
        const CharReach &look_cr = m.second;
        if (!overlaps(look_cr, flood_cr)) {
            return false;
        }
    }
    DEBUG_PRINTF("look can't escape flood on %s\n",
                 describeClass(flood_cr).c_str());
    return true;
}

static
bool isFloodProne(const map<s32, CharReach> &look,
                  const set<CharReach> &flood_reach) {
    if (flood_reach.empty()) {
        return false;
    }

    for (const CharReach &flood_cr : flood_reach) {
        if (isFloodProne(look, flood_cr)) {
            return true;
        }
    }

    return false;
}

static
void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
    if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
        return;
    }

    DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());

    // First, remove floods that we already can't escape; they shouldn't
    // affect the analysis below.
    for (auto it = flood_reach.begin(); it != flood_reach.end();) {
        if (isFloodProne(look, *it)) {
            DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
                         describeClass(*it).c_str());
            flood_reach.erase(it++);
        } else {
            ++it;
        }
    }

    LookPriority cmp(look);
    priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
    for (const auto &m : look) {
        pq.push(m.first);
    }

    while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
        s32 d = pq.top();
        assert(contains(look, d));
        const CharReach cr(look[d]); // copy
        pq.pop();

        DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
        look.erase(d);

        // If removing this entry would result in us becoming flood_prone on
        // a particular flood_reach case, reinstate it and move on.
        if (isFloodProne(look, flood_reach)) {
            DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
                         describeClass(cr).c_str());
            look.insert(make_pair(d, cr));
        }
    }

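    // Even once we're under the entry limit, drop entries whose reach is
    // very wide (LOOKAROUND_WIDE_REACH characters or more), as they filter
    // out almost nothing; again, keep any that are needed to escape a flood.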
    while (!pq.empty()) {
        s32 d = pq.top();
        assert(contains(look, d));
        const CharReach cr(look[d]); // copy
        pq.pop();

        if (cr.count() < LOOKAROUND_WIDE_REACH) {
            continue;
        }

        DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
        look.erase(d);

        // If removing this entry would result in us becoming flood_prone on
        // a particular flood_reach case, reinstate it and move on.
        if (isFloodProne(look, flood_reach)) {
            DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
                         describeClass(cr).c_str());
            look.insert(make_pair(d, cr));
        }
    }

    DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
}

static
void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
                    set<CharReach> &flood_reach) {
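    // Collect the reach of any of this role's literals that consist of a
    // single repeated character; such literals can occur densely in flood
    // input, so reduce() must keep at least one lookaround entry able to
    // escape each of these reaches.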
    for (u32 lit_id : tbi.g[v].literals) {
        const ue2_literal &s = tbi.literals.at(lit_id).s;
        if (s.empty()) {
            continue;
        }
        if (is_flood(s)) {
            CharReach cr(*s.begin());
            DEBUG_PRINTF("flood-prone with reach: %s\n",
                         describeClass(cr).c_str());
            flood_reach.insert(cr);
        }
    }
}

namespace {
struct LookProto {
    LookProto(s32 offset_in, CharReach reach_in)
        : offset(offset_in), reach(std::move(reach_in)) {}
    s32 offset;
    CharReach reach;
};
}

static
vector<LookProto> findLiteralReach(const rose_literal_id &lit) {
    vector<LookProto> look;
    look.reserve(lit.s.length());

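    // Relative to the point where the role's checks run, the literal's own
    // bytes occupy offsets [-(length + delay), -(delay + 1)].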
    s32 i = 0 - lit.s.length() - lit.delay;
    for (const auto &c : lit.s) {
        look.emplace_back(i, c);
        i++;
    }

    return look;
}

static
vector<LookProto> findLiteralReach(const RoseBuildImpl &build,
                                   const RoseVertex v) {
    bool first = true;
    vector<LookProto> look;

    for (u32 lit_id : build.g[v].literals) {
        const rose_literal_id &lit = build.literals.at(lit_id);
        auto lit_look = findLiteralReach(lit);

        if (first) {
            look = std::move(lit_look);
            first = false;
            continue;
        }

        // Erase elements from look whose offsets do not appear in lit_look.
        // Where an offset is present in both (the vectors are ordered by
        // offset), union the reaches.
        auto jt = begin(lit_look);
        for (auto it = begin(look); it != end(look);) {
            if (jt == end(lit_look)) {
                // No further lit_look entries, erase remaining elements from
                // look.
                look.erase(it, end(look));
                break;
            }
            if (it->offset < jt->offset) {
                // Offset is present in look but not in lit_look, erase.
                it = look.erase(it);
            } else if (it->offset > jt->offset) {
                // Offset is present in lit_look but not in look, ignore.
                ++jt;
            } else {
                // Offset is present in both, union its reach with look.
                it->reach |= jt->reach;
                ++it;
                ++jt;
            }
        }
    }

    return look;
}

/**
 * Trim lookaround checks from the prefix that overlap with the literals
 * themselves.
 */
static
void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
                  map<s32, CharReach> &look) {
    DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());

    for (const auto &m : findLiteralReach(build, v)) {
        auto it = look.find(m.offset);
        if (it == end(look)) {
            continue;
        }
        if (m.reach.isSubsetOf(it->second)) {
            DEBUG_PRINTF("can trim entry at %d\n", it->first);
            look.erase(it);
        }
    }

    DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
}

static
void normaliseLeftfix(map<s32, CharReach> &look) {
    // We can erase entries where the reach is "all characters", except for
    // the very first one -- this might be required to establish a minimum
    // bound on the literal's match offset.

    // TODO: It would be cleaner to use a literal program instruction to
    // check the minimum bound explicitly.

    if (look.empty()) {
        return;
    }

    const auto earliest = begin(look)->first;

    vector<s32> dead;
    for (const auto &m : look) {
        if (m.second.all() && m.first != earliest) {
            dead.push_back(m.first);
        }
    }
    erase_all(&look, dead);
}

static
bool trimMultipathLeftfix(const RoseBuildImpl &build, const RoseVertex v,
                          vector<map<s32, CharReach>> &looks) {
    size_t path_count = 0;
    for (auto &look : looks) {
        ++path_count;
        DEBUG_PRINTF("Path #%zu\n", path_count);

        assert(!look.empty());
        trimLiterals(build, v, look);

        if (look.empty()) {
            return false;
        }

        // Could be optimized: only the longest path really needs to keep its
        // leading "all characters" entry.
        normaliseLeftfix(look);

        if (look.size() > MAX_LOOKAROUND_ENTRIES) {
            DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
            return false;
        }
    }
    return true;
}

static
void transToLookaround(const vector<map<s32, CharReach>> &looks,
                       vector<vector<LookEntry>> &lookarounds) {
    for (const auto &look : looks) {
        vector<LookEntry> lookaround;
        DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
        lookaround.reserve(look.size());
        for (const auto &m : look) {
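            // LookEntry stores its offset as an s8, so any offset outside
            // [-128, 127] makes the whole conversion infeasible.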
            if (m.first < -128 || m.first > 127) {
                DEBUG_PRINTF("range too big\n");
                lookarounds.clear();
                return;
            }
            s8 offset = verify_s8(m.first);
            lookaround.emplace_back(offset, m.second);
        }
        lookarounds.push_back(lookaround);
    }
}

void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
                         vector<LookEntry> &lookaround) {
    lookaround.clear();

    const RoseGraph &g = tbi.g;

    map<s32, CharReach> look;
    findBackwardReach(g, v, look);
    findForwardReach(g, v, look);
    trimLiterals(tbi, v, look);

    if (look.empty()) {
        return;
    }

    normalise(look);

    if (look.empty()) {
        return;
    }

    set<CharReach> flood_reach;
    findFloodReach(tbi, v, flood_reach);
    reduce(look, flood_reach);

    if (look.empty()) {
        return;
    }

    DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
    lookaround.reserve(look.size());
    for (const auto &m : look) {
        s8 offset = verify_s8(m.first);
        lookaround.emplace_back(offset, m.second);
    }
}

static
bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
                        u32 bucket_size) {
    set<u32> bucket;
    for (const auto &look : looks) {
        for (const auto &l : look) {
            CharReach cr = l.second;
            if (cr.count() > 128) {
                cr.flip();
            }
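            // Group the reach (or its complement, whichever is smaller) by
            // high nibble, building a low-nibble bitmask per group; groups
            // that share a low-nibble mask merge into one (low mask, high
            // mask) pair, and each distinct pair costs one shufti bucket.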
            map<u16, u16> lo2hi;

            for (size_t i = cr.find_first(); i != CharReach::npos;) {
                u8 it_hi = i >> 4;
                u16 low_encode = 0;
                while (i != CharReach::npos && (i >> 4) == it_hi) {
                    low_encode |= 1 << (i & 0xf);
                    i = cr.find_next(i);
                }
                lo2hi[low_encode] |= 1 << it_hi;
            }

            for (const auto &it : lo2hi) {
                u32 hi_lo = (it.second << 16) | it.first;
                bucket.insert(hi_lo);
            }
        }
    }
    DEBUG_PRINTF("shufti has %zu bucket(s)\n", bucket.size());
    return bucket.size() <= bucket_size;
}

static
bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
                             vector<map<s32, CharReach>> &looks) {
    if (!isAcyclic(g)) {
        DEBUG_PRINTF("contains back-edge\n");
        return false;
    }

    // Must be floating chains wired to startDs.
    if (!isFloating(g)) {
        DEBUG_PRINTF("not a floating start\n");
        return false;
    }

    vector<NFAVertex> curr;
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        if (v == g.start || v == g.startDs) {
            DEBUG_PRINTF("empty graph\n");
            return true;
        }
        if (contains(g[v].reports, report)) {
            curr.push_back(v);
        }
    }

    assert(!curr.empty());

    u32 total_len = curr.size();

    for (const auto &v : curr) {
        looks.emplace_back(map<s32, CharReach>());
        looks.back()[0 - (lag + 1)] = g[v].char_reach;
    }

    bool curr_active = false;

    /* For each offset -i, we trace the paths backwards through the vertices
     * in curr. Once there are more than 8 paths with more than 64 look
     * entries in total, neither MULTIPATH_LOOKAROUND nor MULTIPATH_SHUFTI
     * can be built, so we give up the path finding. Otherwise, the loop
     * halts when every vertex in curr has reached startDs.
     */
    for (u32 i = lag + 2; i < (lag + 2) + MAX_BACK_LEN; i++) {
        curr_active = false;
        size_t curr_size = curr.size();
        if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
            DEBUG_PRINTF("range is larger than 16 in multi-path\n");
            return false;
        }

        for (size_t idx = 0; idx < curr_size; idx++) {
            NFAVertex v = curr[idx];
            if (v == g.startDs) {
                continue;
            }
            assert(!is_special(v, g));

            for (auto u : inv_adjacent_vertices_range(v, g)) {
                if (u == g.start || u == g.startDs) {
                    curr[idx] = g.startDs;
                    break;
                }
            }

            if (is_special(curr[idx], g)) {
                continue;
            }

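            // The first predecessor continues this path in place; every
            // further predecessor forks a fresh path, copying the look map
            // accumulated so far.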
            for (auto u : inv_adjacent_vertices_range(v, g)) {
                curr_active = true;
                if (curr[idx] == v) {
                    curr[idx] = u;
                    looks[idx][0 - i] = g[u].char_reach;
                    total_len++;
                } else {
                    curr.push_back(u);
                    looks.push_back(looks[idx]);
                    (looks.back())[0 - i] = g[u].char_reach;
                    total_len += looks.back().size();
                }

                if (curr.size() > MAX_LOOKAROUND_PATHS && total_len > 64) {
                    DEBUG_PRINTF("too many branches\n");
                    return false;
                }
            }
        }
        if (!curr_active) {
            break;
        }
    }

    if (curr_active) {
        DEBUG_PRINTF("single path too long\n");
        return false;
    }

    // More than 8 paths, check multi-path shufti.
    if (curr.size() > MAX_LOOKAROUND_PATHS) {
        u32 bucket_size = total_len > 32 ? 8 : 16;
        if (!checkShuftiBuckets(looks, bucket_size)) {
            DEBUG_PRINTF("shufti has too many buckets\n");
            return false;
        }
    }

    assert(!looks.empty());
    if (looks.size() == 1) {
        DEBUG_PRINTF("single lookaround\n");
    } else {
        DEBUG_PRINTF("multi-path lookaround\n");
    }
    DEBUG_PRINTF("done\n");
    return true;
}

bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
                           vector<vector<LookEntry>> &lookaround) {
    lookaround.clear();

    const RoseGraph &g = build.g;
    const left_id leftfix(g[v].left);

    if (!contains(build.transient, leftfix)) {
        DEBUG_PRINTF("not transient\n");
        return false;
    }

    if (!leftfix.graph()) {
        DEBUG_PRINTF("only supported for graphs so far\n");
        return false;
    }

    vector<map<s32, CharReach>> looks;
    if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report,
                                 g[v].left.lag, looks)) {
        DEBUG_PRINTF("graph has a loop or is too large\n");
        return false;
    }

    if (!trimMultipathLeftfix(build, v, looks)) {
        return false;
    }
    transToLookaround(looks, lookaround);

    return !lookaround.empty();
}

void mergeLookaround(vector<LookEntry> &lookaround,
                     const vector<LookEntry> &more_lookaround) {
    if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
        DEBUG_PRINTF("big enough!\n");
        return;
    }

    // Don't merge lookarounds at offsets we already have entries for.
    flat_set<s8> offsets;
    for (const auto &e : lookaround) {
        offsets.insert(e.offset);
    }

    map<s32, CharReach> more;
    LookPriority cmp(more);
    priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
    for (const auto &e : more_lookaround) {
        if (!contains(offsets, e.offset)) {
            more.emplace(e.offset, e.reach);
            pq.push(e.offset);
        }
    }

    while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
        const s32 offset = pq.top();
        pq.pop();
        const auto &cr = more.at(offset);
        DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
        lookaround.emplace_back(verify_s8(offset), cr);
    }

    // Order by offset.
    sort(begin(lookaround), end(lookaround),
         [](const LookEntry &a, const LookEntry &b) {
             return a.offset < b.offset;
         });
}

} // namespace ue2