1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief NFA acceleration analysis code.
31 */
32#include "ng_limex_accel.h"
33
34#include "ng_holder.h"
35#include "ng_misc_opt.h"
36#include "ng_util.h"
37#include "ue2common.h"
38
39#include "nfa/accel.h"
40
41#include "util/bitutils.h" // for CASE_CLEAR
42#include "util/charreach.h"
43#include "util/compile_context.h"
44#include "util/container.h"
45#include "util/dump_charclass.h"
46#include "util/graph_range.h"
47#include "util/small_vector.h"
48#include "util/target_info.h"
49
50#include <algorithm>
51#include <map>
52
53#include <boost/range/adaptor/map.hpp>
54
55using namespace std;
56using boost::adaptors::map_keys;
57
58namespace ue2 {
59
60#define WIDE_FRIEND_MIN 200
61
62static
63void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
64 const flat_set<NFAVertex> &cands,
65 const flat_set<NFAVertex> &preds,
66 flat_set<NFAVertex> *next_cands,
67 flat_set<NFAVertex> *next_preds,
68 flat_set<NFAVertex> *friends) {
69 for (auto v : cands) {
70 if (contains(preds, v)) {
71 continue;
72 }
73
74 const CharReach &acr = g[v].char_reach;
75 DEBUG_PRINTF("checking %zu\n", g[v].index);
76
77 if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
78 DEBUG_PRINTF("bad reach %zu\n", acr.count());
79 continue;
80 }
81
82 for (auto u : inv_adjacent_vertices_range(v, g)) {
83 if (!contains(preds, u)) {
84 DEBUG_PRINTF("bad pred\n");
85 goto next_cand;
86 }
87 }
88
89 next_preds->insert(v);
90 insert(next_cands, adjacent_vertices(v, g));
91
92 DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
93 friends->insert(v);
94 next_cand:;
95 }
96}
97
98void findAccelFriends(const NGHolder &g, NFAVertex v,
99 const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
100 u32 offset, flat_set<NFAVertex> *friends) {
101 /* A friend of an accel state is a successor state which can only be on when
102 * the accel is on. This requires that it has a subset of the accel state's
103 * preds and a charreach which is a subset of the accel state.
104 *
105 * A friend can be safely ignored when accelerating provided there is
106 * sufficient back-off. A friend is useful if it has a wide reach.
107 */
108
109 /* BR cyclic states which may go stale cannot have friends as they may
110 * suddenly turn off leading their so-called friends stranded and alone.
111 * TODO: restrict to only stale going BR cyclics
112 */
113 if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
114 return;
115 }
116
117 u32 friend_depth = offset + 1;
118
119 flat_set<NFAVertex> preds;
120 insert(&preds, inv_adjacent_vertices(v, g));
121 const CharReach &cr = g[v].char_reach;
122
123 flat_set<NFAVertex> cands;
124 insert(&cands, adjacent_vertices(v, g));
125
126 flat_set<NFAVertex> next_preds;
127 flat_set<NFAVertex> next_cands;
128 for (u32 i = 0; i < friend_depth; i++) {
129 findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
130 friends);
131 preds.insert(next_preds.begin(), next_preds.end());
132 next_preds.clear();
133 cands.swap(next_cands);
134 next_cands.clear();
135 }
136}
137
/* Walk forward from \a v collecting, for each distinct path (up to \a depth
 * characters), the sequence of per-vertex reaches seen. Segments are appended
 * deepest-first, so callers must reverse the paths afterwards. Vertices in
 * \a forbidden terminate a path with an empty reach segment. */
static
void findPaths(const NGHolder &g, NFAVertex v,
               const vector<CharReach> &refined_cr,
               vector<vector<CharReach>> *paths,
               const flat_set<NFAVertex> &forbidden, u32 depth) {
    static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
    if (!depth) {
        /* lookahead exhausted: terminate this path here */
        paths->push_back({});
        return;
    }
    if (v == g.accept || v == g.acceptEod) {
        paths->push_back({});
        if (!generates_callbacks(g) || v == g.acceptEod) {
            paths->back().push_back(CharReach()); /* red tape options */
        }
        return;
    }

    /* for the escape 'literals' we want to use the minimal cr so we
     * can be more selective */
    const CharReach &cr = refined_cr[g[v].index];

    if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
        || hasSelfLoop(v, g)) {
        /* give up on pushing past this point */
        paths->push_back({cr});
        return;
    }

    vector<vector<CharReach>> curr;
    for (auto w : adjacent_vertices_range(v, g)) {
        if (contains(forbidden, w)) {
            /* path has looped back to one of the active+boring acceleration
             * states. We can ignore this path if we have sufficient back-
             * off. */
            paths->push_back({CharReach()});
            continue;
        }

        /* if the subtree yields too many paths, retry it with progressively
         * shallower depths until the count is manageable */
        u32 new_depth = depth - 1;
        do {
            curr.clear();
            findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
        } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);

        for (auto &c : curr) {
            c.push_back(cr);
            paths->push_back(std::move(c));
        }
    }
}
189
namespace {
/** \brief Candidate single-byte acceleration scheme: a set of stop characters
 * (\ref cr) and the lookahead offset at which to scan for them. The
 * default-constructed value is a worst-case sentinel (full dot reach,
 * out-of-range offset) that any real scheme beats. */
struct SAccelScheme {
    SAccelScheme(CharReach cr_in, u32 offset_in)
        : cr(std::move(cr_in)), offset(offset_in) {
        assert(offset <= MAX_ACCEL_DEPTH);
    }

    SAccelScheme() {}

    /** \brief Strict weak ordering: fewer stop chars is better, then a
     * smaller offset; the reach itself is only a deterministic tie-break.
     * Note: ORDER_CHECK relies on the local names \c a and \c b. */
    bool operator<(const SAccelScheme &b) const {
        const SAccelScheme &a = *this;

        const size_t a_count = cr.count(), b_count = b.cr.count();
        if (a_count != b_count) {
            return a_count < b_count;
        }

        /* TODO: give bonus if one is a 'caseless' character */
        ORDER_CHECK(offset);
        ORDER_CHECK(cr);
        return false;
    }

    CharReach cr = CharReach::dot(); /* stop (escape) characters */
    u32 offset = MAX_ACCEL_DEPTH + 1; /* sentinel: beyond any legal offset */
};
}
217
/**
 * \brief Limit on the number of (recursive) calls to findBestInternal().
 */
static constexpr size_t MAX_FINDBEST_CALLS = 1000000;

/* Branch-and-bound search over paths [pb, pe): for each offset into the
 * current path, union that segment's reach into \a curr and recurse on the
 * remaining paths, keeping the overall winner in \a best. Candidates already
 * worse than *best are pruned. */
static
void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
                      vector<vector<CharReach>>::const_iterator pe,
                      size_t *num_calls, const SAccelScheme &curr,
                      SAccelScheme *best) {
    assert(curr.offset <= MAX_ACCEL_DEPTH);

    /* give up if the search space has blown out; *best keeps whatever was
     * found so far */
    if (++(*num_calls) > MAX_FINDBEST_CALLS) {
        DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
        return;
    }

    DEBUG_PRINTF("paths left %zu\n", pe - pb);
    if (pb == pe) {
        /* all paths are covered: curr is a complete scheme */
        if (curr < *best) {
            *best = curr;
            DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
                         best->cr.count(), describeClass(best->cr).c_str(),
                         best->offset);
        }
        return;
    }

    DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());

    /* build a candidate scheme for each offset into the current path,
     * discarding any that are already no better than the best known */
    small_vector<SAccelScheme, 10> priority_path;
    priority_path.reserve(pb->size());
    u32 i = 0;
    for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
        SAccelScheme as(*p | curr.cr, max(i, curr.offset));
        if (*best < as) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        priority_path.push_back(move(as));
    }

    /* explore the most promising candidates first; drop any candidate whose
     * reach is a superset of an earlier (better-ranked) candidate */
    sort(priority_path.begin(), priority_path.end());
    for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
        auto jt = next(it);
        for (; jt != priority_path.end(); ++jt) {
            if (!it->cr.isSubsetOf(jt->cr)) {
                break;
            }
        }
        priority_path.erase(next(it), jt);
        DEBUG_PRINTF("||%zu\n", it->cr.count());
    }
    DEBUG_PRINTF("---\n");

    for (const SAccelScheme &in : priority_path) {
        DEBUG_PRINTF("in: count %zu\n", in.cr.count());
        /* recheck against *best: it may have improved since we sorted */
        if (*best < in) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        findBestInternal(pb + 1, pe, num_calls, in, best);

        if (curr.cr == best->cr) {
            return; /* could only get better by offset */
        }
    }
}
286
287static
288SAccelScheme findBest(const vector<vector<CharReach>> &paths,
289 const CharReach &terminating) {
290 SAccelScheme curr(terminating, 0U);
291 SAccelScheme best;
292 size_t num_calls = 0;
293 findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
294 DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
295 DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
296 best.cr.count(), describeClass(best.cr).c_str(), best.offset);
297 return best;
298}
299
namespace {
/** \brief Candidate double-byte acceleration scheme: a set of two-byte stop
 * literals (\ref double_byte), a single-char reach (\ref double_cr) holding
 * characters that could not be expressed as pairs, and the lookahead offset
 * at which to scan. */
struct DAccelScheme {
    DAccelScheme(CharReach cr_in, u32 offset_in)
        : double_cr(std::move(cr_in)), double_offset(offset_in) {
        assert(double_offset <= MAX_ACCEL_DEPTH);
    }

    /** \brief Strict weak ordering: fewer single-char escapes is better;
     * among pure pair schemes, prefer those buildable as a double-vermicelli
     * mask; then fewer pairs and a smaller offset. Note: ORDER_CHECK relies
     * on the local names \c a and \c b. */
    bool operator<(const DAccelScheme &b) const {
        const DAccelScheme &a = *this;

        size_t a_dcount = a.double_cr.count();
        size_t b_dcount = b.double_cr.count();

        assert(!a.double_byte.empty() || a_dcount || a.double_offset);
        assert(!b.double_byte.empty() || b_dcount || b.double_offset);

        if (a_dcount != b_dcount) {
            return a_dcount < b_dcount;
        }

        if (!a_dcount) {
            /* prefer pair sets that can be implemented with the (fast)
             * double-vermicelli mask */
            bool cd_a = buildDvermMask(a.double_byte);
            bool cd_b = buildDvermMask(b.double_byte);
            if (cd_a != cd_b) {
                return cd_a > cd_b;
            }
        }

        ORDER_CHECK(double_byte.size());
        ORDER_CHECK(double_offset);

        /* TODO: give bonus if one is a 'caseless' character */
        ORDER_CHECK(double_byte);
        ORDER_CHECK(double_cr);

        return false;
    }

    flat_set<pair<u8, u8>> double_byte; /* two-byte stop literals */
    CharReach double_cr; /* single-char escapes (overflowed pairs) */
    u32 double_offset = 0; /* lookahead offset from the accel state */
};
}
343
/* Extend scheme \a as with the adjacent segment pair (cr_1, cr_2_in) seen at
 * \a offset_in. Characters already covered by as.double_cr are stripped from
 * both sides first; if the resulting cross product exceeds
 * DOUBLE_SHUFTI_LIMIT, the smaller side is folded into the single-char reach
 * instead of emitting pairs. The scheme's offset is raised as required. */
static
DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
                               const CharReach &cr_2_in, u32 offset_in) {
    cr_1 &= ~as.double_cr;
    CharReach cr_2 = cr_2_in & ~as.double_cr;
    u32 offset = offset_in;

    if (cr_1.none()) {
        DEBUG_PRINTF("empty first element\n");
        ENSURE_AT_LEAST(&as.double_offset, offset);
        return as;
    }

    /* if the second element was restricted (or emptied) by the existing
     * single-char escapes, matches can only be confirmed one byte later */
    if (cr_2_in != cr_2 || cr_2.none()) {
        offset = offset_in + 1;
    }

    size_t two_count = cr_1.count() * cr_2.count();

    DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);

    if (!two_count) {
        DEBUG_PRINTF("empty element\n");
        ENSURE_AT_LEAST(&as.double_offset, offset);
        return as;
    }

    if (two_count > DOUBLE_SHUFTI_LIMIT) {
        /* too many pairs: fold the narrower side into the single-char reach */
        if (cr_2.count() < cr_1.count()) {
            as.double_cr |= cr_2;
            offset = offset_in + 1;
        } else {
            as.double_cr |= cr_1;
        }
    } else {
        /* enumerate the full cross product of stop pairs */
        for (auto i = cr_1.find_first(); i != CharReach::npos;
             i = cr_1.find_next(i)) {
            for (auto j = cr_2.find_first(); j != CharReach::npos;
                 j = cr_2.find_next(j)) {
                as.double_byte.emplace(i, j);
            }
        }
    }

    ENSURE_AT_LEAST(&as.double_offset, offset);
    DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
                 as.double_byte.size(), as.double_cr.count(), as.double_offset);
    return as;
}
393
/* Branch-and-bound search for the best double-byte scheme over paths
 * [pb, pe): for each adjacent segment pair in the current path, extend
 * \a curr with that pair and recurse on the remaining paths, keeping the
 * winner in \a best. Candidates already worse than *best are pruned. */
static
void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
                    vector<vector<CharReach> >::const_iterator pe,
                    const DAccelScheme &curr, DAccelScheme *best) {
    assert(curr.double_offset <= MAX_ACCEL_DEPTH);
    DEBUG_PRINTF("paths left %zu\n", pe - pb);
    DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
                 curr.double_byte.size(), curr.double_cr.count(),
                 curr.double_offset);
    if (pb == pe) {
        /* all paths covered: curr is a complete scheme */
        if (curr < *best) {
            *best = curr;
            DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
                         best->double_byte.size(), best->double_cr.count(),
                         best->double_offset);
        }
        return;
    }

    DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());

    /* one candidate per adjacent pair of segments in this path */
    small_vector<DAccelScheme, 10> priority_path;
    priority_path.reserve(pb->size());
    u32 i = 0;
    for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
         ++p, i++) {
        DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
        if (*best < as) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        priority_path.push_back(move(as));
    }

    /* explore the most promising candidates first */
    sort(priority_path.begin(), priority_path.end());
    DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
    DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
                 best->double_byte.size(), best->double_cr.count(),
                 best->double_offset);

    for (const DAccelScheme &in : priority_path) {
        DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
                     in.double_byte.size(), in.double_cr.count(),
                     in.double_offset);
        /* recheck against *best: it may have improved since we sorted */
        if (*best < in) {
            DEBUG_PRINTF("worse\n");
            continue;
        }
        findDoubleBest(pb + 1, pe, in, best);
    }
}
445
#ifdef DEBUG
/* Debug helper: render each path as a bracketed list of char classes. */
static
void dumpPaths(const vector<vector<CharReach>> &paths) {
    for (const auto &path : paths) {
        DEBUG_PRINTF("path: [");
        for (const auto &seg : path) {
            printf(" [");
            describeClass(stdout, seg, 20, CC_OUT_TEXT);
            printf("]");
        }
        printf(" ]\n");
    }
}
#endif
460
461static
462void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
463 /* paths segments which are a superset of an earlier segment should never be
464 * picked as an acceleration segment -> to improve processing just replace
465 * with dot */
466 for (auto &p : paths) {
467 for (auto it = p.begin(); it != p.end(); ++it) {
468 for (auto jt = next(it); jt != p.end(); ++jt) {
469 if (it->isSubsetOf(*jt)) {
470 *jt = CharReach::dot();
471 }
472 }
473 }
474 }
475}
476
477static
478void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
479 /* try to unify paths which only differ in the last segment */
480 for (vector<vector<CharReach> >::iterator p = paths.begin();
481 p != paths.end() && p + 1 != paths.end();) {
482 vector<CharReach> &a = *p;
483 vector<CharReach> &b = *(p + 1);
484
485 if (a.size() != b.size()) {
486 ++p;
487 continue;
488 }
489
490 u32 i = 0;
491 for (; i < a.size() - 1; i++) {
492 if (a[i] != b[i]) {
493 break;
494 }
495 }
496 if (i == a.size() - 1) {
497 /* we can unify these paths */
498 a[i] |= b[i];
499 paths.erase(p + 1);
500 } else {
501 ++p;
502 }
503 }
504}
505
/* Simplify the path set before the (potentially expensive) brute-force
 * search: widen segments that can never be selected, sort so unifiable paths
 * are adjacent, then merge paths differing only in their final segment. */
static
void improvePaths(vector<vector<CharReach> > &paths) {
#ifdef DEBUG
    DEBUG_PRINTF("orig paths\n");
    dumpPaths(paths);
#endif
    blowoutPathsLessStrictSegment(paths);

    /* sort to bring unifiable paths next to each other */
    sort(paths.begin(), paths.end());

    unifyPathsLastSegment(paths);

#ifdef DEBUG
    DEBUG_PRINTF("opt paths\n");
    dumpPaths(paths);
#endif
}
523
524#define MAX_DOUBLE_ACCEL_PATHS 10
525
526static
527DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
528 const CharReach &terminating) {
529 DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
530 terminating.count());
531 unifyPathsLastSegment(paths);
532
533#ifdef DEBUG
534 DEBUG_PRINTF("paths:\n");
535 dumpPaths(paths);
536#endif
537
538 /* if there are too many paths, shorten the paths to reduce the number of
539 * distinct paths we have to consider */
540 while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
541 for (auto &p : paths) {
542 if (p.empty()) {
543 return DAccelScheme(terminating, 0U);
544 }
545 p.pop_back();
546 }
547 unifyPathsLastSegment(paths);
548 }
549
550 if (paths.empty()) {
551 return DAccelScheme(terminating, 0U);
552 }
553
554 DAccelScheme curr(terminating, 0U);
555 DAccelScheme best(CharReach::dot(), 0U);
556 findDoubleBest(paths.begin(), paths.end(), curr, &best);
557 DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
558 best.double_cr.count());
559 return best;
560}
561
562#define MAX_EXPLORE_PATHS 40
563
564AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
565 const CharReach &terminating,
566 bool look_for_double_byte) {
567 AccelScheme rv;
568 if (look_for_double_byte) {
569 DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
570 if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
571 rv.double_byte = std::move(da.double_byte);
572 rv.double_cr = move(da.double_cr);
573 rv.double_offset = da.double_offset;
574 }
575 }
576
577 improvePaths(paths);
578
579 DEBUG_PRINTF("we have %zu paths\n", paths.size());
580 if (paths.size() > MAX_EXPLORE_PATHS) {
581 return rv; /* too many paths to explore */
582 }
583
584 /* if we were smart we would do something netflowy on the paths to find the
585 * best cut. But we aren't, so we will just brute force it.
586 */
587 SAccelScheme best = findBest(paths, terminating);
588
589 /* find best is a bit lazy in terms of minimising the offset, see if we can
590 * make it better. need to find the min max offset that we need.*/
591 u32 offset = 0;
592 for (const auto &path : paths) {
593 u32 i = 0;
594 for (const auto &cr : path) {
595 if (cr.isSubsetOf(best.cr)) {
596 break;
597 }
598 i++;
599 }
600 offset = MAX(offset, i);
601 }
602 assert(offset <= best.offset);
603 best.offset = offset;
604
605 rv.offset = best.offset;
606 rv.cr = best.cr;
607 if (rv.cr.count() < rv.double_cr.count()) {
608 rv.double_byte.clear();
609 }
610
611 return rv;
612}
613
614AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
615 const vector<CharReach> &refined_cr,
616 const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
617 bool allow_wide, bool look_for_double_byte) {
618 CharReach terminating;
619 for (auto v : verts) {
620 if (!hasSelfLoop(v, g)) {
621 DEBUG_PRINTF("no self loop\n");
622 return AccelScheme(); /* invalid scheme */
623 }
624
625 // check that this state is reachable on most characters
626 terminating |= ~g[v].char_reach;
627 }
628
629 DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
630 size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
631 : ACCEL_MAX_STOP_CHAR;
632 if (terminating.count() > limit) {
633 return AccelScheme(); /* invalid scheme */
634 }
635
636 vector<vector<CharReach>> paths;
637 flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
638
639 /* Note: we can not in general (TODO: ignore when possible) ignore entries
640 * into the bounded repeat cyclic states as that is when the magic happens
641 */
642 for (auto v : br_cyclic | map_keys) {
643 /* TODO: can allow if repeatMin <= 1 ? */
644 ignore_vert_set.erase(v);
645 }
646
647 for (auto v : verts) {
648 for (auto w : adjacent_vertices_range(v, g)) {
649 if (w != v) {
650 findPaths(g, w, refined_cr, &paths, ignore_vert_set,
651 MAX_ACCEL_DEPTH);
652 }
653 }
654 }
655
656 /* paths built wrong: reverse them */
657 for (auto &path : paths) {
658 reverse(path.begin(), path.end());
659 }
660
661 return findBestAccelScheme(std::move(paths), terminating,
662 look_for_double_byte);
663}
664
665NFAVertex get_sds_or_proxy(const NGHolder &g) {
666 DEBUG_PRINTF("looking for sds proxy\n");
667 if (proper_out_degree(g.startDs, g)) {
668 return g.startDs;
669 }
670
671 NFAVertex v = NGHolder::null_vertex();
672 for (auto w : adjacent_vertices_range(g.start, g)) {
673 if (w != g.startDs) {
674 if (!v) {
675 v = w;
676 } else {
677 return g.startDs;
678 }
679 }
680 }
681
682 if (!v) {
683 return g.startDs;
684 }
685
686 while (true) {
687 if (hasSelfLoop(v, g)) {
688 DEBUG_PRINTF("woot %zu\n", g[v].index);
689 return v;
690 }
691 if (out_degree(v, g) != 1) {
692 break;
693 }
694 NFAVertex u = getSoleDestVertex(g, v);
695 if (!g[u].char_reach.all()) {
696 break;
697 }
698 v = u;
699 }
700
701 return g.startDs;
702}
703
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA).
 *
 * \param g graph containing \a v.
 * \param v candidate vertex; must be cyclic (self-loop) to be accelerable.
 * \param refined_cr minimal reach per vertex, indexed by vertex index.
 * \param br_cyclic bounded repeat summaries for cyclic vertices.
 * \param[out] as receives the selected acceleration scheme on success.
 * \param allow_wide if set, permit the wider floating stop-char limit.
 * \return true if a usable acceleration scheme was found.
 */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                   const vector<CharReach> &refined_cr,
                   const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                   AccelScheme *as, bool allow_wide) {
    // For a state to be accelerable, our current criterion is that it be a
    // large character class with a self-loop and narrow set of possible other
    // successors (i.e. no special successors, union of successor reachability
    // is small).
    if (!hasSelfLoop(v, g)) {
        return false;
    }

    // check that this state is reachable on most characters
    /* we want to use the maximal reach here (in the graph) */
    CharReach terminating = g[v].char_reach;
    terminating.flip();

    DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
                 g[v].index, terminating.count(),
                 allow_wide ? " (w)" : "");

    size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
                              : ACCEL_MAX_STOP_CHAR;
    if (terminating.count() > limit) {
        DEBUG_PRINTF("too leaky\n");
        return false;
    }

    /* BFS frontier of non-self successors at the current lookahead depth */
    flat_set<NFAVertex> curr, next;

    insert(&curr, adjacent_vertices(v, g));
    curr.erase(v); // erase self-loop

    // We consider offsets of zero through three; this is fairly arbitrary at
    // present and could probably be increased (FIXME)
    /* WARNING: would/could do horrible things to compile time */
    bool stop = false;
    /* depthReach[i]: union of (refined) successor reach at lookahead i,
     * plus the terminating set */
    vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
    unsigned int depth;
    for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
        CharReach &cr = depthReach[depth];
        for (auto t : curr) {
            if (is_special(t, g)) {
                // We've bumped into the edge of the graph, so we should stop
                // searching.
                // Exception: iff our cyclic state is not a dot, than we can
                // safely accelerate towards an EOD accept.

                /* Exception: nfas that don't generate callbacks so accepts are
                 * fine too */
                if (t == g.accept && !generates_callbacks(g)) {
                    stop = true; // don't search beyond this depth
                    continue;
                } else if (t == g.accept) {
                    goto depth_done;
                }

                assert(t == g.acceptEod);
                stop = true; // don't search beyond this depth
            } else {
                // Non-special vertex
                insert(&next, adjacent_vertices(t, g));
                /* for the escape 'literals' we want to use the minimal cr so we
                 * can be more selective */
                cr |= refined_cr[g[t].index];
            }
        }

        cr |= terminating;
        DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());

        curr.swap(next);
        next.clear();
    }

depth_done:

    if (depth == 0) {
        return false;
    }

    DEBUG_PRINTF("selecting from depth 0..%u\n", depth);

    /* Look for the most awesome acceleration evar */
    for (unsigned int i = 0; i < depth; i++) {
        if (depthReach[i].none()) {
            /* no character keeps the NFA alive at this depth: accelerate on
             * everything ("red tape") */
            DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
            *as = AccelScheme();
            as->offset = i;
            as->cr = CharReach();
            return true;
        }
    }

    // First, loop over our depths and see if we have a suitable 2-byte
    // caseful vermicelli option: this is the (second) fastest accel we have
    if (depth > 1) {
        for (unsigned int i = 0; i < (depth - 1); i++) {
            const CharReach &cra = depthReach[i];
            const CharReach &crb = depthReach[i + 1];
            if ((cra.count() == 1 && crb.count() == 1)
                || (cra.count() == 2 && crb.count() == 2
                    && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
                DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
                *as = AccelScheme();
                as->offset = i;
                return true;
            }
        }
    }

    // Second option: a two-byte shufti (i.e. less than eight 2-byte
    // literals)
    if (depth > 1) {
        for (unsigned int i = 0; i < (depth - 1); i++) {
            if (depthReach[i].count() * depthReach[i+1].count()
                <= DOUBLE_SHUFTI_LIMIT) {
                DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
                *as = AccelScheme();
                as->offset = i;
                return true;
            }
        }
    }

    // Look for offset accel schemes verm/shufti;
    vector<NFAVertex> verts(1, v);
    *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
    DEBUG_PRINTF("as width %zu\n", as->cr.count());
    return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
}
836
837} // namespace ue2
838