1/*
2 * Copyright (c) 2015-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "rose_build_misc.h"
30#include "rose_build_impl.h"
31
32#include "rose_build_resources.h"
33#include "hwlm/hwlm_literal.h"
34#include "nfa/castlecompile.h"
35#include "nfa/goughcompile.h"
36#include "nfa/mcclellancompile_util.h"
37#include "nfa/nfa_api.h"
38#include "nfa/rdfa.h"
39#include "nfa/tamaramacompile.h"
40#include "nfagraph/ng_holder.h"
41#include "nfagraph/ng_limex.h"
42#include "nfagraph/ng_reports.h"
43#include "nfagraph/ng_repeat.h"
44#include "nfagraph/ng_util.h"
45#include "nfagraph/ng_width.h"
46#include "smallwrite/smallwrite_build.h"
47#include "util/alloc.h"
48#include "util/boundary_reports.h"
49#include "util/compile_context.h"
50#include "util/container.h"
51#include "util/graph.h"
52#include "util/graph_range.h"
53#include "util/make_unique.h"
54#include "util/order_check.h"
55#include "util/report_manager.h"
56#include "util/ue2string.h"
57#include "util/verify_types.h"
58#include "ue2common.h"
59#include "grey.h"
60
61#include <boost/graph/breadth_first_search.hpp>
62
63using namespace std;
64
65namespace ue2 {
66
67// just to get it out of the header
68RoseBuild::~RoseBuild() { }
69
70RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in,
71 SomSlotManager &ssm_in,
72 SmallWriteBuild &smwr_in,
73 const CompileContext &cc_in,
74 const BoundaryReports &boundary_in)
75 : cc(cc_in),
76 root(add_vertex(g)),
77 anchored_root(add_vertex(g)),
78 hasSom(false),
79 group_end(0),
80 ematcher_region_size(0),
81 eod_event_literal_id(MO_INVALID_IDX),
82 max_rose_anchored_floating_overlap(0),
83 rm(rm_in),
84 ssm(ssm_in),
85 smwr(smwr_in),
86 boundary(boundary_in),
87 next_nfa_report(0) {
88 // add root vertices to graph
89 g[root].min_offset = 0;
90 g[root].max_offset = 0;
91
92 g[anchored_root].min_offset = 0;
93 g[anchored_root].max_offset = 0;
94}
95
96RoseBuildImpl::~RoseBuildImpl() {
97 // empty
98}
99
100bool RoseVertexProps::isBoring(void) const {
101 return !suffix && !left;
102}
103
104bool RoseVertexProps::fixedOffset(void) const {
105 assert(min_offset <= max_offset); /* ensure offsets calculated */
106 return max_offset == min_offset && max_offset != ROSE_BOUND_INF;
107}
108
109bool RoseBuildImpl::isRootSuccessor(const RoseVertex &v) const {
110 for (auto u : inv_adjacent_vertices_range(v, g)) {
111 if (isAnyStart(u)) {
112 return true;
113 }
114 }
115 return false;
116}
117
118bool RoseBuildImpl::isNonRootSuccessor(const RoseVertex &v) const {
119 for (auto u : inv_adjacent_vertices_range(v, g)) {
120 if (!isAnyStart(u)) {
121 return true;
122 }
123 }
124 return false;
125}
126
127bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v) {
128 for (const auto &e : out_edges_range(v, g)) {
129 if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
130 return true;
131 }
132 }
133
134 return false;
135}
136
137bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v) {
138 for (const auto &e : out_edges_range(v, g)) {
139 if (g[e].history == ROSE_ROLE_HISTORY_LAST_BYTE) {
140 return true;
141 }
142 }
143
144 return false;
145}
146
147static
148bool isInTable(const RoseBuildImpl &tbi, RoseVertex v,
149 rose_literal_table table) {
150 const auto &lit_ids = tbi.g[v].literals;
151 if (lit_ids.empty()) {
152 return false; // special role with no literals
153 }
154
155 // All literals for a given vertex will be in the same table, so we need
156 // only inspect the first one.
157 const auto lit_table = tbi.literals.at(*lit_ids.begin()).table;
158
159 // Verify that all literals for this vertex are in the same table.
160 assert(all_of_in(lit_ids, [&](u32 lit_id) {
161 return tbi.literals.at(lit_id).table == lit_table;
162 }));
163
164 return lit_table == table;
165}
166
167bool RoseBuildImpl::isAnchored(RoseVertex v) const {
168 return isInTable(*this, v, ROSE_ANCHORED);
169}
170
171bool RoseBuildImpl::isFloating(RoseVertex v) const {
172 return isInTable(*this, v, ROSE_FLOATING);
173}
174
175bool RoseBuildImpl::isInETable(RoseVertex v) const {
176 return isInTable(*this, v, ROSE_EOD_ANCHORED);
177}
178
179bool RoseBuildImpl::hasLiteralInTable(RoseVertex v,
180 enum rose_literal_table t) const {
181 return isInTable(*this, v, t);
182}
183
184/* Indicates that the floating table (if it exists) will be only run
185 conditionally based on matches from the anchored table. */
186bool RoseBuildImpl::hasNoFloatingRoots() const {
187 for (auto v : adjacent_vertices_range(root, g)) {
188 if (isFloating(v)) {
189 DEBUG_PRINTF("direct floating root %zu\n", g[v].index);
190 return false;
191 }
192 }
193
194 /* need to check if the anchored_root has any literals which are too deep */
195 for (auto v : adjacent_vertices_range(anchored_root, g)) {
196 if (isFloating(v)) {
197 DEBUG_PRINTF("indirect floating root %zu\n", g[v].index);
198 return false;
199 }
200 }
201
202 return true;
203}
204
205size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const {
206 const auto &lit_ids = g[v].literals;
207 assert(!lit_ids.empty());
208
209 size_t maxlen = 0;
210
211 for (const auto &lit_id : lit_ids) {
212 maxlen = max(maxlen, literals.at(lit_id).elength());
213 }
214
215 return maxlen;
216}
217
218size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const {
219 const auto &lit_ids = g[v].literals;
220 assert(!lit_ids.empty());
221
222 size_t minlen = ROSE_BOUND_INF;
223
224 for (const auto &lit_id : lit_ids) {
225 minlen = min(minlen, literals.at(lit_id).elength());
226 }
227
228 return minlen;
229}
230
231// RoseBuild factory
232unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
233 SomSlotManager &ssm,
234 SmallWriteBuild &smwr,
235 const CompileContext &cc,
236 const BoundaryReports &boundary) {
237 return ue2::make_unique<RoseBuildImpl>(rm, ssm, smwr, cc, boundary);
238}
239
240bool roseIsPureLiteral(const RoseEngine *t) {
241 return t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL;
242}
243
244// Returns non-zero max overlap len if a suffix of the literal 'a' overlaps
245// with a prefix of the literal 'b' or 'a' can be contained in 'b'.
246size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay) {
247 /* overly conservative if only part of the string is nocase */
248 bool nocase = a.any_nocase() || b.any_nocase();
249 DEBUG_PRINTF("max overlap %s %s+%u %d\n", dumpString(a).c_str(),
250 dumpString(b).c_str(), b_delay, (int)nocase);
251 size_t a_len = a.length();
252 size_t b_len = b.length();
253 const char *a_end = a.c_str() + a_len;
254 const char *b_end = b.c_str() + b_len;
255 if (b_delay >= a_len) {
256 return b_len + b_delay;
257 } else if (b_delay) {
258 /* a can be a substring of b which overlaps some of the end dots
259 * OR b can be a substring near the end of a */
260 /* ignore overlap due to the final trailing dot as delayed literals
261 * are delivered before undelayed */
262 for (u32 j = b_delay - 1; j > 0; j--) {
263 if (b_len + j >= a_len) {
264 if (!cmp(a.c_str(), b_end + j - a_len, a_len - j, nocase)) {
265 return b_len + j;
266 }
267 } else {
268 if (!cmp(a_end - j - b_len, b.c_str(), b_len, nocase)) {
269 return b_len + j;
270 }
271 }
272 }
273 }
274
275 return maxStringOverlap(a.get_string(), b.get_string(), nocase);
276}
277
278// Returns non-zero max overlap len if a suffix of the literal ID 'a' overlaps
279// with a prefix of the literal ID 'b' or 'a' can be contained in 'b'.
280size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) {
281 assert(!a.delay);
282 return maxOverlap(a.s, b.s, b.delay);
283}
284
285static
286const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi,
287 u32 literal_id) {
288 auto it = tbi.anchoredLitSuffix.find(literal_id);
289 if (it != tbi.anchoredLitSuffix.end()) {
290 return it->second;
291 }
292 return tbi.literals.at(literal_id);
293}
294
295ue2_literal findNonOverlappingTail(const set<ue2_literal> &lits,
296 const ue2_literal &s) {
297 size_t max_overlap = 0;
298
299 for (const auto &lit : lits) {
300 size_t overlap = lit != s ? maxStringOverlap(lit, s)
301 : maxStringSelfOverlap(s);
302 max_overlap = max(max_overlap, overlap);
303 }
304
305 /* find the tail that doesn't overlap */
306 ue2_literal tail = s.substr(max_overlap);
307 DEBUG_PRINTF("%zu overlap, tail: '%s'\n", max_overlap,
308 dumpString(tail).c_str());
309 return tail;
310}
311
312size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const {
313 size_t overlap = 0;
314 for (auto u_lit_id : g[u].literals) {
315 const rose_literal_id &ul = getOverlapLiteral(*this, u_lit_id);
316 for (auto v_lit_id : g[v].literals) {
317 const rose_literal_id &vl = getOverlapLiteral(*this, v_lit_id);
318 overlap = max(overlap, maxOverlap(ul, vl));
319 }
320 }
321 return overlap;
322}
323
324void RoseBuildImpl::removeVertices(const vector<RoseVertex> &dead) {
325 for (auto v : dead) {
326 assert(!isAnyStart(v));
327 DEBUG_PRINTF("removing vertex %zu\n", g[v].index);
328 for (auto lit_id : g[v].literals) {
329 literal_info[lit_id].vertices.erase(v);
330 }
331 clear_vertex(v, g);
332 remove_vertex(v, g);
333 }
334 renumber_vertices(g);
335}
336
337// Find the maximum bound on the edges to this vertex's successors ignoring
338// those via infixes.
339u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const {
340 u32 maxBound = 0;
341 for (const auto &e : out_edges_range(u, g)) {
342 RoseVertex v = target(e, g);
343
344 if (g[v].left) {
345 continue;
346 }
347
348 u32 thisBound = g[e].maxBound;
349
350 if (thisBound == ROSE_BOUND_INF) {
351 return ROSE_BOUND_INF;
352 }
353
354 if (!g[v].eod_accept) {
355 // Add the length of the longest of our literals.
356 thisBound += maxLiteralLen(v);
357 }
358
359 maxBound = max(maxBound, thisBound);
360 }
361
362 assert(maxBound <= ROSE_BOUND_INF);
363 return maxBound;
364}
365
366u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay,
367 rose_literal_table table) {
368 DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(),
369 table);
370 assert(table != ROSE_ANCHORED);
371 rose_literal_id key(s, table, delay);
372
373 auto m = literals.insert(key);
374 u32 id = m.first;
375 bool inserted = m.second;
376
377 if (inserted) {
378 literal_info.push_back(rose_literal_info());
379 assert(literal_info.size() == id + 1);
380
381 if (delay) {
382 u32 undelayed_id = getLiteralId(s, 0, table);
383 literal_info[id].undelayed_id = undelayed_id;
384 literal_info[undelayed_id].delayed_ids.insert(id);
385 } else {
386 literal_info[id].undelayed_id = id;
387 }
388 }
389 return id;
390}
391
392// Function that operates on a msk/cmp pair and a literal, as used in
393// hwlmLiteral, and zeroes msk elements that don't add any power to the
394// literal.
395void normaliseLiteralMask(const ue2_literal &s_in, vector<u8> &msk,
396 vector<u8> &cmp) {
397 assert(msk.size() == cmp.size());
398 assert(msk.size() <= HWLM_MASKLEN);
399
400 if (msk.empty()) {
401 return;
402 }
403
404 // Work over a caseless copy if the string contains nocase chars. This will
405 // ensure that we treat masks designed to handle mixed-sensitivity literals
406 // correctly: these will be matched by the literal matcher in caseless
407 // mode, with the mask used to narrow the matches.
408 ue2_literal s(s_in);
409 if (s.any_nocase()) {
410 make_nocase(&s);
411 }
412
413 ue2_literal::const_reverse_iterator it = s.rbegin(), ite = s.rend();
414 size_t i = msk.size();
415 while (i-- != 0 && it != ite) {
416 const CharReach &cr = *it;
417 for (size_t c = cr.find_first(); c != CharReach::npos;
418 c = cr.find_next(c)) {
419 if (((u8)c & msk[i]) != cmp[i]) {
420 goto skip;
421 }
422 }
423
424 // If we didn't jump out of the loop to skip, then this mask position
425 // doesn't further narrow the set of acceptable literals from those
426 // accepted by s. So we can zero this element.
427 msk[i] = 0;
428 cmp[i] = 0;
429 skip:
430 ++it;
431 }
432
433 // Wipe out prefix zeroes.
434 while (!msk.empty() && msk[0] == 0) {
435 msk.erase(msk.begin());
436 cmp.erase(cmp.begin());
437 }
438}
439
440rose_literal_id::rose_literal_id(const ue2_literal &s_in,
441 const vector<u8> &msk_in, const vector<u8> &cmp_in,
442 rose_literal_table table_in, u32 delay_in)
443 : s(s_in), msk(msk_in), cmp(cmp_in), table(table_in),
444 delay(delay_in), distinctiveness(0) {
445 assert(msk.size() == cmp.size());
446 assert(msk.size() <= HWLM_MASKLEN);
447 assert(delay <= MAX_DELAY);
448
449 normaliseLiteralMask(s, msk, cmp);
450}
451
452u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector<u8> &msk,
453 const vector<u8> &cmp, u32 delay,
454 rose_literal_table table) {
455 DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(),
456 table);
457 assert(table != ROSE_ANCHORED);
458 rose_literal_id key(s, msk, cmp, table, delay);
459
460 /* ue2_literals are always uppercased if nocase and must have an
461 * alpha char */
462
463 auto m = literals.insert(key);
464 u32 id = m.first;
465 bool inserted = m.second;
466
467 if (inserted) {
468 literal_info.push_back(rose_literal_info());
469 assert(literal_info.size() == id + 1);
470
471 if (delay) {
472 u32 undelayed_id = getLiteralId(s, msk, cmp, 0, table);
473 literal_info[id].undelayed_id = undelayed_id;
474 literal_info[undelayed_id].delayed_ids.insert(id);
475 } else {
476 literal_info[id].undelayed_id = id;
477 }
478 }
479 return id;
480}
481
482u32 RoseBuildImpl::getNewLiteralId() {
483 rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0);
484 u32 numLiterals = verify_u32(literals.size());
485 key.distinctiveness = numLiterals;
486
487 auto m = literals.insert(key);
488 assert(m.second);
489 u32 id = m.first;
490
491 literal_info.push_back(rose_literal_info());
492 assert(literal_info.size() == id + 1);
493
494 literal_info[id].undelayed_id = id;
495
496 return id;
497}
498
499bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) {
500 ORDER_CHECK(minBound);
501 ORDER_CHECK(maxBound);
502 ORDER_CHECK(history);
503 return false;
504}
505
506#ifndef NDEBUG
507bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) {
508 const RoseGraph &g = build.g;
509 assert(g[v].left);
510
511 set<u32> graph_tops;
512 if (!build.isRootSuccessor(v)) {
513 for (const auto &e : in_edges_range(v, g)) {
514 graph_tops.insert(g[e].rose_top);
515 }
516 }
517
518 return is_subset_of(graph_tops, all_tops(g[v].left));
519}
520#endif
521
522u32 OutfixInfo::get_queue(QueueIndexFactory &qif) {
523 if (queue == ~0U) {
524 queue = qif.get_queue();
525 }
526
527 return queue;
528}
529
530namespace {
531class OutfixAllReports : public boost::static_visitor<set<ReportID>> {
532public:
533 set<ReportID> operator()(const boost::blank &) const {
534 return set<ReportID>();
535 }
536
537 template<class T>
538 set<ReportID> operator()(const unique_ptr<T> &x) const {
539 return all_reports(*x);
540 }
541
542 set<ReportID> operator()(const MpvProto &mpv) const {
543 set<ReportID> reports;
544 for (const auto &puff : mpv.puffettes) {
545 reports.insert(puff.report);
546 }
547 for (const auto &puff : mpv.triggered_puffettes) {
548 reports.insert(puff.report);
549 }
550 return reports;
551 }
552};
553}
554
555set<ReportID> all_reports(const OutfixInfo &outfix) {
556 auto reports = boost::apply_visitor(OutfixAllReports(), outfix.proto);
557 assert(!reports.empty());
558 return reports;
559}
560
561bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const {
562 return top == b.top && graph == b.graph && castle == b.castle &&
563 rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama;
564}
565
566bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
567 const RoseSuffixInfo &a = *this;
568 ORDER_CHECK(top);
569 ORDER_CHECK(graph);
570 ORDER_CHECK(castle);
571 ORDER_CHECK(haig);
572 ORDER_CHECK(rdfa);
573 ORDER_CHECK(tamarama);
574 assert(a.dfa_min_width == b.dfa_min_width);
575 assert(a.dfa_max_width == b.dfa_max_width);
576 return false;
577}
578
579size_t RoseSuffixInfo::hash() const {
580 return hash_all(top, graph, castle, rdfa, haig, tamarama);
581}
582
583void RoseSuffixInfo::reset(void) {
584 top = 0;
585 graph.reset();
586 castle.reset();
587 rdfa.reset();
588 haig.reset();
589 tamarama.reset();
590 dfa_min_width = depth(0);
591 dfa_max_width = depth::infinity();
592}
593
594std::set<ReportID> all_reports(const suffix_id &s) {
595 assert(s.graph() || s.castle() || s.haig() || s.dfa());
596 if (s.tamarama()) {
597 return all_reports(*s.tamarama());
598 } else if (s.graph()) {
599 return all_reports(*s.graph());
600 } else if (s.castle()) {
601 return all_reports(*s.castle());
602 } else if (s.dfa()) {
603 return all_reports(*s.dfa());
604 } else {
605 return all_reports(*s.haig());
606 }
607}
608
609depth findMinWidth(const suffix_id &s) {
610 assert(s.graph() || s.castle() || s.haig() || s.dfa());
611 if (s.graph()) {
612 return findMinWidth(*s.graph());
613 } else if (s.castle()) {
614 return findMinWidth(*s.castle());
615 } else {
616 return s.dfa_min_width;
617 }
618}
619
620depth findMinWidth(const suffix_id &s, u32 top) {
621 assert(s.graph() || s.castle() || s.haig() || s.dfa());
622 if (s.graph()) {
623 return findMinWidth(*s.graph(), top);
624 } else if (s.castle()) {
625 return findMinWidth(*s.castle(), top);
626 } else {
627 return s.dfa_min_width;
628 }
629}
630
631depth findMaxWidth(const suffix_id &s) {
632 assert(s.graph() || s.castle() || s.haig() || s.dfa());
633 if (s.graph()) {
634 return findMaxWidth(*s.graph());
635 } else if (s.castle()) {
636 return findMaxWidth(*s.castle());
637 } else {
638 return s.dfa_max_width;
639 }
640}
641
642depth findMaxWidth(const suffix_id &s, u32 top) {
643 assert(s.graph() || s.castle() || s.haig() || s.dfa());
644 if (s.graph()) {
645 return findMaxWidth(*s.graph(), top);
646 } else if (s.castle()) {
647 return findMaxWidth(*s.castle(), top);
648 } else {
649 return s.dfa_max_width;
650 }
651}
652
653bool has_eod_accepts(const suffix_id &s) {
654 assert(s.graph() || s.castle() || s.haig() || s.dfa());
655 if (s.graph()) {
656 /* ignore accept -> eod edge */
657 return in_degree(s.graph()->acceptEod, *s.graph()) > 1;
658 } else if (s.castle()) {
659 return false;
660 } else if (s.dfa()) {
661 return has_eod_accepts(*s.dfa());
662 } else {
663 return has_eod_accepts(*s.haig());
664 }
665}
666
667bool has_non_eod_accepts(const suffix_id &s) {
668 assert(s.graph() || s.castle() || s.haig() || s.dfa());
669 if (s.graph()) {
670 return in_degree(s.graph()->accept, *s.graph());
671 } else if (s.castle()) {
672 return true;
673 } else if (s.dfa()) {
674 return has_non_eod_accepts(*s.dfa());
675 } else {
676 return has_non_eod_accepts(*s.haig());
677 }
678}
679
680set<u32> all_tops(const suffix_id &s) {
681 assert(s.graph() || s.castle() || s.haig() || s.dfa());
682 if (s.graph()) {
683 flat_set<u32> tops = getTops(*s.graph());
684 assert(!tops.empty());
685 return {tops.begin(), tops.end()};
686 }
687
688 if (s.castle()) {
689 return assoc_keys(s.castle()->repeats);
690 }
691
692 // Other types of suffix are not multi-top.
693 return {0};
694}
695
696size_t suffix_id::hash() const {
697 return hash_all(g, c, d, h, t);
698}
699
700bool isAnchored(const left_id &r) {
701 assert(r.graph() || r.castle() || r.haig() || r.dfa());
702 if (r.graph()) {
703 return isAnchored(*r.graph());
704 }
705 if (r.dfa()) {
706 return r.dfa()->start_anchored == DEAD_STATE;
707 }
708 if (r.haig()) {
709 return r.haig()->start_anchored == DEAD_STATE;
710 }
711
712 // All other types are explicitly anchored.
713 return true;
714}
715
716depth findMinWidth(const left_id &r) {
717 assert(r.graph() || r.castle() || r.haig() || r.dfa());
718 if (r.graph()) {
719 return findMinWidth(*r.graph());
720 } else if (r.castle()) {
721 return findMinWidth(*r.castle());
722 } else {
723 return r.dfa_min_width;
724 }
725}
726
727depth findMaxWidth(const left_id &r) {
728 assert(r.graph() || r.castle() || r.haig() || r.dfa());
729 if (r.graph()) {
730 return findMaxWidth(*r.graph());
731 } else if (r.castle()) {
732 return findMaxWidth(*r.castle());
733 } else {
734 return r.dfa_max_width;
735 }
736}
737
738set<u32> all_tops(const left_id &r) {
739 assert(r.graph() || r.castle() || r.haig() || r.dfa());
740 if (r.graph()) {
741 flat_set<u32> tops = getTops(*r.graph());
742 return {tops.begin(), tops.end()};
743 }
744
745 if (r.castle()) {
746 return assoc_keys(r.castle()->repeats);
747 }
748
749 // Other types of rose are not multi-top.
750 return {0};
751}
752
753set<u32> all_reports(const left_id &left) {
754 assert(left.graph() || left.castle() || left.haig() || left.dfa());
755 if (left.graph()) {
756 return all_reports(*left.graph());
757 } else if (left.castle()) {
758 return all_reports(*left.castle());
759 } else if (left.dfa()) {
760 return all_reports(*left.dfa());
761 } else {
762 return all_reports(*left.haig());
763 }
764}
765
766u32 num_tops(const left_id &r) {
767 return all_tops(r).size();
768}
769
770size_t left_id::hash() const {
771 return hash_all(g, c, d, h);
772}
773
774u64a findMaxOffset(const set<ReportID> &reports, const ReportManager &rm) {
775 assert(!reports.empty());
776 u64a maxOffset = 0;
777 for (const auto &report_id : reports) {
778 const Report &ir = rm.getReport(report_id);
779 if (ir.hasBounds()) {
780 maxOffset = max(maxOffset, ir.maxOffset);
781 } else {
782 return MAX_OFFSET;
783 }
784 }
785 return maxOffset;
786}
787
788size_t LeftEngInfo::hash() const {
789 return hash_all(graph, castle, dfa, haig, tamarama, lag, leftfix_report);
790}
791
792void LeftEngInfo::reset(void) {
793 graph.reset();
794 castle.reset();
795 dfa.reset();
796 haig.reset();
797 tamarama.reset();
798 lag = 0;
799 leftfix_report = MO_INVALID_IDX;
800 dfa_min_width = depth(0);
801 dfa_max_width = depth::infinity();
802}
803
804LeftEngInfo::operator bool() const {
805 assert((int)!!castle + (int)!!dfa + (int)!!haig <= 1);
806 assert(!castle || !graph);
807 assert(!dfa || graph); /* dfas always have the graph as well */
808 assert(!haig || graph);
809 return graph || castle || dfa || haig;
810}
811
812u32 roseQuality(const RoseResources &res, const RoseEngine *t) {
813 /* Rose is low quality if the atable is a Mcclellan 16 or has multiple DFAs
814 */
815 if (res.has_anchored) {
816 if (res.has_anchored_multiple) {
817 DEBUG_PRINTF("multiple atable engines\n");
818 return 0;
819 }
820
821 if (res.has_anchored_large) {
822 DEBUG_PRINTF("m16 atable engine\n");
823 return 0;
824 }
825 }
826
827 /* if we always run multiple engines then we are slow */
828 u32 always_run = 0;
829
830 if (res.has_anchored) {
831 always_run++;
832 }
833
834 if (t->eagerIterOffset) {
835 /* eager prefixes are always run */
836 always_run++;
837 }
838
839 if (res.has_floating) {
840 /* TODO: ignore conditional ftables, or ftables beyond smwr region */
841 always_run++;
842 }
843
844 if (t->ematcherOffset) {
845 always_run++;
846 }
847
848 /* ignore mpv outfixes as they are v good, mpv outfixes are before begin */
849 if (t->outfixBeginQueue != t->outfixEndQueue) {
850 /* TODO: ignore outfixes > smwr region */
851 always_run++;
852 }
853
854 bool eod_prefix = false;
855
856 const LeftNfaInfo *left = getLeftTable(t);
857 for (u32 i = 0; i < t->activeLeftCount; i++) {
858 if (left->eod_check) {
859 eod_prefix = true;
860 break;
861 }
862 }
863
864 if (eod_prefix) {
865 always_run++;
866 DEBUG_PRINTF("eod prefixes are slow");
867 return 0;
868 }
869
870 if (always_run > 1) {
871 DEBUG_PRINTF("we always run %u engines\n", always_run);
872 return 0;
873 }
874
875 return 1;
876}
877
878u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
879 const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
880 assert(!lit_vertices.empty());
881
882 u32 min_offset = UINT32_MAX;
883 for (const auto &v : lit_vertices) {
884 min_offset = min(min_offset, build.g[v].min_offset);
885 }
886
887 return min_offset;
888}
889
890u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
891 const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
892 assert(!lit_vertices.empty());
893
894 u32 max_offset = 0;
895 for (const auto &v : lit_vertices) {
896 max_offset = max(max_offset, build.g[v].max_offset);
897 }
898
899 return max_offset;
900}
901
902bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
903 const auto &g = build.g;
904 const auto v = target(e, g);
905
906 if (!build.g[v].eod_accept) {
907 return false;
908 }
909
910 // If there's a graph between us and EOD, we shouldn't be eager.
911 if (build.g[v].left) {
912 return false;
913 }
914
915 // Must be exactly at EOD.
916 if (g[e].minBound != 0 || g[e].maxBound != 0) {
917 return false;
918 }
919
920 // In streaming mode, we can only eagerly report EOD for literals in the
921 // EOD-anchored table, as that's the only time we actually know where EOD
922 // is. In block mode, we always have this information.
923 const auto u = source(e, g);
924 if (build.cc.streaming && !build.isInETable(u)) {
925 return false;
926 }
927
928 return true;
929}
930
931#ifndef NDEBUG
932/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose
933 * graph are implementable. */
934bool canImplementGraphs(const RoseBuildImpl &tbi) {
935 const RoseGraph &g = tbi.g;
936
937 // First, check the Rose leftfixes.
938
939 for (auto v : vertices_range(g)) {
940 DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].index);
941
942 if (g[v].left.castle) {
943 DEBUG_PRINTF("castle ok\n");
944 continue;
945 }
946 if (g[v].left.dfa) {
947 DEBUG_PRINTF("dfa ok\n");
948 continue;
949 }
950 if (g[v].left.haig) {
951 DEBUG_PRINTF("haig ok\n");
952 continue;
953 }
954 if (g[v].left.graph) {
955 assert(g[v].left.graph->kind
956 == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
957 if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) {
958 DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n",
959 g[v].index, num_vertices(*g[v].left.graph));
960 return false;
961 }
962 }
963 }
964
965 // Suffix graphs.
966
967 for (auto v : vertices_range(g)) {
968 DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].index);
969
970 const RoseSuffixInfo &suffix = g[v].suffix;
971 if (suffix.castle) {
972 DEBUG_PRINTF("castle suffix ok\n");
973 continue;
974 }
975 if (suffix.rdfa) {
976 DEBUG_PRINTF("dfa suffix ok\n");
977 continue;
978 }
979 if (suffix.haig) {
980 DEBUG_PRINTF("haig suffix ok\n");
981 continue;
982 }
983 if (suffix.graph) {
984 assert(suffix.graph->kind == NFA_SUFFIX);
985 if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) {
986 DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n",
987 g[v].index, num_vertices(*suffix.graph));
988 return false;
989 }
990 }
991 }
992
993 return true;
994}
995
996/**
997 * \brief True if there is an engine with a top that is not triggered by a
998 * vertex in the Rose graph. This is a consistency check used in assertions.
999 */
1000bool hasOrphanedTops(const RoseBuildImpl &build) {
1001 const RoseGraph &g = build.g;
1002
1003 unordered_map<left_id, set<u32>> leftfixes;
1004 unordered_map<suffix_id, set<u32>> suffixes;
1005
1006 for (auto v : vertices_range(g)) {
1007 if (g[v].left) {
1008 set<u32> &tops = leftfixes[g[v].left];
1009 if (!build.isRootSuccessor(v)) {
1010 // Tops for infixes come from the in-edges.
1011 for (const auto &e : in_edges_range(v, g)) {
1012 tops.insert(g[e].rose_top);
1013 }
1014 }
1015 }
1016 if (g[v].suffix) {
1017 suffixes[g[v].suffix].insert(g[v].suffix.top);
1018 }
1019 }
1020
1021 for (const auto &e : leftfixes) {
1022 if (all_tops(e.first) != e.second) {
1023 DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n",
1024 as_string_list(all_tops(e.first)).c_str(),
1025 as_string_list(e.second).c_str());
1026 return true;
1027 }
1028 }
1029
1030 for (const auto &e : suffixes) {
1031 if (all_tops(e.first) != e.second) {
1032 DEBUG_PRINTF("suffix tops (%s) don't match rose graph (%s)\n",
1033 as_string_list(all_tops(e.first)).c_str(),
1034 as_string_list(e.second).c_str());
1035 return true;
1036 }
1037 }
1038
1039 return false;
1040}
1041
1042#endif // NDEBUG
1043
1044} // namespace ue2
1045