/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * \brief Limex NFA construction code.
 */

#include "ng_limex.h"

#include "grey.h"
#include "ng_equivalence.h"
#include "ng_holder.h"
#include "ng_misc_opt.h"
#include "ng_prune.h"
#include "ng_redundancy.h"
#include "ng_repeat.h"
#include "ng_reports.h"
#include "ng_restructuring.h"
#include "ng_squash.h"
#include "ng_util.h"
#include "ng_width.h"
#include "ue2common.h"
#include "nfa/limex_compile.h"
#include "nfa/limex_limits.h"
#include "nfa/nfa_internal.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/report_manager.h"
#include "util/flat_containers.h"
#include "util/verify_types.h"

#include <algorithm>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_values;
using boost::adaptors::map_keys;

namespace ue2 {

#ifndef NDEBUG
// Some sanity checking for the graph; returns false if something is wrong.
// Only used in assertions.
static
bool sanityCheckGraph(const NGHolder &g,
                      const unordered_map<NFAVertex, u32> &state_ids) {
    unordered_set<u32> seen_states;

    for (auto v : vertices_range(g)) {
        // Non-specials should have non-empty reachability.
        if (!is_special(v, g)) {
            if (g[v].char_reach.none()) {
                DEBUG_PRINTF("vertex %zu has empty reach\n", g[v].index);
                return false;
            }
        }

        // Vertices with edges to accept or acceptEod must have reports and
        // other vertices must not have them.
        if (is_match_vertex(v, g) && v != g.accept) {
            if (g[v].reports.empty()) {
                DEBUG_PRINTF("vertex %zu has no reports\n", g[v].index);
                return false;
            }
        } else if (!g[v].reports.empty()) {
            DEBUG_PRINTF("vertex %zu has reports but no accept edge\n",
                         g[v].index);
            return false;
        }

        // Participant vertices should have distinct state indices.
        if (!contains(state_ids, v)) {
            DEBUG_PRINTF("vertex %zu has no state index!\n", g[v].index);
            return false;
        }
        u32 s = state_ids.at(v);
        if (s != NO_STATE && !seen_states.insert(s).second) {
            DEBUG_PRINTF("vertex %zu has dupe state %u\n", g[v].index, s);
            return false;
        }
    }

    return true;
}
#endif

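/**
 * \brief Compute the squash masks for the graph, omitting bounded repeat
 * cyclic states (which are not truly cyclic and may turn off unexpectedly).
 */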
static
unordered_map<NFAVertex, NFAStateSet>
findSquashStates(const NGHolder &g, const vector<BoundedRepeatData> &repeats) {
    auto squashMap = findSquashers(g);
    filterSquashers(g, squashMap);

    /* We also filter out the cyclic states representing bounded repeats, as
     * they are not really cyclic -- they may turn off unexpectedly. */
    for (const auto &br : repeats) {
        if (br.repeatMax.is_finite()) {
            squashMap.erase(br.cyclic);
        }
    }

    return squashMap;
}

/**
 * \brief Drop edges from start to vertices that also have an edge from
 * startDs.
 *
 * Note that this also includes the (start, startDs) edge, which is not
 * needed for the actual NFA implementation (and which we don't want
 * affecting state numbering, etc.).
 */
static
void dropRedundantStartEdges(NGHolder &g) {
    remove_out_edge_if(g.start, [&](const NFAEdge &e) {
        return edge(g.startDs, target(e, g), g).second;
    }, g);

    // Ensure that we always remove (start, startDs), even if startDs has had
    // its self-loop removed as an optimization.
    remove_edge(g.start, g.startDs, g);
}

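/**
 * \brief Compute the reach for a vertex that will handle the given tops: the
 * union of each top's known reach, or dot if any top's reach is unknown.
 */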
static
CharReach calcTopVertexReach(const flat_set<u32> &tops,
                             const map<u32, CharReach> &top_reach) {
    CharReach top_cr;
    for (u32 t : tops) {
        if (contains(top_reach, t)) {
            top_cr |= top_reach.at(t);
        } else {
            top_cr = CharReach::dot();
            break;
        }
    }
    return top_cr;
}

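/**
 * \brief Add a new start vertex for the given tops, with edges to the given
 * successors. Reports (copied from the start vertex) are only retained if
 * the new vertex is connected to an accept.
 */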
static
NFAVertex makeTopStartVertex(NGHolder &g, const flat_set<u32> &tops,
                             const flat_set<NFAVertex> &succs,
                             const map<u32, CharReach> &top_reach) {
    assert(!succs.empty());
    assert(!tops.empty());

    bool reporter = false;

    NFAVertex u = add_vertex(g[g.start], g);
    CharReach top_cr = calcTopVertexReach(tops, top_reach);
    g[u].char_reach = top_cr;

    for (auto v : succs) {
        if (v == g.accept || v == g.acceptEod) {
            reporter = true;
        }
        add_edge(u, v, g);
    }

    // Only retain reports (which we copied on add_vertex above) for new top
    // vertices connected to accepts.
    if (!reporter) {
        g[u].reports.clear();
    }

    return u;
}

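/**
 * \brief Pick the next top/successor combination to handle: from whichever
 * side is smaller, choose the top (or successor) with the fewest associated
 * successors (or tops).
 */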
static
void pickNextTopStateToHandle(const map<u32, flat_set<NFAVertex>> &top_succs,
                              const map<NFAVertex, flat_set<u32>> &succ_tops,
                              flat_set<u32> *picked_tops,
                              flat_set<NFAVertex> *picked_succs) {
    /* pick the top or vertex we want to handle */
    if (top_succs.size() < succ_tops.size()) {
        auto best = top_succs.end();
        for (auto it = top_succs.begin(); it != top_succs.end(); ++it) {
            if (best == top_succs.end()
                || it->second.size() < best->second.size()) {
                best = it;
            }
        }
        assert(best != top_succs.end());
        assert(!best->second.empty()); /* should already have been pruned */

        *picked_tops = { best->first };
        *picked_succs = best->second;
    } else {
        auto best = succ_tops.end();
        for (auto it = succ_tops.begin(); it != succ_tops.end(); ++it) {
            /* have to worry about determinism for this one */
            if (best == succ_tops.end()
                || it->second.size() < best->second.size()
                || (it->second.size() == best->second.size()
                    && it->second < best->second)) {
                best = it;
            }
        }
        assert(best != succ_tops.end());
        assert(!best->second.empty()); /* should already have been pruned */

        *picked_succs = { best->first };
        *picked_tops = best->second;
    }
}

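/**
 * \brief Try to add more tops to the picked complete bipartite subgraph: a
 * top qualifies if it still has unhandled work among the picked successors
 * and its successor set covers all of them.
 */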
static
void expandCbsByTops(const map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
                     const map<u32, flat_set<NFAVertex>> &top_succs,
                     const map<NFAVertex, flat_set<u32>> &succ_tops,
                     flat_set<u32> &picked_tops,
                     flat_set<NFAVertex> &picked_succs) {
    NFAVertex v = *picked_succs.begin(); /* arbitrary successor - all equiv */
    const auto &cand_tops = succ_tops.at(v);

    for (u32 t : cand_tops) {
        if (!contains(unhandled_top_succs, t)) {
            continue;
        }
        if (!has_intersection(unhandled_top_succs.at(t), picked_succs)) {
            continue; /* not adding any useful work that hasn't already been
                       * done */
        }
        if (!is_subset_of(picked_succs, top_succs.at(t))) {
            continue; /* will not form a cbs */
        }
        picked_tops.insert(t);
    }
}

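/**
 * \brief Try to add more successors to the picked complete bipartite
 * subgraph: a successor qualifies if it still has unhandled work among the
 * picked tops and its top set covers all of them.
 */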
static
void expandCbsBySuccs(const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
                      const map<u32, flat_set<NFAVertex>> &top_succs,
                      const map<NFAVertex, flat_set<u32>> &succ_tops,
                      flat_set<u32> &picked_tops,
                      flat_set<NFAVertex> &picked_succs) {
    u32 t = *picked_tops.begin(); /* arbitrary top - all equiv */
    const auto &cand_succs = top_succs.at(t);

    for (NFAVertex v : cand_succs) {
        if (!contains(unhandled_succ_tops, v)) {
            continue;
        }
        if (!has_intersection(unhandled_succ_tops.at(v), picked_tops)) {
            continue; /* not adding any useful work that hasn't already been
                       * done */
        }
        if (!is_subset_of(picked_tops, succ_tops.at(v))) {
            continue; /* will not form a cbs */
        }
        picked_succs.insert(v);
    }
}

/* See if we can expand the complete bipartite subgraph (cbs) specified by the
 * picked tops/succs by adding more to either the tops or the succs.
 */
static
void expandTopSuccCbs(const map<u32, flat_set<NFAVertex>> &top_succs,
                      const map<NFAVertex, flat_set<u32>> &succ_tops,
                      const map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
                      const map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
                      flat_set<u32> &picked_tops,
                      flat_set<NFAVertex> &picked_succs) {
    /* Note: all picked (tops|succs) are equivalent */

    /* Try to expand first (as we are more likely to succeed) on the side
     * with the fewest remaining things to be handled */

    if (unhandled_top_succs.size() < unhandled_succ_tops.size()) {
        expandCbsByTops(unhandled_top_succs, top_succs, succ_tops,
                        picked_tops, picked_succs);
        expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops,
                         picked_tops, picked_succs);
    } else {
        expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops,
                         picked_tops, picked_succs);
        expandCbsByTops(unhandled_top_succs, top_succs, succ_tops,
                        picked_tops, picked_succs);
    }
}

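/**
 * \brief Record that start_v handles the given tops and successors, and
 * remove the handled pairs from the unhandled maps.
 */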
static
void markTopSuccAsHandled(NFAVertex start_v,
                          const flat_set<u32> &handled_tops,
                          const flat_set<NFAVertex> &handled_succs,
                          map<u32, set<NFAVertex>> &tops_out,
                          map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
                          map<NFAVertex, flat_set<u32>> &unhandled_succ_tops) {
    for (u32 t : handled_tops) {
        tops_out[t].insert(start_v);
        assert(contains(unhandled_top_succs, t));
        erase_all(&unhandled_top_succs[t], handled_succs);
        if (unhandled_top_succs[t].empty()) {
            unhandled_top_succs.erase(t);
        }
    }

    for (NFAVertex v : handled_succs) {
        assert(contains(unhandled_succ_tops, v));
        erase_all(&unhandled_succ_tops[v], handled_tops);
        if (unhandled_succ_tops[v].empty()) {
            unhandled_succ_tops.erase(v);
        }
    }
}

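/**
 * \brief Try to reuse the cyclic vertex u as a start state for the tops that
 * must trigger both u and all of u's successors, provided those tops' reach
 * is covered by u's own reach.
 */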
static
void attemptToUseAsStart(const NGHolder &g, NFAVertex u,
                         const map<u32, CharReach> &top_reach,
                         map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
                         map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
                         map<u32, set<NFAVertex>> &tops_out) {
    flat_set<u32> top_inter = unhandled_succ_tops.at(u);
    flat_set<NFAVertex> succs;
    for (NFAVertex v : adjacent_vertices_range(u, g)) {
        if (!contains(unhandled_succ_tops, v)) {
            return;
        }
        /* if it has vacuous reports we need to make sure that the report
         * sets are the same */
        if ((v == g.accept || v == g.acceptEod)
            && g[g.start].reports != g[u].reports) {
            DEBUG_PRINTF("different report behaviour\n");
            return;
        }
        const flat_set<u32> &v_tops = unhandled_succ_tops.at(v);
        flat_set<u32> new_inter;
        auto ni_inserter = inserter(new_inter, new_inter.end());
        set_intersection(top_inter.begin(), top_inter.end(),
                         v_tops.begin(), v_tops.end(), ni_inserter);
        top_inter = std::move(new_inter);
        succs.insert(v);
    }

    if (top_inter.empty()) {
        return;
    }

    auto top_cr = calcTopVertexReach(top_inter, top_reach);
    if (!top_cr.isSubsetOf(g[u].char_reach)) {
        return;
    }

    DEBUG_PRINTF("reusing %zu as a start vertex\n", g[u].index);
    markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs,
                         unhandled_succ_tops);
}

/* We may have cases where a top triggers something that starts with a .* (or
 * similar state). In these cases we can make use of that state as a start
 * state.
 */
static
void reusePredsAsStarts(const NGHolder &g, const map<u32, CharReach> &top_reach,
                        map<u32, flat_set<NFAVertex>> &unhandled_top_succs,
                        map<NFAVertex, flat_set<u32>> &unhandled_succ_tops,
                        map<u32, set<NFAVertex>> &tops_out) {
    /* create a list of candidates first, to avoid issues of iterator
     * invalidation */
    DEBUG_PRINTF("attempting to reuse vertices for top starts\n");
    vector<NFAVertex> cand_starts;
    for (NFAVertex u : unhandled_succ_tops | map_keys) {
        if (hasSelfLoop(u, g)) {
            cand_starts.push_back(u);
        }
    }

    for (NFAVertex u : cand_starts) {
        if (!contains(unhandled_succ_tops, u)) {
            continue;
        }
        attemptToUseAsStart(g, u, top_reach, unhandled_top_succs,
                            unhandled_succ_tops, tops_out);
    }
}

static
void makeTopStates(NGHolder &g, map<u32, set<NFAVertex>> &tops_out,
                   const map<u32, CharReach> &top_reach) {
    /* Ideally, we want to add the smallest number of states to the graph for
     * tops to turn on so that they can accurately trigger their successors.
     *
     * The relationship between tops and their successors forms a bipartite
     * graph. Finding the optimal number of start states to add is equivalent
     * to finding a minimal biclique cover. Unfortunately, this is known to be
     * NP-complete.
     *
     * Given this, we will just do something simple to avoid creating
     * something truly wasteful:
     * 1) Try to find any cyclic states which can act as their own start
     *    states.
     * 2) Pick a top or a succ to create a start state for, and then try to
     *    find the largest complete bipartite subgraph that it is part of.
     */
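
    /* For example, suppose tops 1 and 2 each trigger successors {a, b} while
     * top 3 triggers {b} alone: the bicliques {1,2} x {a,b} and {3} x {b}
     * cover every top/successor pair, so two start vertices suffice rather
     * than one per top. (Illustrative numbers only.) */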

    map<u32, flat_set<NFAVertex>> top_succs;
    map<NFAVertex, flat_set<u32>> succ_tops;
    for (const auto &e : out_edges_range(g.start, g)) {
        NFAVertex v = target(e, g);
        for (u32 t : g[e].tops) {
            top_succs[t].insert(v);
            succ_tops[v].insert(t);
        }
    }

    auto unhandled_top_succs = top_succs;
    auto unhandled_succ_tops = succ_tops;

    reusePredsAsStarts(g, top_reach, unhandled_top_succs, unhandled_succ_tops,
                       tops_out);

    /* Note: there may be successors which are equivalent (in terms of
       top-triggering); it may be more efficient to discover this and treat
       them as a unit. TODO */

    while (!unhandled_succ_tops.empty()) {
        assert(!unhandled_top_succs.empty());
        DEBUG_PRINTF("creating top start vertex\n");
        flat_set<u32> u_tops;
        flat_set<NFAVertex> u_succs;
        pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops,
                                 &u_tops, &u_succs);

        expandTopSuccCbs(top_succs, succ_tops, unhandled_top_succs,
                         unhandled_succ_tops, u_tops, u_succs);

        /* create a start vertex to handle this top/succ combination */
        NFAVertex u = makeTopStartVertex(g, u_tops, u_succs, top_reach);

        /* update maps */
        markTopSuccAsHandled(u, u_tops, u_succs, tops_out, unhandled_top_succs,
                             unhandled_succ_tops);
    }
    assert(unhandled_top_succs.empty());

    // We are completely replacing the start vertex, so clear its reports.
    clear_out_edges(g.start, g);
    add_edge(g.start, g.startDs, g);
    g[g.start].reports.clear();
}

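/**
 * \brief Find "zombie" vertices: predecessors of accept in graphs where every
 * match path leads into a dot-reach accept state, so that once such a state
 * is on, the pattern matches at every subsequent position. Streaming mode
 * only.
 */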
static
set<NFAVertex> findZombies(const NGHolder &h,
                           const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                           const unordered_map<NFAVertex, u32> &state_ids,
                           const CompileContext &cc) {
    set<NFAVertex> zombies;
    if (!cc.grey.allowZombies) {
        return zombies;
    }

    // We only use zombie masks in streaming mode.
    if (!cc.streaming) {
        return zombies;
    }

    if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
        DEBUG_PRINTF("cannot be made undead - bad reports\n");
        return zombies;
    }

    for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
        assert(h[u].reports.size() == 1);
        for (auto v : adjacent_vertices_range(u, h)) {
            if (edge(v, h.accept, h).second && h[v].char_reach.all()) {
                if (!contains(br_cyclic, v)) {
                    goto ok;
                }

                const BoundedRepeatSummary &sum = br_cyclic.at(v);

                if (u == v && sum.repeatMax.is_infinite()) {
                    goto ok;
                }
            }
        }
        DEBUG_PRINTF("does not go to dot accept\n");
        return zombies;
    ok:;
    }

    for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) {
        if (state_ids.at(v) != NO_STATE) {
            zombies.insert(v);
        }
    }
    return zombies;
}

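/**
 * \brief Renumber states in reverse order. Only used to stress the NFA
 * construction code when the numberNFAStatesWrong debug knob is set.
 */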
static
void reverseStateOrdering(unordered_map<NFAVertex, u32> &state_ids) {
    vector<NFAVertex> ordering;
    for (auto &e : state_ids) {
        if (e.second == NO_STATE) {
            continue;
        }
        ordering.push_back(e.first);
    }

    // Sort in reverse order by state ID.
    sort(ordering.begin(), ordering.end(),
         [&state_ids](NFAVertex a, NFAVertex b) {
             return state_ids.at(a) > state_ids.at(b);
         });

    u32 stateNum = 0;

    for (const auto &v : ordering) {
        DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum);
        state_ids[v] = stateNum++;
    }
}

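/**
 * \brief For each top, compute the union of the reach of the final character
 * of each trigger literal; a top with an unknown (empty) trigger is given
 * full reach.
 */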
static
map<u32, CharReach>
findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) {
    map<u32, CharReach> top_reach;

    for (const auto &m : triggers) {
        const auto top = m.first;
        CharReach cr;
        for (const auto &trigger : m.second) {
            if (trigger.empty()) {
                // We don't know anything about this trigger. Assume it can
                // have any reach.
                cr.setall();
                break;
            }
            cr |= *trigger.rbegin();
        }

        top_reach.emplace(top, cr);
    }

    return top_reach;
}

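/**
 * \brief Perform the first stage of construction on a clone of the input
 * graph: analyse bounded repeats, build top start states for triggered
 * graphs, drop redundant start edges and number the states.
 */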
static
unique_ptr<NGHolder>
prepareGraph(const NGHolder &h_in, const ReportManager *rm,
             const map<u32, u32> &fixed_depth_tops,
             const map<u32, vector<vector<CharReach>>> &triggers,
             bool impl_test_only, const CompileContext &cc,
             unordered_map<NFAVertex, u32> &state_ids,
             vector<BoundedRepeatData> &repeats,
             map<u32, set<NFAVertex>> &tops) {
    assert(is_triggered(h_in) || fixed_depth_tops.empty());

    unique_ptr<NGHolder> h = cloneHolder(h_in);

    // Bounded repeat handling.
    analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming,
                   impl_test_only, cc.grey);

    // If we're building a rose/suffix, do the top dance.
    flat_set<NFAVertex> topVerts;
    if (is_triggered(*h)) {
        makeTopStates(*h, tops, findTopReach(triggers));

        for (const auto &vv : tops | map_values) {
            insert(&topVerts, vv);
        }
    }

    dropRedundantStartEdges(*h);

    // Do state numbering.
    state_ids = numberStates(*h, topVerts);

    // In debugging, we sometimes like to reverse the state numbering to
    // stress the NFA construction code.
    if (cc.grey.numberNFAStatesWrong) {
        reverseStateOrdering(state_ids);
    }

    assert(sanityCheckGraph(*h, state_ids));
    return h;
}

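/**
 * \brief Rewrite the reports on each vertex to the corresponding program
 * offsets from the report manager.
 */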
static
void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) {
    for (const auto &v : vertices_range(h)) {
        auto &reports = h[v].reports;
        if (reports.empty()) {
            continue;
        }
        auto old_reports = reports;
        reports.clear();
        for (const ReportID &id : old_reports) {
            u32 program = rm.getProgramOffset(id);
            reports.insert(program);
        }
        DEBUG_PRINTF("vertex %zu: remapped reports {%s} to programs {%s}\n",
                     h[v].index, as_string_list(old_reports).c_str(),
                     as_string_list(reports).c_str());
    }
}

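/**
 * \brief Common construction path: prepare the graph, enforce the state
 * limit, gather bounded repeat, squash and zombie information, then hand off
 * to the LimEx compiler's generate().
 */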
static
bytecode_ptr<NFA>
constructNFA(const NGHolder &h_in, const ReportManager *rm,
             const map<u32, u32> &fixed_depth_tops,
             const map<u32, vector<vector<CharReach>>> &triggers,
             bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
             const CompileContext &cc) {
    if (!has_managed_reports(h_in)) {
        rm = nullptr;
    } else {
        assert(rm);
    }

    unordered_map<NFAVertex, u32> state_ids;
    vector<BoundedRepeatData> repeats;
    map<u32, set<NFAVertex>> tops;
    unique_ptr<NGHolder> h
        = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only,
                       cc, state_ids, repeats, tops);

    // Quick exit: if we've got an embarrassment of riches, i.e. more states
    // than we can implement in our largest NFA model, bail here.
    u32 numStates = countStates(state_ids);
    if (numStates > NFA_MAX_STATES) {
        DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
        return nullptr;
    }

    map<NFAVertex, BoundedRepeatSummary> br_cyclic;
    for (const auto &br : repeats) {
        br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
    }

    unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
    unordered_map<NFAVertex, NFAStateSet> squashMap;

    // build map of squashed and squashers
    if (cc.grey.squashNFA) {
        squashMap = findSquashStates(*h, repeats);

        if (rm && cc.grey.highlanderSquash) {
            reportSquashMap = findHighlanderSquashers(*h, *rm);
        }
    }

    set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);

    if (has_managed_reports(*h)) {
        assert(rm);
        remapReportsToPrograms(*h, *rm);
    }

    if (!cc.streaming || !cc.grey.compressNFAState) {
        compress_state = false;
    }

    return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
                    zombies, do_accel, compress_state, hint, cc);
}

bytecode_ptr<NFA>
constructNFA(const NGHolder &h_in, const ReportManager *rm,
             const map<u32, u32> &fixed_depth_tops,
             const map<u32, vector<vector<CharReach>>> &triggers,
             bool compress_state, const CompileContext &cc) {
    const u32 hint = INVALID_NFA;
    const bool do_accel = cc.grey.accelerateNFA;
    const bool impl_test_only = false;
    return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
                        do_accel, impl_test_only, hint, cc);
}

#ifndef RELEASE_BUILD
// Variant that allows a hint to be specified.
bytecode_ptr<NFA>
constructNFA(const NGHolder &h_in, const ReportManager *rm,
             const map<u32, u32> &fixed_depth_tops,
             const map<u32, vector<vector<CharReach>>> &triggers,
             bool compress_state, u32 hint, const CompileContext &cc) {
    const bool do_accel = cc.grey.accelerateNFA;
    const bool impl_test_only = false;
    return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
                        do_accel, impl_test_only, hint, cc);
}
#endif // RELEASE_BUILD

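/**
 * \brief Build a reversed prefix NFA: no tops, repeats, squashing or zombies
 * apply, so this is a straightforward renumber-and-generate.
 */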
static
bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
                                         const CompileContext &cc) {
    // Make a mutable copy of the graph that we can renumber etc.
    NGHolder h;
    cloneHolder(h, h_in);
    assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */

    // Do state numbering.
    auto state_ids = numberStates(h, {});

    // Quick exit: if we've got an embarrassment of riches, i.e. more states
    // than we can implement in our largest NFA model, bail here.
    u32 numStates = countStates(state_ids);
    if (numStates > NFA_MAX_STATES) {
        DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
        return nullptr;
    }

    assert(sanityCheckGraph(h, state_ids));

    map<u32, set<NFAVertex>> tops; /* only the standard tops for NFAs */
    set<NFAVertex> zombies;
    vector<BoundedRepeatData> repeats;
    unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
    unordered_map<NFAVertex, NFAStateSet> squashMap;

    return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
                    zombies, false, false, hint, cc);
}

bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in,
                                       const CompileContext &cc) {
    u32 hint = INVALID_NFA; // no hint
    return constructReversedNFA_i(h_in, hint, cc);
}

#ifndef RELEASE_BUILD
// Variant that allows a hint to be specified.
bytecode_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint,
                                       const CompileContext &cc) {
    return constructReversedNFA_i(h_in, hint, cc);
}
#endif // RELEASE_BUILD

u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
                       const CompileContext &cc) {
    if (!cc.grey.allowLimExNFA) {
        return false;
    }

    assert(!can_never_match(g));

    // Quick check: we can always implement an NFA with fewer than
    // NFA_MAX_STATES states. Note that top masks can generate extra states,
    // so we account for those here too.
    if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) {
        return true;
    }

    if (!has_managed_reports(g)) {
        rm = nullptr;
    } else {
        assert(rm);
    }

    // The BEST way to tell if an NFA is implementable is to implement it!
    const bool impl_test_only = true;
    const map<u32, u32> fixed_depth_tops; // empty
    const map<u32, vector<vector<CharReach>>> triggers; // empty

    /* Perform the first part of the construction process and see if the
     * resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
     * implement it as an NFA. */

    unordered_map<NFAVertex, u32> state_ids;
    vector<BoundedRepeatData> repeats;
    map<u32, set<NFAVertex>> tops;
    unique_ptr<NGHolder> h
        = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
                       state_ids, repeats, tops);
    assert(h);
    u32 numStates = countStates(state_ids);
    if (numStates <= NFA_MAX_STATES) {
        return numStates;
    }

    return 0;
}

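/**
 * \brief Apply equivalence and redundancy reductions to the graph, rolling
 * back to a pristine copy if the reductions render it unimplementable.
 */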
void reduceImplementableGraph(NGHolder &g, som_type som,
                              const ReportManager *rm,
                              const CompileContext &cc) {
    NGHolder g_pristine;
    cloneHolder(g_pristine, g);

    reduceGraphEquivalences(g, cc);

    removeRedundancy(g, som);

    if (rm && has_managed_reports(g)) {
        pruneHighlanderDominated(g, *rm);
    }

    if (!isImplementableNFA(g, rm, cc)) {
        DEBUG_PRINTF("reductions made graph unimplementable, roll back\n");
        clear_graph(g);
        cloneHolder(g, g_pristine);
    }
}

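/**
 * \brief Count the number of accelerable states by running the first stage
 * of construction; returns NFA_MAX_ACCEL_STATES + 1 if the graph is not
 * constructible at all.
 */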
u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
                     const CompileContext &cc) {
    if (!has_managed_reports(g)) {
        rm = nullptr;
    } else {
        assert(rm);
    }

    const bool impl_test_only = true;
    const map<u32, u32> fixed_depth_tops; // empty
    const map<u32, vector<vector<CharReach>>> triggers; // empty

    unordered_map<NFAVertex, u32> state_ids;
    vector<BoundedRepeatData> repeats;
    map<u32, set<NFAVertex>> tops;
    unique_ptr<NGHolder> h
        = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
                       state_ids, repeats, tops);

    if (!h || countStates(state_ids) > NFA_MAX_STATES) {
        DEBUG_PRINTF("not constructible\n");
        return NFA_MAX_ACCEL_STATES + 1;
    }

    assert(h->kind == g.kind);

    // Should have no bearing on accel calculation, so we leave these empty.
    const set<NFAVertex> zombies;
    unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
    unordered_map<NFAVertex, NFAStateSet> squashMap;

    return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
                            tops, zombies, cc);
}

} // namespace ue2