1/*
2 * Copyright (c) 2015-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Build code for Haig SOM DFA.
31 */
32#include "ng_haig.h"
33
34#include "grey.h"
35#include "nfa/goughcompile.h"
36#include "ng_holder.h"
37#include "ng_mcclellan_internal.h"
38#include "ng_som_util.h"
39#include "ng_squash.h"
40#include "util/bitfield.h"
41#include "util/container.h"
42#include "util/determinise.h"
43#include "util/flat_containers.h"
44#include "util/graph.h"
45#include "util/graph_range.h"
46#include "util/hash_dynamic_bitset.h"
47#include "util/make_unique.h"
48#include "util/unordered.h"
49
50#include <algorithm>
51#include <functional>
52#include <map>
53#include <set>
54#include <vector>
55#include <boost/dynamic_bitset.hpp>
56
57using namespace std;
58using boost::dynamic_bitset;
59
60namespace ue2 {
61
62#define NFA_STATE_LIMIT 256
63
64#define HAIG_MAX_NFA_STATE 600
65#define HAIG_MAX_LIVE_SOM_SLOTS 32
66
67namespace {
68struct haig_too_wide {
69};
70
71template<typename stateset>
72static
73void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
74 stateset *init, stateset *initDS,
75 vector<NFAVertex> *v_by_index) {
76 DEBUG_PRINTF("graph kind: %s\n", to_string(g.kind).c_str());
77 for (auto v : vertices_range(g)) {
78 if (contains(unused, v)) {
79 continue;
80 }
81 u32 v_index = g[v].index;
82 if (is_any_start(v, g)) {
83 init->set(v_index);
84 if (hasSelfLoop(v, g) || is_triggered(g)) {
85 DEBUG_PRINTF("setting %u\n", v_index);
86 initDS->set(v_index);
87 }
88 }
89 assert(v_index < init->size());
90 }
91
92 v_by_index->clear();
93 v_by_index->resize(num_vertices(g), NGHolder::null_vertex());
94
95 for (auto v : vertices_range(g)) {
96 u32 v_index = g[v].index;
97 assert((*v_by_index)[v_index] == NGHolder::null_vertex());
98 (*v_by_index)[v_index] = v;
99 }
100}
101
102template<typename StateSet>
103void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
104 for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
105 accept->set(g[v].index);
106 }
107 for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
108 if (v == g.accept) {
109 continue;
110 }
111 acceptEod->set(g[v].index);
112 }
113}
114
115template<typename Automaton_Traits>
116class Automaton_Base {
117public:
118 using StateSet = typename Automaton_Traits::StateSet;
119 using StateMap = typename Automaton_Traits::StateMap;
120
121protected:
122 Automaton_Base(const NGHolder &graph_in, som_type som,
123 const vector<vector<CharReach>> &triggers,
124 bool unordered_som)
125 : graph(graph_in), numStates(num_vertices(graph)),
126 unused(getRedundantStarts(graph_in)),
127 init(Automaton_Traits::init_states(numStates)),
128 initDS(Automaton_Traits::init_states(numStates)),
129 squash(Automaton_Traits::init_states(numStates)),
130 accept(Automaton_Traits::init_states(numStates)),
131 acceptEod(Automaton_Traits::init_states(numStates)),
132 toppable(Automaton_Traits::init_states(numStates)),
133 dead(Automaton_Traits::init_states(numStates)) {
134 calculateAlphabet(graph, alpha, unalpha, &alphasize);
135 assert(alphasize <= ALPHABET_SIZE);
136
137 populateInit(graph, unused, &init, &initDS, &v_by_index);
138 populateAccepts(graph, &accept, &acceptEod);
139
140 start_anchored = DEAD_STATE + 1;
141 if (initDS == init) {
142 start_floating = start_anchored;
143 } else if (initDS.any()) {
144 start_floating = start_anchored + 1;
145 } else {
146 start_floating = DEAD_STATE;
147 }
148
149 cr_by_index = populateCR(graph, v_by_index, alpha);
150
151 if (!unordered_som) {
152 for (const auto &sq : findSquashers(graph, som)) {
153 NFAVertex v = sq.first;
154 u32 vert_id = graph[v].index;
155 squash.set(vert_id);
156 squash_mask[vert_id] = shrinkStateSet(sq.second);
157 }
158 }
159
160 if (is_triggered(graph)) {
161 dynamic_bitset<> temp(numStates);
162 markToppableStarts(graph, unused, false, triggers, &temp);
163 toppable = Automaton_Traits::copy_states(temp, numStates);
164 }
165 }
166
167private:
168 // Convert an NFAStateSet (as used by the squash code) into a StateSet.
169 StateSet shrinkStateSet(const NFAStateSet &in) const {
170 StateSet out = Automaton_Traits::init_states(numStates);
171 for (size_t i = in.find_first(); i != in.npos && i < out.size();
172 i = in.find_next(i)) {
173 out.set(i);
174 }
175 return out;
176 }
177
178 void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
179 StateSet acc = in & (eod ? acceptEod : accept);
180 for (size_t i = acc.find_first(); i != StateSet::npos;
181 i = acc.find_next(i)) {
182 NFAVertex v = v_by_index[i];
183 DEBUG_PRINTF("marking report\n");
184 const auto &my_reports = graph[v].reports;
185 rv.insert(my_reports.begin(), my_reports.end());
186 }
187 }
188
189public:
190 void transition(const StateSet &in, StateSet *next) {
191 transition_graph(*this, v_by_index, in, next);
192 }
193
194 const vector<StateSet> initial() {
195 vector<StateSet> rv = {init};
196 if (start_floating != DEAD_STATE && start_floating != start_anchored) {
197 rv.push_back(initDS);
198 }
199 return rv;
200 }
201
202 void reports(const StateSet &in, flat_set<ReportID> &rv) {
203 reports_i(in, false, rv);
204 }
205
206 void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
207 reports_i(in, true, rv);
208 }
209
210 static bool canPrune(const flat_set<ReportID> &) { return false; }
211
212 const NGHolder &graph;
213 const u32 numStates;
214 const flat_set<NFAVertex> unused;
215
216 array<u16, ALPHABET_SIZE> alpha;
217 array<u16, ALPHABET_SIZE> unalpha;
218 u16 alphasize;
219
220 set<dstate_id_t> done_a;
221 set<dstate_id_t> done_b;
222
223 u16 start_anchored;
224 u16 start_floating;
225
226 vector<NFAVertex> v_by_index;
227 vector<CharReach> cr_by_index; /* pre alpha'ed */
228 StateSet init;
229 StateSet initDS;
230 StateSet squash; /* states which allow us to mask out other states */
231 StateSet accept;
232 StateSet acceptEod;
233 StateSet toppable; /* states which are allowed to be on when a top arrives,
234 * triggered dfas only */
235 map<u32, StateSet> squash_mask;
236 StateSet dead;
237};
238
239struct Big_Traits {
240 using StateSet = dynamic_bitset<>;
241 using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>;
242
243 static StateSet init_states(u32 num) {
244 return StateSet(num);
245 }
246
247 static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) {
248 assert(in.size() == num);
249 return in;
250 }
251};
252
253class Automaton_Big : public Automaton_Base<Big_Traits> {
254public:
255 Automaton_Big(const NGHolder &graph_in, som_type som,
256 const vector<vector<CharReach>> &triggers, bool unordered_som)
257 : Automaton_Base(graph_in, som, triggers, unordered_som) {}
258};
259
260struct Graph_Traits {
261 using StateSet = bitfield<NFA_STATE_LIMIT>;
262 using StateMap = unordered_map<StateSet, dstate_id_t>;
263
264 static StateSet init_states(UNUSED u32 num) {
265 assert(num <= NFA_STATE_LIMIT);
266 return StateSet();
267 }
268
269 static StateSet copy_states(const dynamic_bitset<> &in, u32 num) {
270 StateSet out = init_states(num);
271 for (size_t i = in.find_first(); i != in.npos && i < out.size();
272 i = in.find_next(i)) {
273 out.set(i);
274 }
275 return out;
276 }
277};
278
279class Automaton_Graph : public Automaton_Base<Graph_Traits> {
280public:
281 Automaton_Graph(const NGHolder &graph_in, som_type som,
282 const vector<vector<CharReach>> &triggers,
283 bool unordered_som)
284 : Automaton_Base(graph_in, som, triggers, unordered_som) {}
285};
286
287class Automaton_Haig_Merge {
288public:
289 using StateSet = vector<u16>;
290 using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
291
292 explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
293 : nfas(in.begin(), in.end()), dead(in.size()) {
294 calculateAlphabet();
295 populateAsFs();
296 }
297
298 void populateAsFs(void) {
299 bool fs_same = true;
300 bool fs_dead = true;
301
302 as.resize(nfas.size());
303 fs.resize(nfas.size());
304 for (u32 i = 0; i < nfas.size(); i++) {
305 as[i] = nfas[i]->start_anchored;
306 fs[i] = nfas[i]->start_floating;
307
308 if (fs[i]) {
309 fs_dead = false;
310 }
311
312 if (as[i] != fs[i]) {
313 fs_same = false;
314 }
315 }
316
317 start_anchored = DEAD_STATE + 1;
318 if (fs_same) {
319 start_floating = start_anchored;
320 } else if (fs_dead) {
321 start_floating = DEAD_STATE;
322 } else {
323 start_floating = start_anchored + 1;
324 }
325 }
326
327 void calculateAlphabet(void) {
328 DEBUG_PRINTF("calculating alphabet\n");
329 vector<CharReach> esets(1, CharReach::dot());
330
331 for (const auto &haig : nfas) {
332 DEBUG_PRINTF("...next dfa alphabet\n");
333 assert(haig);
334 const auto &alpha_remap = haig->alpha_remap;
335
336 for (size_t i = 0; i < esets.size(); i++) {
337 assert(esets[i].any());
338 if (esets[i].count() == 1) {
339 DEBUG_PRINTF("skipping singleton eq set\n");
340 continue;
341 }
342
343 CharReach t;
344 u8 leader_s = alpha_remap[esets[i].find_first()];
345
346 DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
347
348 for (size_t s = esets[i].find_first();
349 s != CharReach::npos; s = esets[i].find_next(s)) {
350 if (alpha_remap[s] != leader_s) {
351 t.set(s);
352 }
353 }
354
355 if (t.any() && t != esets[i]) {
356 esets[i] &= ~t;
357 esets.push_back(t);
358 }
359 }
360 }
361
362 alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
363 }
364
365 void transition(const StateSet &in, StateSet *next) {
366 u16 t[ALPHABET_SIZE];
367
368 for (u32 i = 0; i < alphasize; i++) {
369 next[i].resize(nfas.size());
370 }
371
372 for (u32 j = 0; j < nfas.size(); j++) {
373 getFullTransitionFromState(*nfas[j], in[j], t);
374 for (u32 i = 0; i < alphasize; i++) {
375 next[i][j]= t[unalpha[i]];
376 }
377 }
378 }
379
380 const vector<StateSet> initial() {
381 vector<StateSet> rv(1, as);
382 if (start_floating != DEAD_STATE && start_floating != start_anchored) {
383 rv.push_back(fs);
384 }
385 return rv;
386 }
387
388private:
389 void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set,
390 flat_set<ReportID> &r) {
391 for (u32 i = 0; i < nfas.size(); i++) {
392 const auto &rs = nfas[i]->states[in[i]].*r_set;
393 insert(&r, rs);
394 }
395 }
396
397public:
398 void reports(const StateSet &in, flat_set<ReportID> &rv) {
399 reports_i(in, &dstate::reports, rv);
400 }
401 void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
402 reports_i(in, &dstate::reports_eod, rv);
403 }
404
405 static bool canPrune(const flat_set<ReportID> &) { return false; }
406
407private:
408 vector<const raw_som_dfa *> nfas;
409 vector<dstate_id_t> as;
410 vector<dstate_id_t> fs;
411public:
412 array<u16, ALPHABET_SIZE> alpha;
413 array<u16, ALPHABET_SIZE> unalpha;
414 u16 alphasize;
415 StateSet dead;
416
417 u16 start_anchored;
418 u16 start_floating;
419};
420}
421
422enum bslm_mode {
423 ONLY_EXISTING,
424 INCLUDE_INVALID
425};
426
427static
428bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
429 return is_virtual_start(v, g) || is_any_start(v, g);
430}
431
432static
433s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused,
434 NFAVertex v) {
435 if (is_triggered(g) && v == g.start) {
436 assert(!contains(unused, v));
437 } else if (is_any_start_inc_virtual(v, g)) {
438 return CREATE_NEW_SOM;
439 }
440
441 return g[v].index;
442}
443
444template<typename stateset>
445static
446void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
447 const vector<NFAVertex> &state_mapping,
448 som_tran_info &preds) {
449 for (size_t i = nfa_states.find_first(); i != stateset::npos;
450 i = nfa_states.find_next(i)) {
451 NFAVertex v = state_mapping[i];
452 s32 slot_id = g[v].index;
453
454 DEBUG_PRINTF("d vertex %zu\n", g[v].index);
455 vector<u32> &out_map = preds[slot_id];
456 for (auto u : inv_adjacent_vertices_range(v, g)) {
457 out_map.push_back(g[u].index);
458 }
459
460 sort(out_map.begin(), out_map.end());
461 assert(!out_map.empty() || v == g.start);
462 }
463}
464
465template<typename stateset>
466static
467void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused,
468 NFAVertex accept_v, const stateset &source_nfa_states,
469 const vector<NFAVertex> &state_mapping,
470 set<som_report> &out) {
471 for (size_t i = source_nfa_states.find_first(); i != stateset::npos;
472 i = source_nfa_states.find_next(i)) {
473 NFAVertex v = state_mapping[i];
474 if (!edge(v, accept_v, g).second) {
475 continue;
476 }
477 for (ReportID report_id : g[v].reports) {
478 out.insert(som_report(report_id, getSlotID(g, unused, v)));
479 }
480 }
481}
482
483static
484void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
485 if (is_triggered(g)) {
486 return;
487 }
488
489 DEBUG_PRINTF("seeing who creates new som values\n");
490
491 vector<DepthMinMax> depths = getDistancesFromSOM(g);
492
493 for (auto v : vertices_range(g)) {
494 if (is_any_start_inc_virtual(v, g)) {
495 DEBUG_PRINTF("%zu creates new som value\n", g[v].index);
496 out->emplace(g[v].index, 0U);
497 continue;
498 }
499
500 if (is_any_accept(v, g)) {
501 continue;
502 }
503
504 const DepthMinMax &d = depths[g[v].index];
505 if (d.min == d.max && d.min.is_finite()) {
506 DEBUG_PRINTF("%zu is fixed at %u\n", g[v].index, (u32)d.min);
507 out->emplace(g[v].index, d.min);
508 }
509 }
510}
511
512template<class Auto>
513static
514bool doHaig(const NGHolder &g, som_type som,
515 const vector<vector<CharReach>> &triggers, bool unordered_som,
516 raw_som_dfa *rdfa) {
517 u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
518 a fight */
519 using StateSet = typename Auto::StateSet;
520 vector<StateSet> nfa_state_map;
521 Auto n(g, som, triggers, unordered_som);
522 try {
523 if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
524 DEBUG_PRINTF("state limit exceeded\n");
525 return false;
526 }
527 } catch (haig_too_wide &) {
528 DEBUG_PRINTF("too many live som states\n");
529 return false;
530 }
531
532 rdfa->start_anchored = n.start_anchored;
533 rdfa->start_floating = n.start_floating;
534 rdfa->alpha_size = n.alphasize;
535 rdfa->alpha_remap = n.alpha;
536
537 rdfa->state_som.reserve(rdfa->states.size());
538 for (u32 i = 0; i < rdfa->states.size(); i++) {
539 rdfa->state_som.push_back(dstate_som());
540 const StateSet &source_states = nfa_state_map[i];
541 if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) {
542 DEBUG_PRINTF("too many live states\n");
543 return false;
544 }
545
546 DEBUG_PRINTF("generating som info for %u\n", i);
547
548 haig_do_preds(g, source_states, n.v_by_index,
549 rdfa->state_som.back().preds);
550
551 haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index,
552 rdfa->state_som.back().reports);
553 haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index,
554 rdfa->state_som.back().reports_eod);
555 }
556
557 haig_note_starts(g, &rdfa->new_som_nfa_states);
558
559 return true;
560}
561
562unique_ptr<raw_som_dfa>
563attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
564 const vector<vector<CharReach>> &triggers, const Grey &grey,
565 bool unordered_som) {
566 assert(is_triggered(g) != triggers.empty());
567 assert(!unordered_som || is_triggered(g));
568
569 if (!grey.allowGough) {
570 /* must be at least one engine capable of handling raw som dfas */
571 return nullptr;
572 }
573
574 DEBUG_PRINTF("attempting to build haig \n");
575 assert(allMatchStatesHaveReports(g));
576 assert(hasCorrectlyNumberedVertices(g));
577
578 u32 numStates = num_vertices(g);
579 if (numStates > HAIG_MAX_NFA_STATE) {
580 DEBUG_PRINTF("giving up... looks too big\n");
581 return nullptr;
582 }
583
584 auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START,
585 somPrecision);
586
587 DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
588 bool rv;
589 if (numStates <= NFA_STATE_LIMIT) {
590 /* fast path */
591 rv = doHaig<Automaton_Graph>(g, som, triggers, unordered_som,
592 rdfa.get());
593 } else {
594 /* not the fast path */
595 rv = doHaig<Automaton_Big>(g, som, triggers, unordered_som, rdfa.get());
596 }
597
598 if (!rv) {
599 return nullptr;
600 }
601
602 DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
603 rdfa->start_anchored, rdfa->start_floating);
604
605 assert(rdfa->kind == g.kind);
606 return rdfa;
607}
608
609static
610void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas,
611 const vector<u32> &per_dfa_adj,
612 const vector<dstate_id_t> &source_nfa_states,
613 som_tran_info &som_tran) {
614 for (u32 d = 0; d < dfas.size(); ++d) {
615 u32 adj = per_dfa_adj[d];
616
617 const som_tran_info &som_tran_d
618 = dfas[d]->state_som[source_nfa_states[d]].preds;
619 for (som_tran_info::const_iterator it = som_tran_d.begin();
620 it != som_tran_d.end(); ++it) {
621 assert(it->first != CREATE_NEW_SOM);
622 u32 dest_slot = it->first < N_SPECIALS ? it->first
623 : it->first + adj;
624 vector<u32> &out = som_tran[dest_slot];
625
626 if (!out.empty()) {
627 /* stylised specials already done; it does not matter who builds
628 the preds */
629 assert(dest_slot < N_SPECIALS);
630 continue;
631 }
632 for (vector<u32>::const_iterator jt = it->second.begin();
633 jt != it->second.end(); ++jt) {
634 if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) {
635 out.push_back(*jt);
636 } else {
637 out.push_back(*jt + adj);
638 }
639 }
640 }
641 }
642}
643
644static
645void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas,
646 const vector<u32> &per_dfa_adj,
647 map<u32, u32> *out) {
648 for (u32 d = 0; d < dfas.size(); ++d) {
649 u32 adj = per_dfa_adj[d];
650 const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states;
651 for (map<u32, u32>::const_iterator it = new_soms.begin();
652 it != new_soms.end(); ++it) {
653 if (it->first < N_SPECIALS) {
654 assert(!it->second);
655 out->emplace(it->first, 0U);
656 } else {
657 assert(d + 1 >= per_dfa_adj.size()
658 || it->first + adj < per_dfa_adj[d + 1]);
659 out->emplace(it->first + adj, it->second);
660 }
661 }
662 }
663}
664
665static never_inline
666void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas,
667 const vector<u32> &per_dfa_adj,
668 const vector<dstate_id_t> &source_nfa_states,
669 bool eod, set<som_report> &out) {
670 for (u32 d = 0; d < dfas.size(); ++d) {
671 u32 adj = per_dfa_adj[d];
672
673 const set<som_report> &reps = eod
674 ? dfas[d]->state_som[source_nfa_states[d]].reports_eod
675 : dfas[d]->state_som[source_nfa_states[d]].reports;
676 for (set<som_report>::const_iterator it = reps.begin();
677 it != reps.end(); ++it) {
678 u32 slot = it->slot;
679 if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) {
680 slot += adj;
681 }
682 out.insert(som_report(it->report, slot));
683 }
684 }
685}
686
687static
688u32 total_slots_used(const raw_som_dfa &rdfa) {
689 u32 rv = 0;
690 for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin();
691 it != rdfa.state_som.end(); ++it) {
692 for (som_tran_info::const_iterator jt = it->preds.begin();
693 jt != it->preds.end(); ++jt) {
694 assert(jt->first != CREATE_NEW_SOM);
695 ENSURE_AT_LEAST(&rv, jt->first + 1);
696 }
697 }
698 const map<u32, u32> &new_soms = rdfa.new_som_nfa_states;
699 for (map<u32, u32>::const_iterator it = new_soms.begin();
700 it != new_soms.end(); ++it) {
701 ENSURE_AT_LEAST(&rv, it->first + 1);
702 }
703 return rv;
704}
705
706unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas,
707 u32 limit) {
708 assert(!dfas.empty());
709
710 Automaton_Haig_Merge n(dfas);
711
712 DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
713
714 bool unordered_som = false;
715 for (const auto &haig : dfas) {
716 assert(haig);
717 assert(haig->kind == dfas.front()->kind);
718 unordered_som |= haig->unordered_som_triggers;
719 if (haig->states.size() > limit) {
720 DEBUG_PRINTF("too many states!\n");
721 return nullptr;
722 }
723 }
724
725 using StateSet = Automaton_Haig_Merge::StateSet;
726 vector<StateSet> nfa_state_map;
727 auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som,
728 NODE_START,
729 dfas[0]->stream_som_loc_width);
730
731 if (!determinise(n, rdfa->states, limit, &nfa_state_map)) {
732 DEBUG_PRINTF("state limit (%u) exceeded\n", limit);
733 return nullptr; /* over state limit */
734 }
735
736 rdfa->start_anchored = n.start_anchored;
737 rdfa->start_floating = n.start_floating;
738 rdfa->alpha_size = n.alphasize;
739 rdfa->alpha_remap = n.alpha;
740
741 vector<u32> per_dfa_adj;
742 u32 curr_adj = 0;
743 for (const auto &haig : dfas) {
744 per_dfa_adj.push_back(curr_adj);
745 curr_adj += total_slots_used(*haig);
746 if (curr_adj < per_dfa_adj.back()) {
747 /* overflowed our som slot count */
748 return nullptr;
749 }
750 }
751
752 rdfa->state_som.reserve(rdfa->states.size());
753 for (u32 i = 0; i < rdfa->states.size(); i++) {
754 rdfa->state_som.push_back(dstate_som());
755 const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i];
756 DEBUG_PRINTF("finishing state %u\n", i);
757
758 haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states,
759 rdfa->state_som.back().preds);
760
761 if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) {
762 DEBUG_PRINTF("som slot limit exceeded (%zu)\n",
763 rdfa->state_som.back().preds.size());
764 return nullptr;
765 }
766
767 haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
768 false /* not eod */,
769 rdfa->state_som.back().reports);
770 haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
771 true /* eod */,
772 rdfa->state_som.back().reports_eod);
773 }
774
775 haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
776
777 DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
778 rdfa->start_anchored, rdfa->start_floating);
779
780 return rdfa;
781}
782
783} // namespace ue2
784