1 | /* |
2 | * Copyright (c) 2015-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Build code for Haig SOM DFA. |
31 | */ |
32 | #include "ng_haig.h" |
33 | |
34 | #include "grey.h" |
35 | #include "nfa/goughcompile.h" |
36 | #include "ng_holder.h" |
37 | #include "ng_mcclellan_internal.h" |
38 | #include "ng_som_util.h" |
39 | #include "ng_squash.h" |
40 | #include "util/bitfield.h" |
41 | #include "util/container.h" |
42 | #include "util/determinise.h" |
43 | #include "util/flat_containers.h" |
44 | #include "util/graph.h" |
45 | #include "util/graph_range.h" |
46 | #include "util/hash_dynamic_bitset.h" |
47 | #include "util/make_unique.h" |
48 | #include "util/unordered.h" |
49 | |
50 | #include <algorithm> |
51 | #include <functional> |
52 | #include <map> |
53 | #include <set> |
54 | #include <vector> |
55 | #include <boost/dynamic_bitset.hpp> |
56 | |
57 | using namespace std; |
58 | using boost::dynamic_bitset; |
59 | |
60 | namespace ue2 { |
61 | |
62 | #define NFA_STATE_LIMIT 256 |
63 | |
64 | #define HAIG_MAX_NFA_STATE 600 |
65 | #define HAIG_MAX_LIVE_SOM_SLOTS 32 |
66 | |
67 | namespace { |
68 | struct haig_too_wide { |
69 | }; |
70 | |
71 | template<typename stateset> |
72 | static |
73 | void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused, |
74 | stateset *init, stateset *initDS, |
75 | vector<NFAVertex> *v_by_index) { |
76 | DEBUG_PRINTF("graph kind: %s\n" , to_string(g.kind).c_str()); |
77 | for (auto v : vertices_range(g)) { |
78 | if (contains(unused, v)) { |
79 | continue; |
80 | } |
81 | u32 v_index = g[v].index; |
82 | if (is_any_start(v, g)) { |
83 | init->set(v_index); |
84 | if (hasSelfLoop(v, g) || is_triggered(g)) { |
85 | DEBUG_PRINTF("setting %u\n" , v_index); |
86 | initDS->set(v_index); |
87 | } |
88 | } |
89 | assert(v_index < init->size()); |
90 | } |
91 | |
92 | v_by_index->clear(); |
93 | v_by_index->resize(num_vertices(g), NGHolder::null_vertex()); |
94 | |
95 | for (auto v : vertices_range(g)) { |
96 | u32 v_index = g[v].index; |
97 | assert((*v_by_index)[v_index] == NGHolder::null_vertex()); |
98 | (*v_by_index)[v_index] = v; |
99 | } |
100 | } |
101 | |
102 | template<typename StateSet> |
103 | void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { |
104 | for (auto v : inv_adjacent_vertices_range(g.accept, g)) { |
105 | accept->set(g[v].index); |
106 | } |
107 | for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { |
108 | if (v == g.accept) { |
109 | continue; |
110 | } |
111 | acceptEod->set(g[v].index); |
112 | } |
113 | } |
114 | |
115 | template<typename Automaton_Traits> |
116 | class Automaton_Base { |
117 | public: |
118 | using StateSet = typename Automaton_Traits::StateSet; |
119 | using StateMap = typename Automaton_Traits::StateMap; |
120 | |
121 | protected: |
122 | Automaton_Base(const NGHolder &graph_in, som_type som, |
123 | const vector<vector<CharReach>> &triggers, |
124 | bool unordered_som) |
125 | : graph(graph_in), numStates(num_vertices(graph)), |
126 | unused(getRedundantStarts(graph_in)), |
127 | init(Automaton_Traits::init_states(numStates)), |
128 | initDS(Automaton_Traits::init_states(numStates)), |
129 | squash(Automaton_Traits::init_states(numStates)), |
130 | accept(Automaton_Traits::init_states(numStates)), |
131 | acceptEod(Automaton_Traits::init_states(numStates)), |
132 | toppable(Automaton_Traits::init_states(numStates)), |
133 | dead(Automaton_Traits::init_states(numStates)) { |
134 | calculateAlphabet(graph, alpha, unalpha, &alphasize); |
135 | assert(alphasize <= ALPHABET_SIZE); |
136 | |
137 | populateInit(graph, unused, &init, &initDS, &v_by_index); |
138 | populateAccepts(graph, &accept, &acceptEod); |
139 | |
140 | start_anchored = DEAD_STATE + 1; |
141 | if (initDS == init) { |
142 | start_floating = start_anchored; |
143 | } else if (initDS.any()) { |
144 | start_floating = start_anchored + 1; |
145 | } else { |
146 | start_floating = DEAD_STATE; |
147 | } |
148 | |
149 | cr_by_index = populateCR(graph, v_by_index, alpha); |
150 | |
151 | if (!unordered_som) { |
152 | for (const auto &sq : findSquashers(graph, som)) { |
153 | NFAVertex v = sq.first; |
154 | u32 vert_id = graph[v].index; |
155 | squash.set(vert_id); |
156 | squash_mask[vert_id] = shrinkStateSet(sq.second); |
157 | } |
158 | } |
159 | |
160 | if (is_triggered(graph)) { |
161 | dynamic_bitset<> temp(numStates); |
162 | markToppableStarts(graph, unused, false, triggers, &temp); |
163 | toppable = Automaton_Traits::copy_states(temp, numStates); |
164 | } |
165 | } |
166 | |
167 | private: |
168 | // Convert an NFAStateSet (as used by the squash code) into a StateSet. |
169 | StateSet shrinkStateSet(const NFAStateSet &in) const { |
170 | StateSet out = Automaton_Traits::init_states(numStates); |
171 | for (size_t i = in.find_first(); i != in.npos && i < out.size(); |
172 | i = in.find_next(i)) { |
173 | out.set(i); |
174 | } |
175 | return out; |
176 | } |
177 | |
178 | void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) { |
179 | StateSet acc = in & (eod ? acceptEod : accept); |
180 | for (size_t i = acc.find_first(); i != StateSet::npos; |
181 | i = acc.find_next(i)) { |
182 | NFAVertex v = v_by_index[i]; |
183 | DEBUG_PRINTF("marking report\n" ); |
184 | const auto &my_reports = graph[v].reports; |
185 | rv.insert(my_reports.begin(), my_reports.end()); |
186 | } |
187 | } |
188 | |
189 | public: |
190 | void transition(const StateSet &in, StateSet *next) { |
191 | transition_graph(*this, v_by_index, in, next); |
192 | } |
193 | |
194 | const vector<StateSet> initial() { |
195 | vector<StateSet> rv = {init}; |
196 | if (start_floating != DEAD_STATE && start_floating != start_anchored) { |
197 | rv.push_back(initDS); |
198 | } |
199 | return rv; |
200 | } |
201 | |
202 | void reports(const StateSet &in, flat_set<ReportID> &rv) { |
203 | reports_i(in, false, rv); |
204 | } |
205 | |
206 | void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { |
207 | reports_i(in, true, rv); |
208 | } |
209 | |
210 | static bool canPrune(const flat_set<ReportID> &) { return false; } |
211 | |
212 | const NGHolder &graph; |
213 | const u32 numStates; |
214 | const flat_set<NFAVertex> unused; |
215 | |
216 | array<u16, ALPHABET_SIZE> alpha; |
217 | array<u16, ALPHABET_SIZE> unalpha; |
218 | u16 alphasize; |
219 | |
220 | set<dstate_id_t> done_a; |
221 | set<dstate_id_t> done_b; |
222 | |
223 | u16 start_anchored; |
224 | u16 start_floating; |
225 | |
226 | vector<NFAVertex> v_by_index; |
227 | vector<CharReach> cr_by_index; /* pre alpha'ed */ |
228 | StateSet init; |
229 | StateSet initDS; |
230 | StateSet squash; /* states which allow us to mask out other states */ |
231 | StateSet accept; |
232 | StateSet acceptEod; |
233 | StateSet toppable; /* states which are allowed to be on when a top arrives, |
234 | * triggered dfas only */ |
235 | map<u32, StateSet> squash_mask; |
236 | StateSet dead; |
237 | }; |
238 | |
239 | struct Big_Traits { |
240 | using StateSet = dynamic_bitset<>; |
241 | using StateMap = unordered_map<StateSet, dstate_id_t, hash_dynamic_bitset>; |
242 | |
243 | static StateSet init_states(u32 num) { |
244 | return StateSet(num); |
245 | } |
246 | |
247 | static StateSet copy_states(const dynamic_bitset<> &in, UNUSED u32 num) { |
248 | assert(in.size() == num); |
249 | return in; |
250 | } |
251 | }; |
252 | |
253 | class Automaton_Big : public Automaton_Base<Big_Traits> { |
254 | public: |
255 | Automaton_Big(const NGHolder &graph_in, som_type som, |
256 | const vector<vector<CharReach>> &triggers, bool unordered_som) |
257 | : Automaton_Base(graph_in, som, triggers, unordered_som) {} |
258 | }; |
259 | |
260 | struct Graph_Traits { |
261 | using StateSet = bitfield<NFA_STATE_LIMIT>; |
262 | using StateMap = unordered_map<StateSet, dstate_id_t>; |
263 | |
264 | static StateSet init_states(UNUSED u32 num) { |
265 | assert(num <= NFA_STATE_LIMIT); |
266 | return StateSet(); |
267 | } |
268 | |
269 | static StateSet copy_states(const dynamic_bitset<> &in, u32 num) { |
270 | StateSet out = init_states(num); |
271 | for (size_t i = in.find_first(); i != in.npos && i < out.size(); |
272 | i = in.find_next(i)) { |
273 | out.set(i); |
274 | } |
275 | return out; |
276 | } |
277 | }; |
278 | |
279 | class Automaton_Graph : public Automaton_Base<Graph_Traits> { |
280 | public: |
281 | Automaton_Graph(const NGHolder &graph_in, som_type som, |
282 | const vector<vector<CharReach>> &triggers, |
283 | bool unordered_som) |
284 | : Automaton_Base(graph_in, som, triggers, unordered_som) {} |
285 | }; |
286 | |
287 | class Automaton_Haig_Merge { |
288 | public: |
289 | using StateSet = vector<u16>; |
290 | using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; |
291 | |
292 | explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in) |
293 | : nfas(in.begin(), in.end()), dead(in.size()) { |
294 | calculateAlphabet(); |
295 | populateAsFs(); |
296 | } |
297 | |
298 | void populateAsFs(void) { |
299 | bool fs_same = true; |
300 | bool fs_dead = true; |
301 | |
302 | as.resize(nfas.size()); |
303 | fs.resize(nfas.size()); |
304 | for (u32 i = 0; i < nfas.size(); i++) { |
305 | as[i] = nfas[i]->start_anchored; |
306 | fs[i] = nfas[i]->start_floating; |
307 | |
308 | if (fs[i]) { |
309 | fs_dead = false; |
310 | } |
311 | |
312 | if (as[i] != fs[i]) { |
313 | fs_same = false; |
314 | } |
315 | } |
316 | |
317 | start_anchored = DEAD_STATE + 1; |
318 | if (fs_same) { |
319 | start_floating = start_anchored; |
320 | } else if (fs_dead) { |
321 | start_floating = DEAD_STATE; |
322 | } else { |
323 | start_floating = start_anchored + 1; |
324 | } |
325 | } |
326 | |
327 | void calculateAlphabet(void) { |
328 | DEBUG_PRINTF("calculating alphabet\n" ); |
329 | vector<CharReach> esets(1, CharReach::dot()); |
330 | |
331 | for (const auto &haig : nfas) { |
332 | DEBUG_PRINTF("...next dfa alphabet\n" ); |
333 | assert(haig); |
334 | const auto &alpha_remap = haig->alpha_remap; |
335 | |
336 | for (size_t i = 0; i < esets.size(); i++) { |
337 | assert(esets[i].any()); |
338 | if (esets[i].count() == 1) { |
339 | DEBUG_PRINTF("skipping singleton eq set\n" ); |
340 | continue; |
341 | } |
342 | |
343 | CharReach t; |
344 | u8 leader_s = alpha_remap[esets[i].find_first()]; |
345 | |
346 | DEBUG_PRINTF("checking eq set, leader %02hhx \n" , leader_s); |
347 | |
348 | for (size_t s = esets[i].find_first(); |
349 | s != CharReach::npos; s = esets[i].find_next(s)) { |
350 | if (alpha_remap[s] != leader_s) { |
351 | t.set(s); |
352 | } |
353 | } |
354 | |
355 | if (t.any() && t != esets[i]) { |
356 | esets[i] &= ~t; |
357 | esets.push_back(t); |
358 | } |
359 | } |
360 | } |
361 | |
362 | alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); |
363 | } |
364 | |
365 | void transition(const StateSet &in, StateSet *next) { |
366 | u16 t[ALPHABET_SIZE]; |
367 | |
368 | for (u32 i = 0; i < alphasize; i++) { |
369 | next[i].resize(nfas.size()); |
370 | } |
371 | |
372 | for (u32 j = 0; j < nfas.size(); j++) { |
373 | getFullTransitionFromState(*nfas[j], in[j], t); |
374 | for (u32 i = 0; i < alphasize; i++) { |
375 | next[i][j]= t[unalpha[i]]; |
376 | } |
377 | } |
378 | } |
379 | |
380 | const vector<StateSet> initial() { |
381 | vector<StateSet> rv(1, as); |
382 | if (start_floating != DEAD_STATE && start_floating != start_anchored) { |
383 | rv.push_back(fs); |
384 | } |
385 | return rv; |
386 | } |
387 | |
388 | private: |
389 | void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set, |
390 | flat_set<ReportID> &r) { |
391 | for (u32 i = 0; i < nfas.size(); i++) { |
392 | const auto &rs = nfas[i]->states[in[i]].*r_set; |
393 | insert(&r, rs); |
394 | } |
395 | } |
396 | |
397 | public: |
398 | void reports(const StateSet &in, flat_set<ReportID> &rv) { |
399 | reports_i(in, &dstate::reports, rv); |
400 | } |
401 | void reportsEod(const StateSet &in, flat_set<ReportID> &rv) { |
402 | reports_i(in, &dstate::reports_eod, rv); |
403 | } |
404 | |
405 | static bool canPrune(const flat_set<ReportID> &) { return false; } |
406 | |
407 | private: |
408 | vector<const raw_som_dfa *> nfas; |
409 | vector<dstate_id_t> as; |
410 | vector<dstate_id_t> fs; |
411 | public: |
412 | array<u16, ALPHABET_SIZE> alpha; |
413 | array<u16, ALPHABET_SIZE> unalpha; |
414 | u16 alphasize; |
415 | StateSet dead; |
416 | |
417 | u16 start_anchored; |
418 | u16 start_floating; |
419 | }; |
420 | } |
421 | |
422 | enum bslm_mode { |
423 | ONLY_EXISTING, |
424 | INCLUDE_INVALID |
425 | }; |
426 | |
427 | static |
428 | bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) { |
429 | return is_virtual_start(v, g) || is_any_start(v, g); |
430 | } |
431 | |
432 | static |
433 | s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused, |
434 | NFAVertex v) { |
435 | if (is_triggered(g) && v == g.start) { |
436 | assert(!contains(unused, v)); |
437 | } else if (is_any_start_inc_virtual(v, g)) { |
438 | return CREATE_NEW_SOM; |
439 | } |
440 | |
441 | return g[v].index; |
442 | } |
443 | |
444 | template<typename stateset> |
445 | static |
446 | void haig_do_preds(const NGHolder &g, const stateset &nfa_states, |
447 | const vector<NFAVertex> &state_mapping, |
448 | som_tran_info &preds) { |
449 | for (size_t i = nfa_states.find_first(); i != stateset::npos; |
450 | i = nfa_states.find_next(i)) { |
451 | NFAVertex v = state_mapping[i]; |
452 | s32 slot_id = g[v].index; |
453 | |
454 | DEBUG_PRINTF("d vertex %zu\n" , g[v].index); |
455 | vector<u32> &out_map = preds[slot_id]; |
456 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
457 | out_map.push_back(g[u].index); |
458 | } |
459 | |
460 | sort(out_map.begin(), out_map.end()); |
461 | assert(!out_map.empty() || v == g.start); |
462 | } |
463 | } |
464 | |
465 | template<typename stateset> |
466 | static |
467 | void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused, |
468 | NFAVertex accept_v, const stateset &source_nfa_states, |
469 | const vector<NFAVertex> &state_mapping, |
470 | set<som_report> &out) { |
471 | for (size_t i = source_nfa_states.find_first(); i != stateset::npos; |
472 | i = source_nfa_states.find_next(i)) { |
473 | NFAVertex v = state_mapping[i]; |
474 | if (!edge(v, accept_v, g).second) { |
475 | continue; |
476 | } |
477 | for (ReportID report_id : g[v].reports) { |
478 | out.insert(som_report(report_id, getSlotID(g, unused, v))); |
479 | } |
480 | } |
481 | } |
482 | |
483 | static |
484 | void haig_note_starts(const NGHolder &g, map<u32, u32> *out) { |
485 | if (is_triggered(g)) { |
486 | return; |
487 | } |
488 | |
489 | DEBUG_PRINTF("seeing who creates new som values\n" ); |
490 | |
491 | vector<DepthMinMax> depths = getDistancesFromSOM(g); |
492 | |
493 | for (auto v : vertices_range(g)) { |
494 | if (is_any_start_inc_virtual(v, g)) { |
495 | DEBUG_PRINTF("%zu creates new som value\n" , g[v].index); |
496 | out->emplace(g[v].index, 0U); |
497 | continue; |
498 | } |
499 | |
500 | if (is_any_accept(v, g)) { |
501 | continue; |
502 | } |
503 | |
504 | const DepthMinMax &d = depths[g[v].index]; |
505 | if (d.min == d.max && d.min.is_finite()) { |
506 | DEBUG_PRINTF("%zu is fixed at %u\n" , g[v].index, (u32)d.min); |
507 | out->emplace(g[v].index, d.min); |
508 | } |
509 | } |
510 | } |
511 | |
512 | template<class Auto> |
513 | static |
514 | bool doHaig(const NGHolder &g, som_type som, |
515 | const vector<vector<CharReach>> &triggers, bool unordered_som, |
516 | raw_som_dfa *rdfa) { |
517 | u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from |
518 | a fight */ |
519 | using StateSet = typename Auto::StateSet; |
520 | vector<StateSet> nfa_state_map; |
521 | Auto n(g, som, triggers, unordered_som); |
522 | try { |
523 | if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { |
524 | DEBUG_PRINTF("state limit exceeded\n" ); |
525 | return false; |
526 | } |
527 | } catch (haig_too_wide &) { |
528 | DEBUG_PRINTF("too many live som states\n" ); |
529 | return false; |
530 | } |
531 | |
532 | rdfa->start_anchored = n.start_anchored; |
533 | rdfa->start_floating = n.start_floating; |
534 | rdfa->alpha_size = n.alphasize; |
535 | rdfa->alpha_remap = n.alpha; |
536 | |
537 | rdfa->state_som.reserve(rdfa->states.size()); |
538 | for (u32 i = 0; i < rdfa->states.size(); i++) { |
539 | rdfa->state_som.push_back(dstate_som()); |
540 | const StateSet &source_states = nfa_state_map[i]; |
541 | if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) { |
542 | DEBUG_PRINTF("too many live states\n" ); |
543 | return false; |
544 | } |
545 | |
546 | DEBUG_PRINTF("generating som info for %u\n" , i); |
547 | |
548 | haig_do_preds(g, source_states, n.v_by_index, |
549 | rdfa->state_som.back().preds); |
550 | |
551 | haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index, |
552 | rdfa->state_som.back().reports); |
553 | haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index, |
554 | rdfa->state_som.back().reports_eod); |
555 | } |
556 | |
557 | haig_note_starts(g, &rdfa->new_som_nfa_states); |
558 | |
559 | return true; |
560 | } |
561 | |
562 | unique_ptr<raw_som_dfa> |
563 | attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, |
564 | const vector<vector<CharReach>> &triggers, const Grey &grey, |
565 | bool unordered_som) { |
566 | assert(is_triggered(g) != triggers.empty()); |
567 | assert(!unordered_som || is_triggered(g)); |
568 | |
569 | if (!grey.allowGough) { |
570 | /* must be at least one engine capable of handling raw som dfas */ |
571 | return nullptr; |
572 | } |
573 | |
574 | DEBUG_PRINTF("attempting to build haig \n" ); |
575 | assert(allMatchStatesHaveReports(g)); |
576 | assert(hasCorrectlyNumberedVertices(g)); |
577 | |
578 | u32 numStates = num_vertices(g); |
579 | if (numStates > HAIG_MAX_NFA_STATE) { |
580 | DEBUG_PRINTF("giving up... looks too big\n" ); |
581 | return nullptr; |
582 | } |
583 | |
584 | auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som, NODE_START, |
585 | somPrecision); |
586 | |
587 | DEBUG_PRINTF("determinising nfa with %u vertices\n" , numStates); |
588 | bool rv; |
589 | if (numStates <= NFA_STATE_LIMIT) { |
590 | /* fast path */ |
591 | rv = doHaig<Automaton_Graph>(g, som, triggers, unordered_som, |
592 | rdfa.get()); |
593 | } else { |
594 | /* not the fast path */ |
595 | rv = doHaig<Automaton_Big>(g, som, triggers, unordered_som, rdfa.get()); |
596 | } |
597 | |
598 | if (!rv) { |
599 | return nullptr; |
600 | } |
601 | |
602 | DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n" , |
603 | rdfa->start_anchored, rdfa->start_floating); |
604 | |
605 | assert(rdfa->kind == g.kind); |
606 | return rdfa; |
607 | } |
608 | |
609 | static |
610 | void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas, |
611 | const vector<u32> &per_dfa_adj, |
612 | const vector<dstate_id_t> &source_nfa_states, |
613 | som_tran_info &som_tran) { |
614 | for (u32 d = 0; d < dfas.size(); ++d) { |
615 | u32 adj = per_dfa_adj[d]; |
616 | |
617 | const som_tran_info &som_tran_d |
618 | = dfas[d]->state_som[source_nfa_states[d]].preds; |
619 | for (som_tran_info::const_iterator it = som_tran_d.begin(); |
620 | it != som_tran_d.end(); ++it) { |
621 | assert(it->first != CREATE_NEW_SOM); |
622 | u32 dest_slot = it->first < N_SPECIALS ? it->first |
623 | : it->first + adj; |
624 | vector<u32> &out = som_tran[dest_slot]; |
625 | |
626 | if (!out.empty()) { |
627 | /* stylised specials already done; it does not matter who builds |
628 | the preds */ |
629 | assert(dest_slot < N_SPECIALS); |
630 | continue; |
631 | } |
632 | for (vector<u32>::const_iterator jt = it->second.begin(); |
633 | jt != it->second.end(); ++jt) { |
634 | if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) { |
635 | out.push_back(*jt); |
636 | } else { |
637 | out.push_back(*jt + adj); |
638 | } |
639 | } |
640 | } |
641 | } |
642 | } |
643 | |
644 | static |
645 | void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas, |
646 | const vector<u32> &per_dfa_adj, |
647 | map<u32, u32> *out) { |
648 | for (u32 d = 0; d < dfas.size(); ++d) { |
649 | u32 adj = per_dfa_adj[d]; |
650 | const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states; |
651 | for (map<u32, u32>::const_iterator it = new_soms.begin(); |
652 | it != new_soms.end(); ++it) { |
653 | if (it->first < N_SPECIALS) { |
654 | assert(!it->second); |
655 | out->emplace(it->first, 0U); |
656 | } else { |
657 | assert(d + 1 >= per_dfa_adj.size() |
658 | || it->first + adj < per_dfa_adj[d + 1]); |
659 | out->emplace(it->first + adj, it->second); |
660 | } |
661 | } |
662 | } |
663 | } |
664 | |
665 | static never_inline |
666 | void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas, |
667 | const vector<u32> &per_dfa_adj, |
668 | const vector<dstate_id_t> &source_nfa_states, |
669 | bool eod, set<som_report> &out) { |
670 | for (u32 d = 0; d < dfas.size(); ++d) { |
671 | u32 adj = per_dfa_adj[d]; |
672 | |
673 | const set<som_report> &reps = eod |
674 | ? dfas[d]->state_som[source_nfa_states[d]].reports_eod |
675 | : dfas[d]->state_som[source_nfa_states[d]].reports; |
676 | for (set<som_report>::const_iterator it = reps.begin(); |
677 | it != reps.end(); ++it) { |
678 | u32 slot = it->slot; |
679 | if (slot != CREATE_NEW_SOM && slot >= N_SPECIALS) { |
680 | slot += adj; |
681 | } |
682 | out.insert(som_report(it->report, slot)); |
683 | } |
684 | } |
685 | } |
686 | |
687 | static |
688 | u32 total_slots_used(const raw_som_dfa &rdfa) { |
689 | u32 rv = 0; |
690 | for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin(); |
691 | it != rdfa.state_som.end(); ++it) { |
692 | for (som_tran_info::const_iterator jt = it->preds.begin(); |
693 | jt != it->preds.end(); ++jt) { |
694 | assert(jt->first != CREATE_NEW_SOM); |
695 | ENSURE_AT_LEAST(&rv, jt->first + 1); |
696 | } |
697 | } |
698 | const map<u32, u32> &new_soms = rdfa.new_som_nfa_states; |
699 | for (map<u32, u32>::const_iterator it = new_soms.begin(); |
700 | it != new_soms.end(); ++it) { |
701 | ENSURE_AT_LEAST(&rv, it->first + 1); |
702 | } |
703 | return rv; |
704 | } |
705 | |
706 | unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas, |
707 | u32 limit) { |
708 | assert(!dfas.empty()); |
709 | |
710 | Automaton_Haig_Merge n(dfas); |
711 | |
712 | DEBUG_PRINTF("merging %zu dfas\n" , dfas.size()); |
713 | |
714 | bool unordered_som = false; |
715 | for (const auto &haig : dfas) { |
716 | assert(haig); |
717 | assert(haig->kind == dfas.front()->kind); |
718 | unordered_som |= haig->unordered_som_triggers; |
719 | if (haig->states.size() > limit) { |
720 | DEBUG_PRINTF("too many states!\n" ); |
721 | return nullptr; |
722 | } |
723 | } |
724 | |
725 | using StateSet = Automaton_Haig_Merge::StateSet; |
726 | vector<StateSet> nfa_state_map; |
727 | auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som, |
728 | NODE_START, |
729 | dfas[0]->stream_som_loc_width); |
730 | |
731 | if (!determinise(n, rdfa->states, limit, &nfa_state_map)) { |
732 | DEBUG_PRINTF("state limit (%u) exceeded\n" , limit); |
733 | return nullptr; /* over state limit */ |
734 | } |
735 | |
736 | rdfa->start_anchored = n.start_anchored; |
737 | rdfa->start_floating = n.start_floating; |
738 | rdfa->alpha_size = n.alphasize; |
739 | rdfa->alpha_remap = n.alpha; |
740 | |
741 | vector<u32> per_dfa_adj; |
742 | u32 curr_adj = 0; |
743 | for (const auto &haig : dfas) { |
744 | per_dfa_adj.push_back(curr_adj); |
745 | curr_adj += total_slots_used(*haig); |
746 | if (curr_adj < per_dfa_adj.back()) { |
747 | /* overflowed our som slot count */ |
748 | return nullptr; |
749 | } |
750 | } |
751 | |
752 | rdfa->state_som.reserve(rdfa->states.size()); |
753 | for (u32 i = 0; i < rdfa->states.size(); i++) { |
754 | rdfa->state_som.push_back(dstate_som()); |
755 | const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i]; |
756 | DEBUG_PRINTF("finishing state %u\n" , i); |
757 | |
758 | haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states, |
759 | rdfa->state_som.back().preds); |
760 | |
761 | if (rdfa->state_som.back().preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) { |
762 | DEBUG_PRINTF("som slot limit exceeded (%zu)\n" , |
763 | rdfa->state_som.back().preds.size()); |
764 | return nullptr; |
765 | } |
766 | |
767 | haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states, |
768 | false /* not eod */, |
769 | rdfa->state_som.back().reports); |
770 | haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states, |
771 | true /* eod */, |
772 | rdfa->state_som.back().reports_eod); |
773 | } |
774 | |
775 | haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states); |
776 | |
777 | DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n" , |
778 | rdfa->start_anchored, rdfa->start_floating); |
779 | |
780 | return rdfa; |
781 | } |
782 | |
783 | } // namespace ue2 |
784 | |