/*
 * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "mcsheng_compile.h"

#include "accel.h"
#include "accelcompile.h"
#include "grey.h"
#include "mcclellancompile.h"
#include "mcclellancompile_util.h"
#include "mcsheng_internal.h"
#include "nfa_internal.h"
#include "rdfa_graph.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/unaligned.h"
#include "util/unordered.h"
#include "util/verify_types.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <map>
#include <memory>
#include <set>
#include <vector>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_keys;

namespace ue2 {

namespace /* anon */ {

#define MIN_SHENG_SIZE 6
#define INVALID_SHENG_ID 255

struct dstate_extra {
    u16 daddytaken = 0;
    bool shermanState = false;
    bool sheng_succ = false;
    u8 sheng_id = INVALID_SHENG_ID;
};

struct dfa_info {
    accel_dfa_build_strat &strat;
    raw_dfa &raw;
    vector<dstate> &states;
    vector<dstate_extra> extra;
    const u16 alpha_size; /* including special symbols */
    const array<u16, ALPHABET_SIZE> &alpha_remap;
    vector<CharReach> rev_alpha;
    const u16 impl_alpha_size;

    u8 getAlphaShift() const;

    explicit dfa_info(accel_dfa_build_strat &s)
        : strat(s),
          raw(s.get_raw()),
          states(raw.states),
          extra(raw.states.size()),
          alpha_size(raw.alpha_size),
          alpha_remap(raw.alpha_remap),
          impl_alpha_size(raw.getImplAlphaSize()) {
        rev_alpha.resize(impl_alpha_size);
        for (u32 i = 0; i < N_CHARS; i++) {
            rev_alpha[alpha_remap[i]].set(i);
        }
    }

    dstate_id_t implId(dstate_id_t raw_id) const {
        return states[raw_id].impl_id;
    }

    bool is_sherman(dstate_id_t raw_id) const {
        return extra[raw_id].shermanState;
    }

    bool is_sheng(dstate_id_t raw_id) const {
        return extra[raw_id].sheng_id != INVALID_SHENG_ID;
    }

    bool is_sheng_succ(dstate_id_t raw_id) const {
        return extra[raw_id].sheng_succ;
    }

    /* states which use the normal transition/successor table */
    bool is_normal(dstate_id_t raw_id) const {
        return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id);
    }
    size_t size(void) const { return states.size(); }
};

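/* The alphabet shift is the log2-rounded-up width of a state's row in the
 * successor table. For example, impl_alpha_size = 40 gives
 * 32 - clz32(39) = 32 - 26 = 6, so each row spans 1 << 6 = 64 entries, the
 * smallest power of two able to index all 40 symbols. */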
u8 dfa_info::getAlphaShift() const {
    if (impl_alpha_size < 2) {
        return 1;
    } else {
        /* log2 round up */
        return 32 - clz32(impl_alpha_size - 1);
    }
}

} // namespace

static
mstate_aux *getAux(NFA *n, dstate_id_t i) {
    mcsheng *m = (mcsheng *)getMutableImplNfa(n);
    mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);

    mstate_aux *aux = aux_base + i;
    assert((const char *)aux < (const char *)n + m->length);
    return aux;
}

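/* Value encoding used in the shuffle masks built below: a sheng state with
 * impl_id k is stored as k - 1 (the dead state consumes no lane), the dead
 * state itself is stored as sheng_end - 1, and a transition out of the sheng
 * region keeps its impl_id unchanged. Any mask value >= sheng_end - 1
 * therefore signals an exit from the sheng, and every such value must fit in
 * a u8, which is what verify_u8() asserts. */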
static
void createShuffleMasks(mcsheng *m, const dfa_info &info,
                        dstate_id_t sheng_end,
                        const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
    assert(sheng_end > DEAD_STATE + 1);
    assert(sheng_end <= sizeof(m128) + 1);
    vector<array<u8, sizeof(m128)>> masks;
    masks.resize(info.alpha_size);
    /* -1 to avoid wasting a slot as we do not include dead state */
    vector<dstate_id_t> raw_ids;
    raw_ids.resize(sheng_end - 1);
    for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
        assert(info.implId(s)); /* should not map to DEAD_STATE */
        if (info.is_sheng(s)) {
            raw_ids[info.extra[s].sheng_id] = s;
        }
    }
    for (u32 i = 0; i < info.alpha_size; i++) {
        if (i == info.alpha_remap[TOP]) {
            continue;
        }
        auto &mask = masks[i];
        assert(sizeof(mask) == sizeof(m128));
        mask.fill(0);

        for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
            dstate_id_t raw_id = raw_ids[sheng_id];
            dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
            if (next_id == DEAD_STATE) {
                next_id = sheng_end - 1;
            } else if (next_id < sheng_end) {
                next_id--;
            }
            DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
            mask[sheng_id] = verify_u8(next_id);
        }
    }
    for (u32 i = 0; i < N_CHARS; i++) {
        assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
        memcpy((u8 *)&m->sheng_masks[i],
               (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128));
    }
    m->sheng_end = sheng_end;
    m->sheng_accel_limit = sheng_end - 1;

    for (dstate_id_t s : raw_ids) {
        if (contains(accel_escape_info, s)) {
            LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
        }
    }
}

static
void populateBasicInfo(size_t state_size, const dfa_info &info,
                       u32 total_size, u32 aux_offset, u32 accel_offset,
                       u32 accel_count, ReportID arb, bool single, NFA *nfa) {
    assert(state_size == sizeof(u16) || state_size == sizeof(u8));

    nfa->length = total_size;
    nfa->nPositions = info.states.size();

    nfa->scratchStateSize = verify_u32(state_size);
    nfa->streamStateSize = verify_u32(state_size);

    if (state_size == sizeof(u8)) {
        nfa->type = MCSHENG_NFA_8;
    } else {
        nfa->type = MCSHENG_NFA_16;
    }

    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    for (u32 i = 0; i < 256; i++) {
        m->remap[i] = verify_u8(info.alpha_remap[i]);
    }
    m->alphaShift = info.getAlphaShift();
    m->length = total_size;
    m->aux_offset = aux_offset;
    m->accel_offset = accel_offset;
    m->arb_report = arb;
    m->state_count = verify_u16(info.size());
    m->start_anchored = info.implId(info.raw.start_anchored);
    m->start_floating = info.implId(info.raw.start_floating);
    m->has_accel = accel_count ? 1 : 0;

    if (single) {
        m->flags |= MCSHENG_FLAG_SINGLE;
    }
}

static
size_t calcShermanRegionSize(const dfa_info &info) {
    size_t rv = 0;

    for (size_t i = 0; i < info.size(); i++) {
        if (info.is_sherman(i)) {
            rv += SHERMAN_FIXED_SIZE;
        }
    }

    return ROUNDUP_16(rv);
}

static
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
               const vector<u32> &reports, const vector<u32> &reports_eod,
               const vector<u32> &reportOffsets) {
    const dstate &raw_state = info.states[i];
    aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
    aux->accept_eod = raw_state.reports_eod.empty()
                          ? 0 : reportOffsets[reports_eod[i]];
    aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
                             : info.raw.start_floating);
}

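/* Layout of 16-bit impl ids, as assigned below: 0 is the dead state, ids in
 * [1, sheng_end) were already claimed by the sheng region, normal states
 * (sheng successors first) occupy [sheng_end, *sherman_base), and sherman
 * states follow from *sherman_base. Sheng successors are placed first so
 * that their ids stay within a u8, as the sheng shuffle masks require. */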
/* returns false on error */
static
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
                      dstate_id_t *sherman_base) {
    info.states[0].impl_id = 0; /* dead is always 0 */

    vector<dstate_id_t> norm;
    vector<dstate_id_t> sherm;
    vector<dstate_id_t> norm_sheng_succ;
    vector<dstate_id_t> sherm_sheng_succ;

    if (info.size() > (1 << 16)) {
        DEBUG_PRINTF("too many states\n");
        *sherman_base = 0;
        return false;
    }

    for (u32 i = 1; i < info.size(); i++) {
        if (info.is_sheng(i)) {
            continue; /* sheng impl ids have already been allocated */
        } else if (info.is_sherman(i)) {
            if (info.is_sheng_succ(i)) {
                sherm_sheng_succ.push_back(i);
            } else {
                sherm.push_back(i);
            }
        } else {
            if (info.is_sheng_succ(i)) {
                norm_sheng_succ.push_back(i);
            } else {
                norm.push_back(i);
            }
        }
    }

    dstate_id_t next_norm = sheng_end;
    for (dstate_id_t s : norm_sheng_succ) {
        info.states[s].impl_id = next_norm++;
    }
    if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) {
        /* we need to give sheng_succs ids which fit into a u8 -- demote these
         * sherman states to normal states */
        for (dstate_id_t s : sherm_sheng_succ) {
            info.states[s].impl_id = next_norm++;
            info.extra[s].shermanState = false;
        }
        sherm_sheng_succ.clear();
    }
    for (dstate_id_t s : norm) {
        info.states[s].impl_id = next_norm++;
    }

    *sherman_base = next_norm;
    dstate_id_t next_sherman = next_norm;

    for (dstate_id_t s : sherm_sheng_succ) {
        info.states[s].impl_id = next_sherman++;
    }

    for (dstate_id_t s : sherm) {
        info.states[s].impl_id = next_sherman++;
    }

    /* Check that we have not over-allocated our states */
    DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
                 (dstate_id_t)(next_sherman & STATE_MASK));
    return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
}

typedef RdfaGraph::vertex_descriptor RdfaVertex;

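/* Mark every state reachable directly from the sheng region. These
 * successors must later receive impl ids that fit in a u8, since exits from
 * the sheng are encoded as byte values in the shuffle masks; if the region
 * plus its exits cannot fit in a byte, the sheng scheme is abandoned. */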
static
bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info,
                      const flat_set<RdfaVertex> &sheng_states) {
    u32 exit_count = 0;

    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        for (u32 i = 0; i != info.alpha_size; i++) {
            if (i == info.alpha_remap[TOP]) {
                continue;
            }
            dstate_id_t next = info.states[s].next[i];
            if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) {
                continue;
            }
            exit_count++;
            info.extra[next].sheng_succ = true;
        }
    }

    if (exit_count + sheng_states.size() < UINT8_MAX) {
        return true;
    } else {
        DEBUG_PRINTF("fail: unable to fit %u exits in a byte\n", exit_count);
        return false;
    }
}

static
CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
    CharReach rv;
    for (u32 i = 0; i < info.impl_alpha_size; i++) {
        if (info.raw.states[u].next[i] == v) {
            assert(info.rev_alpha[i].any());
            rv |= info.rev_alpha[i];
        }
    }
    assert(rv.any());
    return rv;
}

#define MAX_SHENG_STATES 16
#define MAX_SHENG_LEAKINESS 0.05

using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;

/**
 * Returns the proportion of strings of length 'depth' which will leave the
 * sheng region when starting at state 'u'.
 */
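/* Equivalently, with reach(u, v) the set of bytes taking u to v, this
 * memoised recursion computes
 *   leak(u, d) = sum over live successors v of (|reach(u, v)| / 256) * w(v)
 * where w(v) = 1 if v lies outside the sheng region, w(v) = leak(v, d - 1)
 * if v is inside the region and d > 1, and w(v) = 0 otherwise. */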
static
double leakiness(const RdfaGraph &g, dfa_info &info,
                 const flat_set<RdfaVertex> &sheng_states, RdfaVertex u,
                 u32 depth, LeakinessCache &cache) {
    double rv = 0;
    if (contains(cache, make_pair(u, depth))) {
        return cache[make_pair(u, depth)];
    }
    for (RdfaVertex v : adjacent_vertices_range(u, g)) {
        if (g[v].index == DEAD_STATE) {
            continue;
        }
        double width = get_edge_reach(g[u].index, g[v].index, info).count();
        width /= N_CHARS;

        double weight;
        if (!contains(sheng_states, v)) {
            weight = 1;
        } else if (depth > 1) {
            weight = leakiness(g, info, sheng_states, v, depth - 1, cache);
        } else {
            continue; /* weight = 0 */
        }
        rv += width * weight;
    }

    cache[make_pair(u, depth)] = rv;
    DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv);
    return rv;
}

/**
 * Returns the proportion of 8 byte strings which will leave the sheng region
 * when starting at state 'u'.
 */
static
double leakiness(const RdfaGraph &g, dfa_info &info,
                 const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) {
    LeakinessCache cache;
    double rv = leakiness(g, info, sheng_states, u, 8, cache);
    return rv;
}

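/* Select the states that will form the sheng region. In outline: pick a
 * 'sticky' cyclic state (one whose estimated back-edge reach covers at least
 * 30 characters), grow the region from it by BFS up to MAX_SHENG_STATES
 * states, skipping report-raising states where callbacks are generated, then
 * insist that the region contains at least MIN_SHENG_SIZE states, includes a
 * back edge to the cyclic, and leaks at most MAX_SHENG_LEAKINESS of 8-byte
 * strings. Returns the first impl id after the sheng region, or DEAD_STATE
 * on failure. */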
static
dstate_id_t find_sheng_states(dfa_info &info,
                              map<dstate_id_t, AccelScheme> &accel_escape_info) {
    RdfaGraph g(info.raw);
    auto cyclics = find_vertices_in_cycles(g);

    auto base_cyclic = RdfaGraph::null_vertex();
    for (const auto &v : cyclics) {
        if (g[v].index == DEAD_STATE) {
            continue;
        }
        DEBUG_PRINTF("considering cyclic %zu\n", g[v].index);
        /* get an estimate of the stickiness of the cyclic: assume any edges
         * from states with larger state ids are back edges */
        CharReach est_back_reach;
        for (const auto &u : inv_adjacent_vertices_range(v, g)) {
            if (g[u].index < g[v].index) {
                continue;
            }
            est_back_reach |= get_edge_reach(g[u].index, g[v].index, info);
        }

        if (est_back_reach.count() < 30) {
            continue;
        }
        base_cyclic = v;
        break;
    }
    if (!base_cyclic) {
        return DEAD_STATE;
    }

    flat_set<RdfaVertex> sheng_states;
    deque<RdfaVertex> to_consider = { base_cyclic };
    flat_set<dstate_id_t> considered = { DEAD_STATE };
    bool seen_back_edge = false;
    while (!to_consider.empty()
           && sheng_states.size() < MAX_SHENG_STATES) {
        auto v = to_consider.front();
        to_consider.pop_front();
        if (!considered.insert(g[v].index).second) {
            continue;
        }

        assert(!contains(sheng_states, v));

        if (generates_callbacks(info.raw.kind)
            && !info.states[g[v].index].reports.empty()) {
            /* cannot raise callbacks from sheng region */
            continue;
        }

        sheng_states.insert(v);
        for (const auto &t : adjacent_vertices_range(v, g)) {
            if (!contains(considered, g[t].index)) {
                to_consider.push_back(t);
            }
            if (t == base_cyclic) {
                seen_back_edge = true;
            }
        }
    }

    /* allocate normal ids */
    dstate_id_t sheng_end = DEAD_STATE + 1;
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (!contains(accel_escape_info, s)) {
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    /* allocate accel ids */
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (contains(accel_escape_info, s)) {
            assert(!info.states[s].impl_id);
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    if (sheng_states.size() < MIN_SHENG_SIZE) {
        DEBUG_PRINTF("sheng region too small\n");
        return DEAD_STATE;
    }

    if (!seen_back_edge) {
        DEBUG_PRINTF("did not include cyclic\n");
        return DEAD_STATE;
    }

    double leak = leakiness(g, info, sheng_states, base_cyclic);
    if (leak > MAX_SHENG_LEAKINESS) {
        DEBUG_PRINTF("too leaky (%g)\n", leak);
        return DEAD_STATE;
    }

    if (!mark_sheng_succs(g, info, sheng_states)) {
        return DEAD_STATE;
    }

    /* TODO: ensure sufficiently 'sticky' */
    /* TODO: check not all states accel */
    DEBUG_PRINTF("sheng_end = %hu\n", sheng_end);
    return sheng_end;
}

static
void fill_in_aux_info(NFA *nfa, const dfa_info &info,
                      const map<dstate_id_t, AccelScheme> &accel_escape_info,
                      u32 accel_offset, UNUSED u32 accel_end_offset,
                      const vector<u32> &reports,
                      const vector<u32> &reports_eod,
                      u32 report_base_offset,
                      const raw_report_info &ri) {
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);

    vector<u32> reportOffsets;

    ri.fillReportLists(nfa, report_base_offset, reportOffsets);

    for (u32 i = 0; i < info.size(); i++) {
        u16 impl_id = info.implId(i);
        mstate_aux *this_aux = getAux(nfa, impl_id);

        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
        if (contains(accel_escape_info, i)) {
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset <= accel_end_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }
    }
}

static
u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
    mstate_aux *aux = getAux(nfa, target_impl_id);
    u16 flags = 0;

    if (aux->accept) {
        flags |= ACCEPT_FLAG;
    }

    if (aux->accel_offset) {
        flags |= ACCEL_FLAG;
    }

    return flags;
}

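/* Each 16-bit successor table entry packs the target's impl id together with
 * ACCEPT_FLAG/ACCEL_FLAG in the high bits; the runtime recovers the bare id
 * with STATE_MASK, which is why allocateImplId16() insists all ids fit under
 * that mask. Only normal states get rows here: sheng states live in the
 * shuffle masks and sherman states in the sherman region. */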
static
void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
                           dstate_id_t sheng_end,
                           UNUSED dstate_id_t sherman_base) {
    u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_normal(i)) {
            assert(info.implId(i) < sheng_end || info.is_sherman(i));
            continue;
        }

        assert(info.implId(i) < sherman_base);
        u16 normal_id = verify_u16(info.implId(i) - sheng_end);

        for (size_t s = 0; s < info.impl_alpha_size; s++) {
            dstate_id_t raw_succ = info.states[i].next[s];
            u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];

            entry = info.implId(raw_succ);
            entry |= get_edge_flags(nfa, entry);
        }
    }
}

#define MAX_SHERMAN_LIST_LEN 8

static
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
                  dstate_id_t max) {
    if (candidate < max) {
        dest.insert(candidate);
    }
}

static
void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source,
                   u16 alphasize, dstate_id_t curr_id) {
    for (symbol_t s = 0; s < alphasize; s++) {
        addIfEarlier(dest, source.next[s], curr_id);
    }
}

/** \brief Returns a set of states to search for a better daddy. */
static
flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
                                            dstate_id_t curr_id) {
    flat_set<dstate_id_t> hinted;

    addIfEarlier(hinted, 0, curr_id);
    addIfEarlier(hinted, info.raw.start_anchored, curr_id);
    addIfEarlier(hinted, info.raw.start_floating, curr_id);

    // Add existing daddy and his successors, then search back one generation.
    const u16 alphasize = info.impl_alpha_size;
    dstate_id_t daddy = info.states[curr_id].daddy;
    for (u32 level = 0; daddy && level < 2; level++) {
        addIfEarlier(hinted, daddy, curr_id);
        addSuccessors(hinted, info.states[daddy], alphasize, curr_id);
        daddy = info.states[daddy].daddy;
    }

    return hinted;
}

#define MAX_SHERMAN_SELF_LOOP 20

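/* Sherman states trade speed for space: instead of a full successor row,
 * each one stores its 'daddy' state's id plus only the transitions that
 * differ from the daddy's, in a fixed SHERMAN_FIXED_SIZE slot. As a rough
 * example, with a 64-symbol implementation alphabet a full 16-bit row costs
 * 2 * 64 = 128 bytes, so a state that shares most of a daddy's row is far
 * cheaper to store as a list of at most MAX_SHERMAN_LIST_LEN overrides. */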
static
void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
                       bool any_cyclic_near_anchored_state, const Grey &grey) {
    if (!grey.allowShermanStates) {
        return;
    }

    const u16 width = sizeof(u16);
    const u16 alphasize = info.impl_alpha_size;

    if (info.raw.start_anchored != DEAD_STATE
        && any_cyclic_near_anchored_state
        && curr_id < alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed:
         * relies on the fact that states are currently numbered in BFS
         * order */
        DEBUG_PRINTF("%hu is banned\n", curr_id);
        return;
    }

    if (info.raw.start_floating != DEAD_STATE
        && curr_id >= info.raw.start_floating
        && curr_id < info.raw.start_floating + alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed:
         * relies on the fact that states are currently numbered in BFS
         * order */
        DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
        return;
    }

    const u16 full_state_size = width * alphasize;
    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
                                 (full_state_size - 2) / (width + 1));
    u16 best_score = 0;
    dstate_id_t best_daddy = 0;
    dstate &currState = info.states[curr_id];

    flat_set<dstate_id_t> hinted = find_daddy_candidates(info, curr_id);

    for (const dstate_id_t &donor : hinted) {
        assert(donor < curr_id);
        u32 score = 0;

        if (!info.is_normal(donor)) {
            continue;
        }

        const dstate &donorState = info.states[donor];
        for (symbol_t s = 0; s < alphasize; s++) {
            if (currState.next[s] == donorState.next[s]) {
                score++;
            }
        }

        /* prefer lower ids to provide some stability amongst potential
         * siblings */
        if (score > best_score || (score == best_score && donor < best_daddy)) {
            best_daddy = donor;
            best_score = score;

            if (score == alphasize) {
                break;
            }
        }
    }

    currState.daddy = best_daddy;
    info.extra[curr_id].daddytaken = best_score;
    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
                 best_score, alphasize);

    if (best_daddy == DEAD_STATE) {
        return; /* No good daddy */
    }

    if (best_score + max_list_len < alphasize) {
        return; /* too many transitions differ to fit in a sherman list */
    }

    assert(info.is_normal(currState.daddy));

    u32 self_loop_width = 0;
    const dstate &curr_raw = info.states[curr_id];
    for (unsigned i = 0; i < N_CHARS; i++) {
        if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
            self_loop_width++;
        }
    }

    if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
        DEBUG_PRINTF("%hu is banned: wide self loop (%u)\n", curr_id,
                     self_loop_width);
        return;
    }

    if (info.is_sheng(curr_id)) {
        return;
    }

    DEBUG_PRINTF("%hu is sherman\n", curr_id);
    info.extra[curr_id].shermanState = true;
}

static
bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
    symbol_t alphasize = raw.getImplAlphaSize();
    for (symbol_t s = 0; s < alphasize; s++) {
        dstate_id_t succ_id = raw.states[root].next[s];
        if (succ_id == DEAD_STATE) {
            continue;
        }

        const dstate &succ = raw.states[succ_id];
        for (symbol_t t = 0; t < alphasize; t++) {
            if (succ.next[t] == root || succ.next[t] == succ_id) {
                return true;
            }
        }
    }
    return false;
}

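/* On-disk layout of a sherman entry, matching the stores below: a type byte
 * (SHERMAN_STATE), a length byte giving the number of overriding transitions,
 * the daddy's impl id as a u16, then the list of overridden chars followed by
 * the corresponding target ids (with edge flags folded in). */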
static
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
    char *nfa_base = (char *)nfa;
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    char *sherman_table = nfa_base + m->sherman_offset;

    assert(ISALIGNED_16(sherman_table));
    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_sherman(i)) {
            continue;
        }
        u16 fs = verify_u16(info.implId(i));
        DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);

        assert(fs >= sherman_limit);

        char *curr_sherman_entry
            = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
        assert(curr_sherman_entry <= nfa_base + m->length);

        u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
        assert(len <= 9);
        dstate_id_t d = info.states[i].daddy;

        *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
        *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
        *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
        u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);

        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                *(chars++) = (u8)s;
            }
        }

        u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
                             info.implId(d),
                             info.implId(info.states[i].next[s]));
                u16 entry_val = info.implId(info.states[i].next[s]);
                entry_val |= get_edge_flags(nfa, entry_val);
                unaligned_store_u16((u8 *)states++, entry_val);
            }
        }
    }
}

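/* Bytecode layout produced below, in offset order: the NFA header, the
 * mcsheng header (with the sheng shuffle masks), the 16-bit successor table
 * for normal states, the aux structures, the report lists, the accel
 * structures, and finally the sherman region. */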
static
bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
                                   const map<dstate_id_t, AccelScheme> &accel_escape_info,
                                   const Grey &grey) {
    DEBUG_PRINTF("building mcsheng 16\n");

    vector<u32> reports; /* index in ri for the appropriate report list */
    vector<u32> reports_eod; /* as above */
    ReportID arb;
    u8 single;

    assert(info.getAlphaShift() <= 8);

    u16 total_daddy = 0;
    for (u32 i = 0; i < info.size(); i++) {
        find_better_daddy(info, i,
                          is_cyclic_near(info.raw, info.raw.start_anchored),
                          grey);
        total_daddy += info.extra[i].daddytaken;
    }

    DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
                 info.size() * info.impl_alpha_size, info.size(),
                 info.impl_alpha_size);

    u16 sherman_limit;
    if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
        DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
                     info.size());
        return nullptr;
    }
    u16 count_real_states = sherman_limit - sheng_end;

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
                     * count_real_states;

    size_t aux_size = sizeof(mstate_aux) * info.size();

    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
    size_t sherman_size = calcShermanRegionSize(info);

    size_t total_size = sherman_offset + sherman_size;

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    /* copy in the mc header information */
    m->sherman_offset = sherman_offset;
    m->sherman_end = total_size;
    m->sherman_limit = sherman_limit;

    DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
                 count_real_states, info.size());

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     sherman_offset - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit);

    fill_in_sherman(nfa.get(), info, sherman_limit);

    return nfa;
}

static
void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
                          dstate_id_t sheng_end) {
    u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng);

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t i = 0; i < info.size(); i++) {
        assert(!info.is_sherman(i));
        if (!info.is_normal(i)) {
            assert(info.implId(i) < sheng_end);
            continue;
        }
        u8 normal_id = verify_u8(info.implId(i) - sheng_end);

        for (size_t s = 0; s < info.impl_alpha_size; s++) {
            dstate_id_t raw_succ = info.states[i].next[s];
            succ_table[((size_t)normal_id << alphaShift) + s]
                = info.implId(raw_succ);
        }
    }
}

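/* 8-bit impl ids are handed out so that a state's class can be read off from
 * two comparisons at runtime: ids below *accel_limit are plain states, ids in
 * [*accel_limit, *accept_limit) are accelerable, and ids from *accept_limit
 * upwards are accept states. Sheng ids, allocated earlier, sit below
 * sheng_end. */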
static
void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
                     const map<dstate_id_t, AccelScheme> &accel_escape_info,
                     u16 *accel_limit, u16 *accept_limit) {
    info.states[0].impl_id = 0; /* dead is always 0 */

    vector<dstate_id_t> norm;
    vector<dstate_id_t> accel;
    vector<dstate_id_t> accept;

    assert(info.size() <= (1 << 8));

    for (u32 i = 1; i < info.size(); i++) {
        if (info.is_sheng(i)) {
            continue; /* already allocated */
        } else if (!info.states[i].reports.empty()) {
            accept.push_back(i);
        } else if (contains(accel_escape_info, i)) {
            accel.push_back(i);
        } else {
            norm.push_back(i);
        }
    }

    u32 j = sheng_end;
    for (const dstate_id_t &s : norm) {
        assert(j <= 256);
        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
        info.states[s].impl_id = j++;
    }
    *accel_limit = j;
    for (const dstate_id_t &s : accel) {
        assert(j <= 256);
        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
        info.states[s].impl_id = j++;
    }
    *accept_limit = j;
    for (const dstate_id_t &s : accept) {
        assert(j <= 256);
        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
        info.states[s].impl_id = j++;
    }
}

static
bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
                                  const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    DEBUG_PRINTF("building mcsheng 8\n");

    vector<u32> reports;
    vector<u32> reports_eod;
    ReportID arb;
    u8 single;

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    size_t normal_count = info.size() - sheng_end;

    size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
    size_t aux_size = sizeof(mstate_aux) * info.size();
    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t total_size = accel_offset + accel_size;

    DEBUG_PRINTF("aux_size %zu\n", aux_size);
    DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
    DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
    DEBUG_PRINTF("accel_size %zu\n", accel_size);
    DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
    DEBUG_PRINTF("total_size %zu\n", total_size);

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
                    &m->accept_limit_8);

    populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     total_size - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_8(nfa.get(), info, sheng_end);

    DEBUG_PRINTF("rl size %zu\n", ri->size());

    return nfa;
}

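/* Top-level entry point: build an mcsheng implementation of 'raw', or return
 * nullptr if the scheme is disabled, no suitable sheng region can be found,
 * or 16-bit state allocation fails. */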
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
                                 const ReportManager &rm) {
    if (!cc.grey.allowMcSheng) {
        return nullptr;
    }

    mcclellan_build_strat mbs(raw, rm, false);
    dfa_info info(mbs);
    bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }

    bool has_eod_reports = raw.hasEodReports();

    map<dstate_id_t, AccelScheme> accel_escape_info
        = info.strat.getAccelInfo(cc.grey);

    dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info);
    if (sheng_end <= DEAD_STATE + 1) {
        return nullptr;
    }

    bytecode_ptr<NFA> nfa;
    if (!using8bit) {
        nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
    } else {
        nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
    }

    if (!nfa) {
        return nfa;
    }

    if (has_eod_reports) {
        nfa->flags |= NFA_ACCEPTS_EOD;
    }

    DEBUG_PRINTF("compile done\n");
    return nfa;
}

bool has_accel_mcsheng(const NFA *) {
    return true; /* consider the sheng region as accelerated */
}

} // namespace ue2