1/*
2 * Copyright (c) 2015-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "mcclellancompile.h"
30
31#include "accel.h"
32#include "accelcompile.h"
33#include "grey.h"
34#include "mcclellan_internal.h"
35#include "mcclellancompile_util.h"
36#include "nfa_internal.h"
37#include "shufticompile.h"
38#include "trufflecompile.h"
39#include "ue2common.h"
40#include "util/alloc.h"
41#include "util/bitutils.h"
42#include "util/charreach.h"
43#include "util/compare.h"
44#include "util/compile_context.h"
45#include "util/container.h"
46#include "util/make_unique.h"
47#include "util/order_check.h"
48#include "util/report_manager.h"
49#include "util/flat_containers.h"
50#include "util/unaligned.h"
51#include "util/verify_types.h"
52
53#include <algorithm>
54#include <cstdio>
55#include <cstdlib>
56#include <cstring>
57#include <map>
58#include <memory>
59#include <queue>
60#include <set>
61#include <vector>
62
63#include <boost/range/adaptor/map.hpp>
64
65#include "mcclellandump.h"
66#include "util/dump_util.h"
67#include "util/dump_charclass.h"
68
69using namespace std;
70using boost::adaptors::map_keys;
71using boost::dynamic_bitset;
72
73#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
74
/** Maximum tolerated number of escape characters from an accel state.
 * This is larger than the NFA's limit, as we don't have a budget here and the
 * NFA cheats on stop characters for sets of states. */
78#define ACCEL_DFA_MAX_STOP_CHAR 160
79
/** Maximum tolerated number of escape characters from an sds accel state.
 * Larger than for normal states, as accelerating sds is important. Matches
 * the NFA value. */
82#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
83
84namespace ue2 {
85
86namespace /* anon */ {
87
88struct dstate_extra {
89 u16 daddytaken = 0;
90 bool shermanState = false;
91 bool wideState = false;
92 bool wideHead = false;
93};
94
95struct dfa_info {
96 accel_dfa_build_strat &strat;
97 raw_dfa &raw;
98 vector<dstate> &states;
99 vector<dstate_extra> extra;
100 vector<vector<dstate_id_t>> wide_state_chain;
101 vector<vector<symbol_t>> wide_symbol_chain;
102 const u16 alpha_size; /* including special symbols */
103 const array<u16, ALPHABET_SIZE> &alpha_remap;
104 const u16 impl_alpha_size;
105
106 u8 getAlphaShift() const;
107
108 explicit dfa_info(accel_dfa_build_strat &s)
109 : strat(s),
110 raw(s.get_raw()),
111 states(raw.states),
112 extra(raw.states.size()),
113 alpha_size(raw.alpha_size),
114 alpha_remap(raw.alpha_remap),
115 impl_alpha_size(raw.getImplAlphaSize()) {}
116
117 dstate_id_t implId(dstate_id_t raw_id) const {
118 return states[raw_id].impl_id;
119 }
120
121 bool is_sherman(dstate_id_t raw_id) const {
122 return extra[raw_id].shermanState;
123 }
124
125 bool is_widestate(dstate_id_t raw_id) const {
126 return extra[raw_id].wideState;
127 }
128
129 bool is_widehead(dstate_id_t raw_id) const {
130 return extra[raw_id].wideHead;
131 }
132
133 size_t size(void) const { return states.size(); }
134};
135
136u8 dfa_info::getAlphaShift() const {
137 if (impl_alpha_size < 2) {
138 return 1;
139 } else {
140 /* log2 round up */
141 return 32 - clz32(impl_alpha_size - 1);
142 }
143}
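
/* Worked example (illustrative): with impl_alpha_size == 40, the expression
 * above gives 32 - clz32(39) == 6, so each state's row in the transition
 * table spans 1 << 6 == 64 entries and a transition is looked up as
 * succ_table[(state << alphaShift) + symbol]. Rounding the alphabet up to a
 * power of two keeps that lookup a simple shift-and-add. */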
144
145struct state_prev_info {
146 vector<vector<dstate_id_t>> prev_vec;
147 explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {}
148};
149
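/* Reverse-transition view of a raw_dfa: for each state, prev_vec[sym] lists
 * the states that reach it on symbol sym, and accepts collects every accept
 * state. This is only consulted by the wide-state chain search below. */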
150struct DfaPrevInfo {
151 u16 impl_alpha_size;
152 u16 state_num;
153 vector<state_prev_info> states;
154 set<dstate_id_t> accepts;
155
156 explicit DfaPrevInfo(raw_dfa &rdfa);
157};
158
159DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa)
160 : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()),
      states(state_num, state_prev_info(impl_alpha_size)) {
162 for (size_t i = 0; i < states.size(); i++) {
163 for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
164 dstate_id_t curr = rdfa.states[i].next[sym];
165 states[curr].prev_vec[sym].push_back(i);
166 }
167 if (!rdfa.states[i].reports.empty()
168 || !rdfa.states[i].reports_eod.empty()) {
            DEBUG_PRINTF("accept raw state: %zu\n", i);
170 accepts.insert(i);
171 }
172 }
173}
174} // namespace
175
176static
177mstate_aux *getAux(NFA *n, dstate_id_t i) {
178 assert(isMcClellanType(n->type));
179
180 mcclellan *m = (mcclellan *)getMutableImplNfa(n);
181 mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);
182
183 mstate_aux *aux = aux_base + i;
184 assert((const char *)aux < (const char *)n + m->length);
185 return aux;
186}
187
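/* Sketch of what markEdges does: once all states have been laid out, every
 * stored 16-bit successor id is revisited; if that successor accepts or has
 * an accel plan (per its mstate_aux), the ACCEPT_FLAG / ACCEL_FLAG bits are
 * ORed into the stored id so the runtime can test them without touching the
 * aux table. Wide states live at ids >= wide_limit and have no aux entry, so
 * they are skipped. */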
188static
189void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
190 assert((size_t)succ_table % 2 == 0);
191 assert(n->type == MCCLELLAN_NFA_16);
192 u8 alphaShift = info.getAlphaShift();
193 u16 alphaSize = info.impl_alpha_size;
194 mcclellan *m = (mcclellan *)getMutableImplNfa(n);
195
196 /* handle the normal states */
197 for (u32 i = 0; i < m->sherman_limit; i++) {
198 for (size_t j = 0; j < alphaSize; j++) {
199 size_t c_prime = (i << alphaShift) + j;
200
201 // wide state has no aux structure.
202 if (m->has_wide && succ_table[c_prime] >= m->wide_limit) {
203 continue;
204 }
205
206 mstate_aux *aux = getAux(n, succ_table[c_prime]);
207
208 if (aux->accept) {
209 succ_table[c_prime] |= ACCEPT_FLAG;
210 }
211
212 if (aux->accel_offset) {
213 succ_table[c_prime] |= ACCEL_FLAG;
214 }
215 }
216 }
217
218 /* handle the sherman states */
219 char *sherman_base_offset = (char *)n + m->sherman_offset;
220 u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count;
221 for (u16 j = m->sherman_limit; j < sherman_ceil; j++) {
222 char *sherman_cur
223 = findMutableShermanState(sherman_base_offset, m->sherman_limit, j);
224 assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE);
225 u8 len = *(u8 *)(sherman_cur + SHERMAN_LEN_OFFSET);
226 u16 *succs = (u16 *)(sherman_cur + SHERMAN_STATES_OFFSET(len));
227
228 for (u8 i = 0; i < len; i++) {
229 u16 succ_i = unaligned_load_u16((u8 *)&succs[i]);
230 // wide state has no aux structure.
231 if (m->has_wide && succ_i >= m->wide_limit) {
232 continue;
233 }
234
235 mstate_aux *aux = getAux(n, succ_i);
236
237 if (aux->accept) {
238 succ_i |= ACCEPT_FLAG;
239 }
240
241 if (aux->accel_offset) {
242 succ_i |= ACCEL_FLAG;
243 }
244
245 unaligned_store_u16((u8 *)&succs[i], succ_i);
246 }
247 }
248
249 /* handle the wide states */
250 if (m->has_wide) {
251 u32 wide_limit = m->wide_limit;
252 char *wide_base = (char *)n + m->wide_offset;
253 assert(*wide_base == WIDE_STATE);
254 u16 wide_number = verify_u16(info.wide_symbol_chain.size());
255 // traverse over wide head states.
256 for (u16 j = wide_limit; j < wide_limit + wide_number; j++) {
257 char *wide_cur
258 = findMutableWideEntry16(wide_base, wide_limit, j);
259 u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET);
260 u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width));
261
262 // check successful transition
263 u16 next = unaligned_load_u16((u8 *)trans);
264 if (next < wide_limit) {
265 mstate_aux *aux = getAux(n, next);
266 if (aux->accept) {
267 next |= ACCEPT_FLAG;
268 }
269 if (aux->accel_offset) {
270 next |= ACCEL_FLAG;
271 }
272 unaligned_store_u16((u8 *)trans, next);
273 }
274 trans++;
275
276 // check failure transition
277 for (symbol_t k = 0; k < alphaSize; k++) {
278 u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
279 if (next_k >= wide_limit) {
280 continue;
281 }
282 mstate_aux *aux_k = getAux(n, next_k);
283 if (aux_k->accept) {
284 next_k |= ACCEPT_FLAG;
285 }
286 if (aux_k->accel_offset) {
287 next_k |= ACCEL_FLAG;
288 }
289 unaligned_store_u16((u8 *)&trans[k], next_k);
290 }
291 }
292 }
293}
294
295u32 mcclellan_build_strat::max_allowed_offset_accel() const {
296 return ACCEL_DFA_MAX_OFFSET_DEPTH;
297}
298
299u32 mcclellan_build_strat::max_stop_char() const {
300 return ACCEL_DFA_MAX_STOP_CHAR;
301}
302
303u32 mcclellan_build_strat::max_floating_stop_char() const {
304 return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
305}
306
307static
308void populateBasicInfo(size_t state_size, const dfa_info &info,
309 u32 total_size, u32 aux_offset, u32 accel_offset,
310 u32 accel_count, ReportID arb, bool single, NFA *nfa) {
311 assert(state_size == sizeof(u16) || state_size == sizeof(u8));
312
313 nfa->length = total_size;
314 nfa->nPositions = info.states.size();
315
316 nfa->scratchStateSize = verify_u32(state_size);
317 nfa->streamStateSize = verify_u32(state_size);
318
319 if (state_size == sizeof(u8)) {
320 nfa->type = MCCLELLAN_NFA_8;
321 } else {
322 nfa->type = MCCLELLAN_NFA_16;
323 }
324
325 mcclellan *m = (mcclellan *)getMutableImplNfa(nfa);
326 for (u32 i = 0; i < 256; i++) {
327 m->remap[i] = verify_u8(info.alpha_remap[i]);
328 }
329 m->alphaShift = info.getAlphaShift();
330 m->length = total_size;
331 m->aux_offset = aux_offset;
332 m->accel_offset = accel_offset;
333 m->arb_report = arb;
334 m->state_count = verify_u16(info.size());
335 m->start_anchored = info.implId(info.raw.start_anchored);
336 m->start_floating = info.implId(info.raw.start_floating);
337 m->has_accel = accel_count ? 1 : 0;
338 m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0;
339
340 if (state_size == sizeof(u8) && m->has_wide == 1) {
341 // allocate 1 more byte for wide state use.
342 nfa->scratchStateSize += sizeof(u8);
343 nfa->streamStateSize += sizeof(u8);
344 }
345
346 if (state_size == sizeof(u16) && m->has_wide == 1) {
347 // allocate 2 more bytes for wide state use.
348 nfa->scratchStateSize += sizeof(u16);
349 nfa->streamStateSize += sizeof(u16);
350 }
351
352 if (single) {
353 m->flags |= MCCLELLAN_FLAG_SINGLE;
354 }
355}
356
357namespace {
358
359struct raw_report_list {
360 flat_set<ReportID> reports;
361
362 raw_report_list(const flat_set<ReportID> &reports_in,
363 const ReportManager &rm, bool do_remap) {
364 if (do_remap) {
365 for (auto &id : reports_in) {
366 reports.insert(rm.getProgramOffset(id));
367 }
368 } else {
369 reports = reports_in;
370 }
371 }
372
373 bool operator<(const raw_report_list &b) const {
374 return reports < b.reports;
375 }
376};
377
378struct raw_report_info_impl : public raw_report_info {
379 vector<raw_report_list> rl;
380 u32 getReportListSize() const override;
381 size_t size() const override;
382 void fillReportLists(NFA *n, size_t base_offset,
383 std::vector<u32> &ro /* out */) const override;
384};
385}
386
387unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
388 vector<u32> &reports,
389 vector<u32> &reports_eod,
390 u8 *isSingleReport,
391 ReportID *arbReport) const {
392 DEBUG_PRINTF("gathering reports\n");
393
394 const bool remap_reports = has_managed_reports(rdfa.kind);
395
396 auto ri = ue2::make_unique<raw_report_info_impl>();
397 map<raw_report_list, u32> rev;
398
399 for (const dstate &s : rdfa.states) {
400 if (s.reports.empty()) {
401 reports.push_back(MO_INVALID_IDX);
402 continue;
403 }
404
405 raw_report_list rrl(s.reports, rm, remap_reports);
406 DEBUG_PRINTF("non empty r\n");
407 auto it = rev.find(rrl);
408 if (it != rev.end()) {
409 reports.push_back(it->second);
410 } else {
411 DEBUG_PRINTF("adding to rl %zu\n", ri->size());
412 rev.emplace(rrl, ri->size());
413 reports.push_back(ri->size());
414 ri->rl.push_back(rrl);
415 }
416 }
417
418 for (const dstate &s : rdfa.states) {
419 if (s.reports_eod.empty()) {
420 reports_eod.push_back(MO_INVALID_IDX);
421 continue;
422 }
423
424 DEBUG_PRINTF("non empty r eod\n");
425 raw_report_list rrl(s.reports_eod, rm, remap_reports);
426 auto it = rev.find(rrl);
427 if (it != rev.end()) {
428 reports_eod.push_back(it->second);
429 continue;
430 }
431
432 DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
433 rev.emplace(rrl, ri->size());
434 reports_eod.push_back(ri->size());
435 ri->rl.push_back(rrl);
436 }
437
438 assert(!ri->rl.empty()); /* all components should be able to generate
439 reports */
440 if (!ri->rl.empty()) {
441 *arbReport = *ri->rl.begin()->reports.begin();
442 } else {
443 *arbReport = 0;
444 }
445
446 /* if we have only a single report id generated from all accepts (not eod)
447 * we can take some short cuts */
448 flat_set<ReportID> reps;
449
450 for (u32 rl_index : reports) {
451 if (rl_index == MO_INVALID_IDX) {
452 continue;
453 }
454 assert(rl_index < ri->size());
455 insert(&reps, ri->rl[rl_index].reports);
456 }
457
458 if (reps.size() == 1) {
459 *isSingleReport = 1;
460 *arbReport = *reps.begin();
461 DEBUG_PRINTF("single -- %u\n", *arbReport);
462 } else {
463 *isSingleReport = 0;
464 }
465
466 return ri;
467}
468
469u32 raw_report_info_impl::getReportListSize() const {
470 u32 rv = 0;
471
472 for (const auto &reps : rl) {
473 rv += sizeof(report_list);
474 rv += sizeof(ReportID) * reps.reports.size();
475 }
476
477 return rv;
478}
479
480size_t raw_report_info_impl::size() const {
481 return rl.size();
482}
483
484void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
485 vector<u32> &ro) const {
486 for (const auto &reps : rl) {
487 ro.push_back(base_offset);
488
489 report_list *p = (report_list *)((char *)n + base_offset);
490
491 u32 i = 0;
492 for (const ReportID report : reps.reports) {
493 p->report[i++] = report;
494 }
495 p->count = verify_u32(reps.reports.size());
496
497 base_offset += sizeof(report_list);
498 base_offset += sizeof(ReportID) * reps.reports.size();
499 }
500}
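
/* Layout note (roughly): each entry written above is a report_list blob -- a
 * count followed by that many ReportIDs -- appended back to back starting at
 * base_offset, while ro[] records the offset of each blob so that
 * mstate_aux::accept / accept_eod can later point at the right list. */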
501
502static
503void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
504 set<dstate_id_t> *accel_states) {
505 for (dstate_id_t i : accel_escape_info | map_keys) {
506 accel_states->insert(i);
507 }
508}
509
510static
511size_t calcShermanRegionSize(const dfa_info &info) {
512 size_t rv = 0;
513
514 for (size_t i = 0; i < info.size(); i++) {
515 if (info.is_sherman(i)) {
516 rv += SHERMAN_FIXED_SIZE;
517 }
518 }
519
520 return ROUNDUP_16(rv);
521}
522
523static
524size_t calcWideRegionSize(const dfa_info &info) {
525 if (info.wide_state_chain.empty()) {
526 return 0;
527 }
528
529 // wide info header
530 size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;
531
532 // wide info body
533 for (const auto &chain : info.wide_symbol_chain) {
534 rv += ROUNDUP_N(chain.size(), 2) +
535 (info.impl_alpha_size + 1) * sizeof(u16) + 2;
536 }
537
538 return ROUNDUP_16(rv);
539}
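
/* Rough picture of the wide region sized above (matching the writer in
 * mcclellanCompile16): a WIDE_STATE tag byte and a u16 entry count, then one
 * u32 offset per wide entry, then for each entry a u16 width, the width
 * symbol bytes of the chain, and a u16 transition table holding the
 * successful transition followed by impl_alpha_size failure transitions. */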
540
541static
542void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
543 const vector<u32> &reports, const vector<u32> &reports_eod,
544 vector<u32> &reportOffsets) {
545 const dstate &raw_state = info.states[i];
546 aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
547 aux->accept_eod = raw_state.reports_eod.empty() ? 0
548 : reportOffsets[reports_eod[i]];
549 aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
550 : info.raw.start_floating);
551}
552
553/* returns false on error */
554static
555bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base,
556 dstate_id_t *wide_limit) {
557 info.states[0].impl_id = 0; /* dead is always 0 */
558
559 vector<dstate_id_t> norm;
560 vector<dstate_id_t> sherm;
561 vector<dstate_id_t> wideHead;
562 vector<dstate_id_t> wideState;
563
564 if (info.size() > (1 << 16)) {
565 DEBUG_PRINTF("too many states\n");
566 *wide_limit = 0;
567 return false;
568 }
569
570 for (u32 i = 1; i < info.size(); i++) {
571 if (info.is_widehead(i)) {
572 wideHead.push_back(i);
573 } else if (info.is_widestate(i)) {
574 wideState.push_back(i);
575 } else if (info.is_sherman(i)) {
576 sherm.push_back(i);
577 } else {
578 norm.push_back(i);
579 }
580 }
581
582 dstate_id_t next = 1;
583 for (const dstate_id_t &s : norm) {
584 DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next);
585 info.states[s].impl_id = next++;
586 }
587
588 *sherman_base = next;
589 for (const dstate_id_t &s : sherm) {
590 DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next);
591 info.states[s].impl_id = next++;
592 }
593
594 *wide_limit = next;
595 for (const dstate_id_t &s : wideHead) {
596 DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next);
597 info.states[s].impl_id = next++;
598 }
599
600 for (const dstate_id_t &s : wideState) {
601 DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next);
602 info.states[s].impl_id = next++;
603 }
604
    /* Check to see if we haven't over-allocated our states */
606 DEBUG_PRINTF("next sherman %u masked %u\n", next,
607 (dstate_id_t)(next & STATE_MASK));
608 return (next - 1) == ((next - 1) & STATE_MASK);
609}
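
/* After allocateFSN16, implementation ids are laid out as:
 *   0                           dead state
 *   [1, sherman_base)           normal states with full transition rows
 *   [sherman_base, wide_limit)  sherman states
 *   [wide_limit, ...)           wide head states, then wide body states
 * so later code can classify a state simply by comparing its id against
 * these limits. */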
610
611static
612bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
613 set<dstate_id_t> *accel_states) {
614 DEBUG_PRINTF("building mcclellan 16\n");
615
616 vector<u32> reports; /* index in ri for the appropriate report list */
617 vector<u32> reports_eod; /* as above */
618 ReportID arb;
619 u8 single;
620
621 u8 alphaShift = info.getAlphaShift();
622 assert(alphaShift <= 8);
623
624 u16 count_real_states;
625 u16 wide_limit;
626 if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
627 DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
628 info.size());
629 return nullptr;
630 }
631
632 DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
633 DEBUG_PRINTF("non_wide_states: %d\n", wide_limit);
634
635 auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
636 map<dstate_id_t, AccelScheme> accel_escape_info
637 = info.strat.getAccelInfo(cc.grey);
638
639 size_t tran_size = (1 << info.getAlphaShift())
640 * sizeof(u16) * count_real_states;
641
642 size_t aux_size = sizeof(mstate_aux) * wide_limit;
643
644 size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
645 size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
646 size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
647 + ri->getReportListSize(), 32);
648 size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
649 size_t sherman_size = calcShermanRegionSize(info);
650 size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size);
651 size_t wide_size = calcWideRegionSize(info);
652 size_t total_size = wide_offset + wide_size;
653
654 accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
655 assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
656
657 DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
658 DEBUG_PRINTF("aux_size %zu\n", aux_size);
659 DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
660 DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA));
661 DEBUG_PRINTF("accel_size %zu\n", accel_size);
662 DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset);
663 DEBUG_PRINTF("sherman_size %zu\n", sherman_size);
664 DEBUG_PRINTF("wide_offset %zu\n", wide_offset);
665 DEBUG_PRINTF("wide_size %zu\n", wide_size);
666 DEBUG_PRINTF("total_size %zu\n", total_size);
667
668 auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
669 char *nfa_base = (char *)nfa.get();
670
671 populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
672 accel_escape_info.size(), arb, single, nfa.get());
673
674 vector<u32> reportOffsets;
675
676 ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);
677
678 u16 *succ_table = (u16 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
679 mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
680 mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
681
682 m->wide_limit = wide_limit;
683 m->wide_offset = wide_offset;
684
685 /* copy in the mc header information */
686 m->sherman_offset = sherman_offset;
687 m->sherman_end = total_size;
688 m->sherman_limit = count_real_states;
689
690 /* do normal states */
691 for (size_t i = 0; i < info.size(); i++) {
692 if (info.is_sherman(i) || info.is_widestate(i)) {
693 continue;
694 }
695
696 u16 fs = info.implId(i);
697 mstate_aux *this_aux = getAux(nfa.get(), fs);
698
699 assert(fs < count_real_states);
700
701 for (size_t j = 0; j < info.impl_alpha_size; j++) {
702 succ_table[(fs << alphaShift) + j] =
703 info.implId(info.states[i].next[j]);
704 }
705
706 fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets);
707
708 if (contains(accel_escape_info, i)) {
709 this_aux->accel_offset = accel_offset;
710 accel_offset += info.strat.accelSize();
711 assert(accel_offset + sizeof(NFA) <= sherman_offset);
712 assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
713 info.strat.buildAccel(i, accel_escape_info.at(i),
714 (void *)((char *)m + this_aux->accel_offset));
715 }
716 }
717
718 /* do sherman states */
719 char *sherman_table = nfa_base + m->sherman_offset;
720 assert(ISALIGNED_16(sherman_table));
721 for (size_t i = 0; i < info.size(); i++) {
722 if (!info.is_sherman(i)) {
723 continue;
724 }
725
726 u16 fs = verify_u16(info.implId(i));
727 mstate_aux *this_aux = getAux(nfa.get(), fs);
728
729 assert(fs >= count_real_states);
730 assert(fs < wide_limit);
731
732 char *curr_sherman_entry
733 = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
734 assert(curr_sherman_entry <= nfa_base + m->length);
735
736 fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
737
738 if (contains(accel_escape_info, i)) {
739 this_aux->accel_offset = accel_offset;
740 accel_offset += info.strat.accelSize();
741 assert(accel_offset + sizeof(NFA) <= sherman_offset);
742 assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
743 info.strat.buildAccel(i, accel_escape_info.at(i),
744 (void *)((char *)m + this_aux->accel_offset));
745 }
746
747 u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
748 assert(len <= 9);
749 dstate_id_t d = info.states[i].daddy;
750
751 *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
752 *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
753 *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
754 u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);
755
756 for (u16 s = 0; s < info.impl_alpha_size; s++) {
757 if (info.states[i].next[s] != info.states[d].next[s]) {
758 *(chars++) = (u8)s;
759 }
760 }
761
762 u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
763 for (u16 s = 0; s < info.impl_alpha_size; s++) {
764 if (info.states[i].next[s] != info.states[d].next[s]) {
765 DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n",
766 fs, info.implId(d),
767 info.implId(info.states[i].next[s]));
768 unaligned_store_u16((u8 *)states++,
769 info.implId(info.states[i].next[s]));
770 }
771 }
772 }
773
774 if (!info.wide_state_chain.empty()) {
775 /* do wide states using info */
776 u16 wide_number = verify_u16(info.wide_symbol_chain.size());
777 char *wide_base = nfa_base + m->wide_offset;
778 assert(ISALIGNED_16(wide_base));
779
780 char *wide_top = wide_base;
781 *(u8 *)(wide_top++) = WIDE_STATE;
782 wide_top = ROUNDUP_PTR(wide_top, 2);
783 *(u16 *)(wide_top) = wide_number;
784 wide_top += 2;
785
786 char *curr_wide_entry = wide_top + wide_number * sizeof(u32);
787 u32 *wide_offset_list = (u32 *)wide_top;
788
789 /* get the order of writing wide states */
790 vector<size_t> order(wide_number);
791 for (size_t i = 0; i < wide_number; i++) {
792 dstate_id_t head = info.wide_state_chain[i].front();
793 size_t pos = info.implId(head) - m->wide_limit;
794 order[pos] = i;
795 }
796
797 for (size_t i : order) {
798 vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
799 vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];
800
801 u16 width = verify_u16(symbol_chain.size());
802 *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
803 u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16);
804
805 // store wide state symbol chain
806 for (size_t j = 0; j < width; j++) {
807 *(chars++) = verify_u8(symbol_chain[j]);
808 }
809
810 // store wide state transition table
811 u16 *trans = (u16 *)(curr_wide_entry
812 + WIDE_TRANSITION_OFFSET16(width));
813 dstate_id_t tail = state_chain[width - 1];
814 symbol_t last = symbol_chain[width -1];
815 dstate_id_t tran = info.states[tail].next[last];
816 // 1. successful transition
817 *trans++ = info.implId(tran);
818 // 2. failure transition
819 for (size_t j = 0; verify_u16(j) < width - 1; j++) {
820 if (symbol_chain[j] != last) {
821 tran = info.states[state_chain[j]].next[last];
822 }
823 }
824 for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
825 if (sym != last) {
826 *trans++ = info.implId(info.states[tail].next[sym]);
827 }
828 else {
829 *trans++ = info.implId(tran);
830 }
831 }
832
833 *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base);
834
835 curr_wide_entry = (char *)trans;
836 }
837 }
838
839 markEdges(nfa.get(), succ_table, info);
840
841 if (accel_states && nfa) {
842 fillAccelOut(accel_escape_info, accel_states);
843 }
844
845 return nfa;
846}
847
848static
849void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
850 const vector<u32> &reportOffsets,
851 const vector<u32> &reports,
852 const vector<u32> &reports_eod, u32 i) {
853 dstate_id_t j = info.implId(i);
854 u8 alphaShift = info.getAlphaShift();
855 assert(alphaShift <= 8);
856
857 for (size_t s = 0; s < info.impl_alpha_size; s++) {
858 dstate_id_t raw_succ = info.states[i].next[s];
859 succ_table[(j << alphaShift) + s] = info.implId(raw_succ);
860 }
861
862 aux[j].accept = 0;
863 aux[j].accept_eod = 0;
864
865 if (!info.states[i].reports.empty()) {
866 DEBUG_PRINTF("i=%u r[i]=%u\n", i, reports[i]);
867 assert(reports[i] != MO_INVALID_IDX);
868 aux[j].accept = reportOffsets[reports[i]];
869 }
870
871 if (!info.states[i].reports_eod.empty()) {
872 DEBUG_PRINTF("i=%u re[i]=%u\n", i, reports_eod[i]);
873 aux[j].accept_eod = reportOffsets[reports_eod[i]];
874 }
875
876 dstate_id_t raw_top = i ? info.states[i].next[info.alpha_remap[TOP]]
877 : info.raw.start_floating;
878
879 aux[j].top = info.implId(raw_top);
880}
881
882static
883void allocateFSN8(dfa_info &info,
884 const map<dstate_id_t, AccelScheme> &accel_escape_info,
885 u16 *accel_limit, u16 *accept_limit) {
886 info.states[0].impl_id = 0; /* dead is always 0 */
887
888 vector<dstate_id_t> norm;
889 vector<dstate_id_t> accel;
890 vector<dstate_id_t> accept;
891
892 assert(info.size() <= (1 << 8));
893
894 for (u32 i = 1; i < info.size(); i++) {
895 if (!info.states[i].reports.empty()) {
896 accept.push_back(i);
897 } else if (contains(accel_escape_info, i)) {
898 accel.push_back(i);
899 } else {
900 norm.push_back(i);
901 }
902 }
903
904 u32 j = 1; /* dead is already at 0 */
905 for (const dstate_id_t &s : norm) {
906 assert(j <= 256);
907 DEBUG_PRINTF("mapping state %u to %u\n", s, j);
908 info.states[s].impl_id = j++;
909 }
910 *accel_limit = j;
911 for (const dstate_id_t &s : accel) {
912 assert(j <= 256);
913 DEBUG_PRINTF("mapping state %u to %u\n", s, j);
914 info.states[s].impl_id = j++;
915 }
916 *accept_limit = j;
917 for (const dstate_id_t &s : accept) {
918 assert(j <= 256);
919 DEBUG_PRINTF("mapping state %u to %u\n", s, j);
920 info.states[s].impl_id = j++;
921 }
922}
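
/* After allocateFSN8, the 8-bit implementation ids are ordered
 * dead < normal < accel < accept, with m->accel_limit_8 and m->accept_limit_8
 * marking the boundaries; the intent is that the runtime can decide whether a
 * state is accelerable or accepting with a single limit comparison instead of
 * consulting the aux table. */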
923
924static
925bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
926 set<dstate_id_t> *accel_states) {
927 DEBUG_PRINTF("building mcclellan 8\n");
928
929 vector<u32> reports;
930 vector<u32> reports_eod;
931 ReportID arb;
932 u8 single;
933
934 auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
935 map<dstate_id_t, AccelScheme> accel_escape_info
936 = info.strat.getAccelInfo(cc.grey);
937
938 size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
939 size_t aux_size = sizeof(mstate_aux) * info.size();
940 size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
941 size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
942 size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
943 + ri->getReportListSize(), 32);
944 size_t total_size = accel_offset + accel_size;
945
946 DEBUG_PRINTF("aux_size %zu\n", aux_size);
947 DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
948 DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
949 DEBUG_PRINTF("accel_size %zu\n", accel_size);
950 DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
951 DEBUG_PRINTF("total_size %zu\n", total_size);
952
953 accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
954 assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
955
956 auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
957 char *nfa_base = (char *)nfa.get();
958
959 mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());
960
961 allocateFSN8(info, accel_escape_info, &m->accel_limit_8,
962 &m->accept_limit_8);
963 populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
964 accel_escape_info.size(), arb, single, nfa.get());
965
966 vector<u32> reportOffsets;
967
968 ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);
969
970 /* copy in the state information */
971 u8 *succ_table = (u8 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
972 mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
973
974 for (size_t i = 0; i < info.size(); i++) {
975 if (contains(accel_escape_info, i)) {
976 u32 j = info.implId(i);
977
978 aux[j].accel_offset = accel_offset;
979 accel_offset += info.strat.accelSize();
980
981 info.strat.buildAccel(i, accel_escape_info.at(i),
982 (void *)((char *)m + aux[j].accel_offset));
983 }
984
985 fillInBasicState8(info, aux, succ_table, reportOffsets, reports,
986 reports_eod, i);
987 }
988
989 assert(accel_offset + sizeof(NFA) <= total_size);
990
991 DEBUG_PRINTF("rl size %zu\n", ri->size());
992
993 if (accel_states && nfa) {
994 fillAccelOut(accel_escape_info, accel_states);
995 }
996
997 return nfa;
998}
999
1000#define MAX_SHERMAN_LIST_LEN 9
1001
1002static
1003void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
1004 dstate_id_t max) {
1005 if (candidate < max) {
1006 dest.insert(candidate);
1007 }
1008}
1009
1010static
1011void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source,
1012 u16 alphasize, dstate_id_t curr_id) {
1013 for (symbol_t s = 0; s < alphasize; s++) {
1014 addIfEarlier(dest, source.next[s], curr_id);
1015 }
1016}
1017
1018/* \brief Returns a set of states to search for a better daddy. */
1019static
1020flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
1021 dstate_id_t curr_id) {
1022 flat_set<dstate_id_t> hinted;
1023
1024 addIfEarlier(hinted, 0, curr_id);
1025 addIfEarlier(hinted, info.raw.start_anchored, curr_id);
1026 addIfEarlier(hinted, info.raw.start_floating, curr_id);
1027
1028 // Add existing daddy and his successors, then search back one generation.
1029 const u16 alphasize = info.impl_alpha_size;
1030 dstate_id_t daddy = info.states[curr_id].daddy;
1031 for (u32 level = 0; daddy && level < 2; level++) {
1032 addIfEarlier(hinted, daddy, curr_id);
1033 addSuccessors(hinted, info.states[daddy], alphasize, curr_id);
1034 daddy = info.states[daddy].daddy;
1035 }
1036
1037 return hinted;
1038}
1039
1040#define MAX_SHERMAN_SELF_LOOP 20
1041
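/* A sherman state is stored as a delta against a "daddy" state: only the
 * characters whose successors differ from the daddy's are listed (see the
 * SHERMAN_* writer in mcclellanCompile16). Illustrative example: a state that
 * differs from its daddy on 3 of 64 symbols costs a small header plus 3
 * char/successor pairs instead of a full 64-entry u16 row. find_better_daddy
 * below picks the donor that shares the most transitions, subject to the
 * MAX_SHERMAN_LIST_LEN and MAX_SHERMAN_SELF_LOOP limits. */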
1042static
1043void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
1044 bool any_cyclic_near_anchored_state,
1045 bool trust_daddy_states, const Grey &grey) {
1046 if (!grey.allowShermanStates) {
1047 return;
1048 }
1049
1050 const u16 width = using8bit ? sizeof(u8) : sizeof(u16);
1051 const u16 alphasize = info.impl_alpha_size;
1052
1053 if (info.raw.start_anchored != DEAD_STATE
1054 && any_cyclic_near_anchored_state
1055 && curr_id < alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that state numbers are currently in bfs
         * order */
1059 DEBUG_PRINTF("%hu is banned\n", curr_id);
1060 return;
1061 }
1062
1063 if (info.raw.start_floating != DEAD_STATE
1064 && curr_id >= info.raw.start_floating
1065 && curr_id < info.raw.start_floating + alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that state numbers are currently in bfs
         * order */
1069 DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
1070 return;
1071 }
1072
1073 const u16 full_state_size = width * alphasize;
1074 const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
1075 (full_state_size - 2)/(width + 1));
1076 u16 best_score = 0;
1077 dstate_id_t best_daddy = 0;
1078 dstate &currState = info.states[curr_id];
1079
1080 flat_set<dstate_id_t> hinted;
1081 if (trust_daddy_states) {
1082 // Use the daddy already set for this state so long as it isn't already
1083 // a Sherman state.
1084 dstate_id_t daddy = currState.daddy;
1085 if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
1086 hinted.insert(currState.daddy);
1087 } else {
1088 // Fall back to granddaddy, which has already been processed (due
1089 // to BFS ordering) and cannot be a Sherman state.
1090 dstate_id_t granddaddy = info.states[currState.daddy].daddy;
1091 if (info.is_widestate(granddaddy)) {
1092 return;
1093 }
1094 assert(!info.is_sherman(granddaddy));
1095 hinted.insert(granddaddy);
1096 }
1097 } else {
1098 hinted = find_daddy_candidates(info, curr_id);
1099 }
1100
1101 for (const dstate_id_t &donor : hinted) {
1102 assert(donor < curr_id);
1103 u32 score = 0;
1104
1105 if (info.is_sherman(donor) || info.is_widestate(donor)) {
1106 continue;
1107 }
1108
1109 const dstate &donorState = info.states[donor];
1110 for (symbol_t s = 0; s < alphasize; s++) {
1111 if (currState.next[s] == donorState.next[s]) {
1112 score++;
1113 }
1114 }
1115
1116 /* prefer lower ids to provide some stability amongst potential
1117 * siblings */
1118 if (score > best_score || (score == best_score && donor < best_daddy)) {
1119 best_daddy = donor;
1120 best_score = score;
1121
1122 if (score == alphasize) {
1123 break;
1124 }
1125 }
1126 }
1127
1128 currState.daddy = best_daddy;
1129 info.extra[curr_id].daddytaken = best_score;
1130 DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
1131 best_score, alphasize);
1132
1133 if (best_score + max_list_len < alphasize) {
1134 return; /* ??? */
1135 }
1136
1137 if (info.is_sherman(currState.daddy)) {
1138 return;
1139 }
1140
1141 u32 self_loop_width = 0;
1142 const dstate &curr_raw = info.states[curr_id];
1143 for (unsigned i = 0; i < N_CHARS; i++) {
1144 if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
1145 self_loop_width++;
1146 }
1147 }
1148
1149 if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
1150 DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id,
1151 self_loop_width);
1152 return;
1153 }
1154
1155 DEBUG_PRINTF("%hu is sherman\n", curr_id);
1156 info.extra[curr_id].shermanState = true;
1157}
1158
1159static
1160bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
1161 symbol_t alphasize = raw.getImplAlphaSize();
1162 for (symbol_t s = 0; s < alphasize; s++) {
1163 dstate_id_t succ_id = raw.states[root].next[s];
1164 if (succ_id == DEAD_STATE) {
1165 continue;
1166 }
1167
1168 const dstate &succ = raw.states[succ_id];
1169 for (symbol_t t = 0; t < alphasize; t++) {
1170 if (succ.next[t] == root || succ.next[t] == succ_id) {
1171 return true;
1172 }
1173 }
1174 }
1175 return false;
1176}
1177
1178/* \brief Test for only-one-predecessor property. */
1179static
1180bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size,
1181 const dstate_id_t curr_id, dstate_id_t &prev_id,
1182 symbol_t &prev_sym) {
1183 u32 num_prev = 0;
1184 bool test_p1 = false;
1185
1186 for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
1187 num_prev += info.states[curr_id].prev_vec[sym].size();
        DEBUG_PRINTF("Check symbol: %u, with its vector size: %zu\n", sym,
                     info.states[curr_id].prev_vec[sym].size());
        if (num_prev == 1 && !test_p1) {
            test_p1 = true;
            // exactly one predecessor seen so far, so front() is it.
            prev_id = info.states[curr_id].prev_vec[sym].front();
1193 prev_sym = sym;
1194 }
1195 }
1196
1197 return num_prev == 1;
1198}
1199
1200/* \brief Test for same-failure-action property. */
1201static
1202bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size,
1203 const dstate_id_t curr_id, const dstate_id_t prev_id,
1204 const symbol_t curr_sym, const symbol_t prev_sym) {
1205 const dstate &prevState = rdfa.states[prev_id];
1206 const dstate &currState = rdfa.states[curr_id];
1207
1208 // Compare transition tables between currState and prevState.
1209 u16 score = 0;
1210 for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
1211 if (currState.next[sym] == prevState.next[sym]
1212 && sym != curr_sym && sym != prev_sym) {
1213 score++;
1214 }
1215 }
1216 DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size);
1217
1218 // 2 cases.
1219 if (curr_sym != prev_sym && score >= impl_alpha_size - 2
1220 && currState.next[prev_sym] == prevState.next[curr_sym]) {
1221 return true;
1222 } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) {
1223 return true;
1224 }
1225 return false;
1226}
1227
/* \brief Check whether adding the current prev_id would form a circle. */
1229static
1230bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size,
1231 const vector<dstate_id_t> &chain, const dstate_id_t id) {
1232 const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec;
1233 const dstate_id_t tail = chain.front();
1234 for (symbol_t sym = 0; sym < impl_alpha_size; sym++) {
1235 auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail);
1236 if (iter != prev_vec[sym].end()) {
1237 // Tail is one of id's predecessors, forming a circle.
1238 return true;
1239 }
1240 }
1241 return false;
1242}
1243
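/* Chain discovery sketch: starting from a transition into an accept state (or
 * into a previously found head), find_chain_candidate walks backwards while
 * each state has exactly one predecessor (property 1) and the failure
 * behaviour along the chain matches (property 2), stopping at start states,
 * report states, the root "cloud" near the anchored start, or when a cycle
 * would form. A DFA tail built from a long literal such as "...abcdefgh" is
 * the typical shape that yields such a chain, which can then be packed into a
 * wide state. */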
/* \brief Walks backwards from curr_id to find the head of a candidate chain,
 * recording the states visited in temp_chain. Returns the head state id. */
1245static
1246dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info,
1247 const dstate_id_t curr_id,
1248 const symbol_t curr_sym,
1249 vector<dstate_id_t> &temp_chain) {
1250 //Record current id first.
1251 temp_chain.push_back(curr_id);
1252
1253 const u16 size = info.impl_alpha_size;
1254
1255 // Stop when entering root cloud.
1256 if (rdfa.start_anchored != DEAD_STATE
1257 && is_cyclic_near(rdfa, rdfa.start_anchored)
1258 && curr_id < size) {
1259 return curr_id;
1260 }
1261 if (rdfa.start_floating != DEAD_STATE
1262 && curr_id >= rdfa.start_floating
1263 && curr_id < rdfa.start_floating + size * 3) {
1264 return curr_id;
1265 }
1266
1267 // Stop when reaching anchored or floating.
1268 if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) {
1269 return curr_id;
1270 }
1271
1272 dstate_id_t prev_id = 0;
1273 symbol_t prev_sym = ALPHABET_SIZE;
1274
1275 // Check the only-one-predecessor property.
1276 if (!check_property1(info, size, curr_id, prev_id, prev_sym)) {
1277 return curr_id;
1278 }
1279 assert(prev_id != 0 && prev_sym != ALPHABET_SIZE);
1280 DEBUG_PRINTF("(P1 test passed.)\n");
1281
1282 // Circle testing for the prev_id that passes the P1 test.
1283 if (check_circle(info, size, temp_chain, prev_id)) {
1284 DEBUG_PRINTF("(A circle is found.)\n");
1285 return curr_id;
1286 }
1287
1288 // Check the same-failure-action property.
1289 if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) {
1290 return curr_id;
1291 }
1292 DEBUG_PRINTF("(P2 test passed.)\n");
1293
1294 if (!rdfa.states[prev_id].reports.empty()
1295 || !rdfa.states[prev_id].reports_eod.empty()) {
1296 return curr_id;
1297 } else {
1298 return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain);
1299 }
1300}
1301
/* \brief Store the longest chain found so far for each head state. */
1303static
1304bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
1305 vector<dstate_id_t> &temp_chain,
1306 dynamic_bitset<> &added, bool head_is_new) {
1307 dstate_id_t head = temp_chain.front();
1308 u16 length = temp_chain.size();
1309
1310 if (head_is_new) {
1311 DEBUG_PRINTF("This is a new chain!\n");
1312
1313 // Add this new chain and get it marked.
1314 candidate_chain.push_back(temp_chain);
1315
1316 for (auto &id : temp_chain) {
1317 DEBUG_PRINTF("(Marking s%u ...)\n", id);
1318 added.set(id);
1319 }
1320
1321 return true;
1322 }
1323
1324 DEBUG_PRINTF("This is a longer chain!\n");
1325 assert(!candidate_chain.empty());
1326
1327 auto chain = find_if(candidate_chain.begin(), candidate_chain.end(),
1328 [&](const vector<dstate_id_t> &it) {
1329 return it.front() == head;
1330 });
1331
1332 // Not a valid head, just do nothing and return.
1333 if (chain == candidate_chain.end()) {
1334 return false;
1335 }
1336
1337 u16 len = chain->size();
1338
1339 if (length > len) {
1340 // Find out the branch node first.
1341 size_t piv = 0;
1342 for (; piv < length; piv++) {
1343 if ((*chain)[piv] != temp_chain[piv]) {
1344 break;
1345 }
1346 }
1347
1348 for (size_t j = piv + 1; j < length; j++) {
1349 DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]);
1350 added.set(temp_chain[j]);
1351 }
1352
        // Unmark nodes on the old branch that are no longer useful.
        // (Except the tail node, which is still in the work queue.)
1355 for (size_t j = piv + 1; j < verify_u16(len - 1); j++) {
1356 DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]);
1357 added.reset((*chain)[j]);
1358 }
1359
1360 chain->assign(temp_chain.begin(), temp_chain.end());
1361 }
1362
1363 return false;
1364}
1365
1366/* \brief Generate wide_symbol_chain from wide_state_chain. */
1367static
1368void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
1369 raw_dfa &rdfa = info.raw;
1370 assert(chain_tail.size() == info.wide_state_chain.size());
1371
1372 for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
1373 vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
1374 vector<symbol_t> symbol_chain;
1375
1376 info.extra[state_chain[0]].wideHead = true;
1377 size_t width = state_chain.size() - 1;
1378
1379 for (size_t j = 0; j < width; j++) {
1380 dstate_id_t curr_id = state_chain[j];
1381 dstate_id_t next_id = state_chain[j + 1];
1382
1383 // The last state of the chain doesn't belong to a wide state.
1384 info.extra[curr_id].wideState = true;
1385
1386 // The tail symbol comes from vector chain_tail;
1387 if (j == width - 1) {
1388 symbol_chain.push_back(chain_tail[i]);
1389 } else {
1390 for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
1391 if (rdfa.states[curr_id].next[sym] == next_id) {
1392 symbol_chain.push_back(sym);
1393 break;
1394 }
1395 }
1396 }
1397 }
1398
1399 info.wide_symbol_chain.push_back(symbol_chain);
1400 }
1401}
1402
1403/* \brief Find potential regions of states to be packed into wide states. */
1404static
1405void find_wide_state(dfa_info &info) {
1406 DfaPrevInfo dinfo(info.raw);
1407 queue<dstate_id_t> work_queue;
1408
1409 dynamic_bitset<> added(info.raw.states.size());
1410 for (auto it : dinfo.accepts) {
1411 work_queue.push(it);
1412 added.set(it);
1413 }
1414
1415 vector<symbol_t> chain_tail;
1416 while (!work_queue.empty()) {
1417 dstate_id_t curr_id = work_queue.front();
1418 work_queue.pop();
1419 DEBUG_PRINTF("Newly popped state: s%u\n", curr_id);
1420
1421 for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) {
1422 for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) {
1423 if (added.test(info_it)) {
1424 DEBUG_PRINTF("(s%u already marked.)\n", info_it);
1425 continue;
1426 }
1427
1428 vector<dstate_id_t> temp_chain;
                // Head is the state at which backward chain extension stopped.
1430 dstate_id_t head = find_chain_candidate(info.raw, dinfo,
1431 info_it, sym,
1432 temp_chain);
1433
                // A candidate chain should contain at least 8 substates.
1435 if (temp_chain.size() < 8) {
1436 DEBUG_PRINTF("(Not enough substates, continue.)\n");
1437 continue;
1438 }
1439
1440 bool head_is_new = !added.test(head);
1441 if (head_is_new) {
1442 added.set(head);
1443 work_queue.push(head);
1444 DEBUG_PRINTF("Newly pushed state: s%u\n", head);
1445 }
1446
1447 reverse(temp_chain.begin(), temp_chain.end());
1448 temp_chain.push_back(curr_id);
1449
1450 assert(head > 0 && head == temp_chain.front());
1451 if (store_chain_longest(info.wide_state_chain, temp_chain,
1452 added, head_is_new)) {
1453 chain_tail.push_back(sym);
1454 }
1455 }
1456 }
1457 }
1458
1459 generate_symbol_chain(info, chain_tail);
1460}
1461
1462bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
1463 const CompileContext &cc,
1464 bool trust_daddy_states,
1465 set<dstate_id_t> *accel_states) {
1466 assert(!is_dead(raw));
1467
1468 dfa_info info(strat);
1469 bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;
1470
1471 if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
1472 * mode with our semantics */
1473 raw.stripExtraEodReports();
1474 }
1475
1476 bool has_eod_reports = raw.hasEodReports();
1477
1478 bytecode_ptr<NFA> nfa;
1479 if (!using8bit) {
1480 if (cc.grey.allowWideStates && strat.getType() == McClellan
1481 && !is_triggered(raw.kind)) {
1482 find_wide_state(info);
1483 }
1484
1485 u16 total_daddy = 0;
1486 bool any_cyclic_near_anchored_state
1487 = is_cyclic_near(raw, raw.start_anchored);
1488
1489 for (u32 i = 0; i < info.size(); i++) {
1490 if (info.is_widestate(i)) {
1491 continue;
1492 }
1493 find_better_daddy(info, i, using8bit,
1494 any_cyclic_near_anchored_state,
1495 trust_daddy_states, cc.grey);
1496 total_daddy += info.extra[i].daddytaken;
1497 }
1498
1499 DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
1500 info.size() * info.impl_alpha_size, info.size(),
1501 info.impl_alpha_size);
1502
1503 nfa = mcclellanCompile16(info, cc, accel_states);
1504 } else {
1505 nfa = mcclellanCompile8(info, cc, accel_states);
1506 }
1507
1508 if (has_eod_reports) {
1509 nfa->flags |= NFA_ACCEPTS_EOD;
1510 }
1511
1512 DEBUG_PRINTF("compile done\n");
1513 return nfa;
1514}
1515
1516bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc,
1517 const ReportManager &rm,
1518 bool only_accel_init,
1519 bool trust_daddy_states,
1520 set<dstate_id_t> *accel_states) {
1521 mcclellan_build_strat mbs(raw, rm, only_accel_init);
1522 return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states);
1523}
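
/* Illustrative call pattern only (real callers live elsewhere in the
 * compiler); parameter values here are placeholders:
 *
 *   raw_dfa rdfa(NFA_OUTFIX);            // determinised + minimised pattern
 *   // ... populate rdfa ...
 *   auto nfa = mcclellanCompile(rdfa, cc, rm, false, false, nullptr);
 *   if (!nfa) { ... fall back to another engine ... }
 *
 * A null return simply means a 16-bit McClellan could not be built (e.g. too
 * many states for the id encoding); it is not a hard error. */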
1524
1525size_t mcclellan_build_strat::accelSize(void) const {
1526 return sizeof(AccelAux); /* McClellan accel structures are just bare
1527 * accelaux */
1528}
1529
1530u32 mcclellanStartReachSize(const raw_dfa *raw) {
1531 if (raw->states.size() < 2) {
1532 return 0;
1533 }
1534
1535 const dstate &ds = raw->states[raw->start_anchored];
1536
1537 CharReach out;
1538 for (unsigned i = 0; i < N_CHARS; i++) {
1539 if (ds.next[raw->alpha_remap[i]] != DEAD_STATE) {
1540 out.set(i);
1541 }
1542 }
1543
1544 return out.count();
1545}
1546
1547bool has_accel_mcclellan(const NFA *nfa) {
1548 const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
1549 return m->has_accel;
1550}
1551
1552} // namespace ue2
1553