1 | /* |
2 | * Copyright (c) 2015-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "mcclellancompile.h" |
30 | |
31 | #include "accel.h" |
32 | #include "accelcompile.h" |
33 | #include "grey.h" |
34 | #include "mcclellan_internal.h" |
35 | #include "mcclellancompile_util.h" |
36 | #include "nfa_internal.h" |
37 | #include "shufticompile.h" |
38 | #include "trufflecompile.h" |
39 | #include "ue2common.h" |
40 | #include "util/alloc.h" |
41 | #include "util/bitutils.h" |
42 | #include "util/charreach.h" |
43 | #include "util/compare.h" |
44 | #include "util/compile_context.h" |
45 | #include "util/container.h" |
46 | #include "util/make_unique.h" |
47 | #include "util/order_check.h" |
48 | #include "util/report_manager.h" |
49 | #include "util/flat_containers.h" |
50 | #include "util/unaligned.h" |
51 | #include "util/verify_types.h" |
52 | |
53 | #include <algorithm> |
54 | #include <cstdio> |
55 | #include <cstdlib> |
56 | #include <cstring> |
57 | #include <map> |
58 | #include <memory> |
59 | #include <queue> |
60 | #include <set> |
61 | #include <vector> |
62 | |
63 | #include <boost/range/adaptor/map.hpp> |
64 | |
65 | #include "mcclellandump.h" |
66 | #include "util/dump_util.h" |
67 | #include "util/dump_charclass.h" |
68 | |
69 | using namespace std; |
70 | using boost::adaptors::map_keys; |
71 | using boost::dynamic_bitset; |
72 | |
73 | #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 |
74 | |
75 | /** Maximum tolerated number of escape character from an accel state. |
76 | * This is larger than nfa, as we don't have a budget and the nfa cheats on stop |
77 | * characters for sets of states */ |
78 | #define ACCEL_DFA_MAX_STOP_CHAR 160 |
79 | |
80 | /** Maximum tolerated number of escape character from a sds accel state. Larger |
81 | * than normal states as accelerating sds is important. Matches NFA value */ |
82 | #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 |
83 | |
84 | namespace ue2 { |
85 | |
86 | namespace /* anon */ { |
87 | |
88 | struct { |
89 | u16 = 0; |
90 | bool = false; |
91 | bool = false; |
92 | bool = false; |
93 | }; |
94 | |
95 | struct dfa_info { |
96 | accel_dfa_build_strat &strat; |
97 | raw_dfa &raw; |
98 | vector<dstate> &states; |
99 | vector<dstate_extra> ; |
100 | vector<vector<dstate_id_t>> wide_state_chain; |
101 | vector<vector<symbol_t>> wide_symbol_chain; |
102 | const u16 alpha_size; /* including special symbols */ |
103 | const array<u16, ALPHABET_SIZE> &alpha_remap; |
104 | const u16 impl_alpha_size; |
105 | |
106 | u8 getAlphaShift() const; |
107 | |
108 | explicit dfa_info(accel_dfa_build_strat &s) |
109 | : strat(s), |
110 | raw(s.get_raw()), |
111 | states(raw.states), |
112 | extra(raw.states.size()), |
113 | alpha_size(raw.alpha_size), |
114 | alpha_remap(raw.alpha_remap), |
115 | impl_alpha_size(raw.getImplAlphaSize()) {} |
116 | |
117 | dstate_id_t implId(dstate_id_t raw_id) const { |
118 | return states[raw_id].impl_id; |
119 | } |
120 | |
121 | bool is_sherman(dstate_id_t raw_id) const { |
122 | return extra[raw_id].shermanState; |
123 | } |
124 | |
125 | bool is_widestate(dstate_id_t raw_id) const { |
126 | return extra[raw_id].wideState; |
127 | } |
128 | |
129 | bool is_widehead(dstate_id_t raw_id) const { |
130 | return extra[raw_id].wideHead; |
131 | } |
132 | |
133 | size_t size(void) const { return states.size(); } |
134 | }; |
135 | |
136 | u8 dfa_info::getAlphaShift() const { |
137 | if (impl_alpha_size < 2) { |
138 | return 1; |
139 | } else { |
140 | /* log2 round up */ |
141 | return 32 - clz32(impl_alpha_size - 1); |
142 | } |
143 | } |
144 | |
/* For a single state: its predecessor states, bucketed by the symbol on
 * which the predecessor transitions into it. */
struct state_prev_info {
    vector<vector<dstate_id_t>> prev_vec; /* indexed by symbol */
    explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {}
};
149 | |
/* Reverse-edge view of a raw_dfa (who transitions into each state, per
 * symbol) plus the set of accepting states. Built by the constructor. */
struct DfaPrevInfo {
    u16 impl_alpha_size;
    u16 state_num;
    vector<state_prev_info> states; /* predecessor info per state */
    set<dstate_id_t> accepts;       /* states with reports or eod reports */

    explicit DfaPrevInfo(raw_dfa &rdfa);
};
158 | |
159 | DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) |
160 | : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()), |
161 | states(state_num, state_prev_info(impl_alpha_size)){ |
162 | for (size_t i = 0; i < states.size(); i++) { |
163 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
164 | dstate_id_t curr = rdfa.states[i].next[sym]; |
165 | states[curr].prev_vec[sym].push_back(i); |
166 | } |
167 | if (!rdfa.states[i].reports.empty() |
168 | || !rdfa.states[i].reports_eod.empty()) { |
169 | DEBUG_PRINTF("accept raw state: %ld\n" , i); |
170 | accepts.insert(i); |
171 | } |
172 | } |
173 | } |
174 | } // namespace |
175 | |
176 | static |
177 | mstate_aux *getAux(NFA *n, dstate_id_t i) { |
178 | assert(isMcClellanType(n->type)); |
179 | |
180 | mcclellan *m = (mcclellan *)getMutableImplNfa(n); |
181 | mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); |
182 | |
183 | mstate_aux *aux = aux_base + i; |
184 | assert((const char *)aux < (const char *)n + m->length); |
185 | return aux; |
186 | } |
187 | |
/* Patches the 16-bit successor tables in place once the aux structures are
 * known: each successor entry gains ACCEPT_FLAG if the target state accepts
 * and ACCEL_FLAG if it has an acceleration scheme. Covers normal states,
 * sherman states, and wide states. */
static
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
    assert((size_t)succ_table % 2 == 0); /* table must be u16 aligned */
    assert(n->type == MCCLELLAN_NFA_16);
    u8 alphaShift = info.getAlphaShift();
    u16 alphaSize = info.impl_alpha_size;
    mcclellan *m = (mcclellan *)getMutableImplNfa(n);

    /* handle the normal states */
    for (u32 i = 0; i < m->sherman_limit; i++) {
        for (size_t j = 0; j < alphaSize; j++) {
            size_t c_prime = (i << alphaShift) + j;

            // wide state has no aux structure.
            if (m->has_wide && succ_table[c_prime] >= m->wide_limit) {
                continue;
            }

            mstate_aux *aux = getAux(n, succ_table[c_prime]);

            if (aux->accept) {
                succ_table[c_prime] |= ACCEPT_FLAG;
            }

            if (aux->accel_offset) {
                succ_table[c_prime] |= ACCEL_FLAG;
            }
        }
    }

    /* handle the sherman states */
    char *sherman_base_offset = (char *)n + m->sherman_offset;
    /* when wide states exist they are numbered from wide_limit upwards, so
     * the sherman range is capped there rather than at state_count */
    u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count;
    for (u16 j = m->sherman_limit; j < sherman_ceil; j++) {
        char *sherman_cur
            = findMutableShermanState(sherman_base_offset, m->sherman_limit, j);
        assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE);
        u8 len = *(u8 *)(sherman_cur + SHERMAN_LEN_OFFSET);
        u16 *succs = (u16 *)(sherman_cur + SHERMAN_STATES_OFFSET(len));

        /* sherman successor arrays are not necessarily 2-byte aligned, hence
         * the unaligned loads/stores */
        for (u8 i = 0; i < len; i++) {
            u16 succ_i = unaligned_load_u16((u8 *)&succs[i]);
            // wide state has no aux structure.
            if (m->has_wide && succ_i >= m->wide_limit) {
                continue;
            }

            mstate_aux *aux = getAux(n, succ_i);

            if (aux->accept) {
                succ_i |= ACCEPT_FLAG;
            }

            if (aux->accel_offset) {
                succ_i |= ACCEL_FLAG;
            }

            unaligned_store_u16((u8 *)&succs[i], succ_i);
        }
    }

    /* handle the wide states */
    if (m->has_wide) {
        u32 wide_limit = m->wide_limit;
        char *wide_base = (char *)n + m->wide_offset;
        assert(*wide_base == WIDE_STATE);
        u16 wide_number = verify_u16(info.wide_symbol_chain.size());
        // traverse over wide head states.
        for (u16 j = wide_limit; j < wide_limit + wide_number; j++) {
            char *wide_cur
                = findMutableWideEntry16(wide_base, wide_limit, j);
            u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET);
            u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width));

            // check successful transition
            u16 next = unaligned_load_u16((u8 *)trans);
            if (next < wide_limit) { /* only non-wide targets have aux */
                mstate_aux *aux = getAux(n, next);
                if (aux->accept) {
                    next |= ACCEPT_FLAG;
                }
                if (aux->accel_offset) {
                    next |= ACCEL_FLAG;
                }
                unaligned_store_u16((u8 *)trans, next);
            }
            trans++;

            // check failure transition
            for (symbol_t k = 0; k < alphaSize; k++) {
                u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
                if (next_k >= wide_limit) {
                    continue;
                }
                mstate_aux *aux_k = getAux(n, next_k);
                if (aux_k->accept) {
                    next_k |= ACCEPT_FLAG;
                }
                if (aux_k->accel_offset) {
                    next_k |= ACCEL_FLAG;
                }
                unaligned_store_u16((u8 *)&trans[k], next_k);
            }
        }
    }
}
294 | |
/* Maximum offset depth to consider when building offset accel schemes. */
u32 mcclellan_build_strat::max_allowed_offset_accel() const {
    return ACCEL_DFA_MAX_OFFSET_DEPTH;
}
298 | |
/* Maximum tolerated number of escape characters from an accel state; see
 * ACCEL_DFA_MAX_STOP_CHAR above. */
u32 mcclellan_build_strat::max_stop_char() const {
    return ACCEL_DFA_MAX_STOP_CHAR;
}
302 | |
/* Maximum tolerated number of escape characters from a sds accel state; see
 * ACCEL_DFA_MAX_FLOATING_STOP_CHAR above. */
u32 mcclellan_build_strat::max_floating_stop_char() const {
    return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
}
306 | |
/* Fills in the NFA header and the common fields of the mcclellan header.
 * state_size selects the model: sizeof(u8) -> MCCLELLAN_NFA_8,
 * sizeof(u16) -> MCCLELLAN_NFA_16. */
static
void populateBasicInfo(size_t state_size, const dfa_info &info,
                       u32 total_size, u32 aux_offset, u32 accel_offset,
                       u32 accel_count, ReportID arb, bool single, NFA *nfa) {
    assert(state_size == sizeof(u16) || state_size == sizeof(u8));

    nfa->length = total_size;
    nfa->nPositions = info.states.size();

    nfa->scratchStateSize = verify_u32(state_size);
    nfa->streamStateSize = verify_u32(state_size);

    if (state_size == sizeof(u8)) {
        nfa->type = MCCLELLAN_NFA_8;
    } else {
        nfa->type = MCCLELLAN_NFA_16;
    }

    mcclellan *m = (mcclellan *)getMutableImplNfa(nfa);
    /* bake the 256-entry alphabet remap table into the bytecode */
    for (u32 i = 0; i < 256; i++) {
        m->remap[i] = verify_u8(info.alpha_remap[i]);
    }
    m->alphaShift = info.getAlphaShift();
    m->length = total_size;
    m->aux_offset = aux_offset;
    m->accel_offset = accel_offset;
    m->arb_report = arb;               /* arbitrary report id, see gatherReports */
    m->state_count = verify_u16(info.size());
    m->start_anchored = info.implId(info.raw.start_anchored);
    m->start_floating = info.implId(info.raw.start_floating);
    m->has_accel = accel_count ? 1 : 0;
    m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0;

    if (state_size == sizeof(u8) && m->has_wide == 1) {
        // allocate 1 more byte for wide state use.
        nfa->scratchStateSize += sizeof(u8);
        nfa->streamStateSize += sizeof(u8);
    }

    if (state_size == sizeof(u16) && m->has_wide == 1) {
        // allocate 2 more bytes for wide state use.
        nfa->scratchStateSize += sizeof(u16);
        nfa->streamStateSize += sizeof(u16);
    }

    if (single) {
        m->flags |= MCCLELLAN_FLAG_SINGLE;
    }
}
356 | |
357 | namespace { |
358 | |
359 | struct raw_report_list { |
360 | flat_set<ReportID> reports; |
361 | |
362 | raw_report_list(const flat_set<ReportID> &reports_in, |
363 | const ReportManager &rm, bool do_remap) { |
364 | if (do_remap) { |
365 | for (auto &id : reports_in) { |
366 | reports.insert(rm.getProgramOffset(id)); |
367 | } |
368 | } else { |
369 | reports = reports_in; |
370 | } |
371 | } |
372 | |
373 | bool operator<(const raw_report_list &b) const { |
374 | return reports < b.reports; |
375 | } |
376 | }; |
377 | |
/* Concrete raw_report_info: the deduplicated list of report lists gathered
 * by gatherReports(), later serialised into the bytecode by
 * fillReportLists(). */
struct raw_report_info_impl : public raw_report_info {
    vector<raw_report_list> rl;
    u32 getReportListSize() const override;
    size_t size() const override;
    void fillReportLists(NFA *n, size_t base_offset,
                         std::vector<u32> &ro /* out */) const override;
};
385 | } |
386 | |
/* Gathers and deduplicates report lists for every state. On return,
 * reports[i] / reports_eod[i] hold an index into the returned ri->rl (or
 * MO_INVALID_IDX if state i does not accept). *isSingleReport is set when a
 * single report id covers all non-eod accepts, which enables runtime
 * shortcuts; *arbReport receives an arbitrary representative report id. */
unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
                                                  vector<u32> &reports,
                                                  vector<u32> &reports_eod,
                                                  u8 *isSingleReport,
                                                  ReportID *arbReport) const {
    DEBUG_PRINTF("gathering reports\n");

    const bool remap_reports = has_managed_reports(rdfa.kind);

    auto ri = ue2::make_unique<raw_report_info_impl>();
    map<raw_report_list, u32> rev; /* dedupe: report list -> index in ri->rl */

    /* normal (non-eod) reports */
    for (const dstate &s : rdfa.states) {
        if (s.reports.empty()) {
            reports.push_back(MO_INVALID_IDX);
            continue;
        }

        raw_report_list rrl(s.reports, rm, remap_reports);
        DEBUG_PRINTF("non empty r\n");
        auto it = rev.find(rrl);
        if (it != rev.end()) {
            reports.push_back(it->second);
        } else {
            DEBUG_PRINTF("adding to rl %zu\n", ri->size());
            rev.emplace(rrl, ri->size());
            reports.push_back(ri->size());
            ri->rl.push_back(rrl);
        }
    }

    /* eod reports, sharing the same dedupe map */
    for (const dstate &s : rdfa.states) {
        if (s.reports_eod.empty()) {
            reports_eod.push_back(MO_INVALID_IDX);
            continue;
        }

        DEBUG_PRINTF("non empty r eod\n");
        raw_report_list rrl(s.reports_eod, rm, remap_reports);
        auto it = rev.find(rrl);
        if (it != rev.end()) {
            reports_eod.push_back(it->second);
            continue;
        }

        DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
        rev.emplace(rrl, ri->size());
        reports_eod.push_back(ri->size());
        ri->rl.push_back(rrl);
    }

    assert(!ri->rl.empty()); /* all components should be able to generate
                                reports */
    if (!ri->rl.empty()) {
        *arbReport = *ri->rl.begin()->reports.begin();
    } else {
        *arbReport = 0;
    }

    /* if we have only a single report id generated from all accepts (not eod)
     * we can take some short cuts */
    flat_set<ReportID> reps;

    for (u32 rl_index : reports) {
        if (rl_index == MO_INVALID_IDX) {
            continue;
        }
        assert(rl_index < ri->size());
        insert(&reps, ri->rl[rl_index].reports);
    }

    if (reps.size() == 1) {
        *isSingleReport = 1;
        *arbReport = *reps.begin();
        DEBUG_PRINTF("single -- %u\n", *arbReport);
    } else {
        *isSingleReport = 0;
    }

    return ri;
}
468 | |
469 | u32 raw_report_info_impl::getReportListSize() const { |
470 | u32 rv = 0; |
471 | |
472 | for (const auto &reps : rl) { |
473 | rv += sizeof(report_list); |
474 | rv += sizeof(ReportID) * reps.reports.size(); |
475 | } |
476 | |
477 | return rv; |
478 | } |
479 | |
/* Number of distinct report lists gathered. */
size_t raw_report_info_impl::size() const {
    return rl.size();
}
483 | |
/* Serialises each gathered report list into the bytecode starting at
 * base_offset, recording each list's offset (relative to the NFA start) in
 * ro. Lists are laid out contiguously: header followed by the ids. */
void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
                                           vector<u32> &ro) const {
    for (const auto &reps : rl) {
        ro.push_back(base_offset);

        report_list *p = (report_list *)((char *)n + base_offset);

        u32 i = 0;
        for (const ReportID report : reps.reports) {
            p->report[i++] = report;
        }
        p->count = verify_u32(reps.reports.size());

        /* advance past this list: header + flexible id array */
        base_offset += sizeof(report_list);
        base_offset += sizeof(ReportID) * reps.reports.size();
    }
}
501 | |
502 | static |
503 | void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, |
504 | set<dstate_id_t> *accel_states) { |
505 | for (dstate_id_t i : accel_escape_info | map_keys) { |
506 | accel_states->insert(i); |
507 | } |
508 | } |
509 | |
510 | static |
511 | size_t calcShermanRegionSize(const dfa_info &info) { |
512 | size_t rv = 0; |
513 | |
514 | for (size_t i = 0; i < info.size(); i++) { |
515 | if (info.is_sherman(i)) { |
516 | rv += SHERMAN_FIXED_SIZE; |
517 | } |
518 | } |
519 | |
520 | return ROUNDUP_16(rv); |
521 | } |
522 | |
/* Size in bytes of the wide-state region; must agree with the layout written
 * by mcclellanCompile16's wide-state section. */
static
size_t calcWideRegionSize(const dfa_info &info) {
    if (info.wide_state_chain.empty()) {
        return 0;
    }

    // wide info header: one u32 entry offset per chain, plus 4 bytes for the
    // leading WIDE_STATE marker byte, padding and the u16 entry count
    size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;

    // wide info body, per chain: the symbol chain (padded to 2 bytes), the
    // transition table of (impl_alpha_size + 1) u16 entries, and 2 bytes for
    // the u16 width field
    for (const auto &chain : info.wide_symbol_chain) {
        rv += ROUNDUP_N(chain.size(), 2) +
              (info.impl_alpha_size + 1) * sizeof(u16) + 2;
    }

    return ROUNDUP_16(rv);
}
540 | |
541 | static |
542 | void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, |
543 | const vector<u32> &reports, const vector<u32> &reports_eod, |
544 | vector<u32> &reportOffsets) { |
545 | const dstate &raw_state = info.states[i]; |
546 | aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; |
547 | aux->accept_eod = raw_state.reports_eod.empty() ? 0 |
548 | : reportOffsets[reports_eod[i]]; |
549 | aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]] |
550 | : info.raw.start_floating); |
551 | } |
552 | |
/* Assigns 16-bit implementation state numbers. States are numbered in
 * representation order — normal, then sherman (from *sherman_base), then
 * wide heads (from *wide_limit), then wide body states — so the runtime can
 * classify a state by comparing its id against these thresholds.
 * Returns false on error (too many states to number). */
static
bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base,
                   dstate_id_t *wide_limit) {
    info.states[0].impl_id = 0; /* dead is always 0 */

    vector<dstate_id_t> norm;
    vector<dstate_id_t> sherm;
    vector<dstate_id_t> wideHead;
    vector<dstate_id_t> wideState;

    if (info.size() > (1 << 16)) {
        DEBUG_PRINTF("too many states\n");
        *wide_limit = 0;
        return false;
    }

    /* bucket states by representation; wide head takes priority as a wide
     * chain's head is still a wide state */
    for (u32 i = 1; i < info.size(); i++) {
        if (info.is_widehead(i)) {
            wideHead.push_back(i);
        } else if (info.is_widestate(i)) {
            wideState.push_back(i);
        } else if (info.is_sherman(i)) {
            sherm.push_back(i);
        } else {
            norm.push_back(i);
        }
    }

    dstate_id_t next = 1;
    for (const dstate_id_t &s : norm) {
        DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    *sherman_base = next;
    for (const dstate_id_t &s : sherm) {
        DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    *wide_limit = next;
    for (const dstate_id_t &s : wideHead) {
        DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    for (const dstate_id_t &s : wideState) {
        DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    /* Check to see if we haven't over allocated our states */
    DEBUG_PRINTF("next sherman %u masked %u\n", next,
                 (dstate_id_t)(next & STATE_MASK));
    return (next - 1) == ((next - 1) & STATE_MASK);
}
610 | |
/* Builds the 16-bit McClellan bytecode. Bytecode layout:
 *   NFA header | mcclellan header | succ table (normal states) | aux |
 *   report lists | accel schemes | sherman region | wide region.
 * Returns nullptr if 16-bit state numbering is not possible. */
static
bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
                                     set<dstate_id_t> *accel_states) {
    DEBUG_PRINTF("building mcclellan 16\n");

    vector<u32> reports; /* index in ri for the appropriate report list */
    vector<u32> reports_eod; /* as above */
    ReportID arb;
    u8 single;

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    u16 count_real_states;
    u16 wide_limit;
    if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
        DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
                     info.size());
        return nullptr;
    }

    DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
    DEBUG_PRINTF("non_wide_states: %d\n", wide_limit);

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
    map<dstate_id_t, AccelScheme> accel_escape_info
        = info.strat.getAccelInfo(cc.grey);

    /* compute region sizes and offsets for the layout described above */
    size_t tran_size = (1 << info.getAlphaShift())
                        * sizeof(u16) * count_real_states;

    size_t aux_size = sizeof(mstate_aux) * wide_limit;

    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
    size_t sherman_size = calcShermanRegionSize(info);
    size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size);
    size_t wide_size = calcWideRegionSize(info);
    size_t total_size = wide_offset + wide_size;

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
    DEBUG_PRINTF("aux_size %zu\n", aux_size);
    DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
    DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA));
    DEBUG_PRINTF("accel_size %zu\n", accel_size);
    DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset);
    DEBUG_PRINTF("sherman_size %zu\n", sherman_size);
    DEBUG_PRINTF("wide_offset %zu\n", wide_offset);
    DEBUG_PRINTF("wide_size %zu\n", wide_size);
    DEBUG_PRINTF("total_size %zu\n", total_size);

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    char *nfa_base = (char *)nfa.get();

    populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());

    vector<u32> reportOffsets;

    ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);

    u16 *succ_table = (u16 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
    mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
    mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());

    m->wide_limit = wide_limit;
    m->wide_offset = wide_offset;

    /* copy in the mc header information */
    m->sherman_offset = sherman_offset;
    m->sherman_end = total_size;
    m->sherman_limit = count_real_states;

    /* do normal states */
    for (size_t i = 0; i < info.size(); i++) {
        if (info.is_sherman(i) || info.is_widestate(i)) {
            continue;
        }

        u16 fs = info.implId(i);
        mstate_aux *this_aux = getAux(nfa.get(), fs);

        assert(fs < count_real_states);

        /* full transition row for this state */
        for (size_t j = 0; j < info.impl_alpha_size; j++) {
            succ_table[(fs << alphaShift) + j] =
                info.implId(info.states[i].next[j]);
        }

        fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets);

        if (contains(accel_escape_info, i)) {
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset + sizeof(NFA) <= sherman_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }
    }

    /* do sherman states */
    char *sherman_table = nfa_base + m->sherman_offset;
    assert(ISALIGNED_16(sherman_table));
    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_sherman(i)) {
            continue;
        }

        u16 fs = verify_u16(info.implId(i));
        mstate_aux *this_aux = getAux(nfa.get(), fs);

        assert(fs >= count_real_states);
        assert(fs < wide_limit);

        char *curr_sherman_entry
            = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
        assert(curr_sherman_entry <= nfa_base + m->length);

        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);

        if (contains(accel_escape_info, i)) {
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset + sizeof(NFA) <= sherman_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }

        /* a sherman entry only stores the transitions that differ from its
         * daddy state */
        u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
        assert(len <= 9);
        dstate_id_t d = info.states[i].daddy;

        *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
        *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
        *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
        u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);

        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                *(chars++) = (u8)s;
            }
        }

        u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n",
                             fs, info.implId(d),
                             info.implId(info.states[i].next[s]));
                /* successor array may be unaligned */
                unaligned_store_u16((u8 *)states++,
                                    info.implId(info.states[i].next[s]));
            }
        }
    }

    if (!info.wide_state_chain.empty()) {
        /* do wide states using info */
        u16 wide_number = verify_u16(info.wide_symbol_chain.size());
        char *wide_base = nfa_base + m->wide_offset;
        assert(ISALIGNED_16(wide_base));

        /* region header: marker byte, padding, u16 entry count */
        char *wide_top = wide_base;
        *(u8 *)(wide_top++) = WIDE_STATE;
        wide_top = ROUNDUP_PTR(wide_top, 2);
        *(u16 *)(wide_top) = wide_number;
        wide_top += 2;

        char *curr_wide_entry = wide_top + wide_number * sizeof(u32);
        u32 *wide_offset_list = (u32 *)wide_top;

        /* get the order of writing wide states */
        vector<size_t> order(wide_number);
        for (size_t i = 0; i < wide_number; i++) {
            dstate_id_t head = info.wide_state_chain[i].front();
            size_t pos = info.implId(head) - m->wide_limit;
            order[pos] = i;
        }

        for (size_t i : order) {
            vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
            vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];

            u16 width = verify_u16(symbol_chain.size());
            *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
            u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16);

            // store wide state symbol chain
            for (size_t j = 0; j < width; j++) {
                *(chars++) = verify_u8(symbol_chain[j]);
            }

            // store wide state transition table
            u16 *trans = (u16 *)(curr_wide_entry
                                + WIDE_TRANSITION_OFFSET16(width));
            dstate_id_t tail = state_chain[width - 1];
            symbol_t last = symbol_chain[width - 1];
            dstate_id_t tran = info.states[tail].next[last];
            // 1. successful transition
            *trans++ = info.implId(tran);
            // 2. failure transition
            for (size_t j = 0; verify_u16(j) < width - 1; j++) {
                if (symbol_chain[j] != last) {
                    tran = info.states[state_chain[j]].next[last];
                }
            }
            for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
                if (sym != last) {
                    *trans++ = info.implId(info.states[tail].next[sym]);
                } else {
                    *trans++ = info.implId(tran);
                }
            }

            /* record this entry's offset in the header's offset list */
            *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base);

            curr_wide_entry = (char *)trans;
        }
    }

    /* set ACCEPT/ACCEL flags on successor entries now that aux is complete */
    markEdges(nfa.get(), succ_table, info);

    if (accel_states && nfa) {
        fillAccelOut(accel_escape_info, accel_states);
    }

    return nfa;
}
847 | |
/* Writes the succ table row and the aux entry for raw state i in the 8-bit
 * model. */
static
void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
                       const vector<u32> &reportOffsets,
                       const vector<u32> &reports,
                       const vector<u32> &reports_eod, u32 i) {
    dstate_id_t j = info.implId(i); /* implementation id of this state */
    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t s = 0; s < info.impl_alpha_size; s++) {
        dstate_id_t raw_succ = info.states[i].next[s];
        succ_table[(j << alphaShift) + s] = info.implId(raw_succ);
    }

    /* report list offsets; zero means "not an accept" */
    aux[j].accept = 0;
    aux[j].accept_eod = 0;

    if (!info.states[i].reports.empty()) {
        DEBUG_PRINTF("i=%u r[i]=%u\n", i, reports[i]);
        assert(reports[i] != MO_INVALID_IDX);
        aux[j].accept = reportOffsets[reports[i]];
    }

    if (!info.states[i].reports_eod.empty()) {
        DEBUG_PRINTF("i=%u re[i]=%u\n", i, reports_eod[i]);
        aux[j].accept_eod = reportOffsets[reports_eod[i]];
    }

    /* dead state (0) tops to the floating start state */
    dstate_id_t raw_top = i ? info.states[i].next[info.alpha_remap[TOP]]
                            : info.raw.start_floating;

    aux[j].top = info.implId(raw_top);
}
881 | |
882 | static |
883 | void allocateFSN8(dfa_info &info, |
884 | const map<dstate_id_t, AccelScheme> &accel_escape_info, |
885 | u16 *accel_limit, u16 *accept_limit) { |
886 | info.states[0].impl_id = 0; /* dead is always 0 */ |
887 | |
888 | vector<dstate_id_t> norm; |
889 | vector<dstate_id_t> accel; |
890 | vector<dstate_id_t> accept; |
891 | |
892 | assert(info.size() <= (1 << 8)); |
893 | |
894 | for (u32 i = 1; i < info.size(); i++) { |
895 | if (!info.states[i].reports.empty()) { |
896 | accept.push_back(i); |
897 | } else if (contains(accel_escape_info, i)) { |
898 | accel.push_back(i); |
899 | } else { |
900 | norm.push_back(i); |
901 | } |
902 | } |
903 | |
904 | u32 j = 1; /* dead is already at 0 */ |
905 | for (const dstate_id_t &s : norm) { |
906 | assert(j <= 256); |
907 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
908 | info.states[s].impl_id = j++; |
909 | } |
910 | *accel_limit = j; |
911 | for (const dstate_id_t &s : accel) { |
912 | assert(j <= 256); |
913 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
914 | info.states[s].impl_id = j++; |
915 | } |
916 | *accept_limit = j; |
917 | for (const dstate_id_t &s : accept) { |
918 | assert(j <= 256); |
919 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
920 | info.states[s].impl_id = j++; |
921 | } |
922 | } |
923 | |
static
bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
                                    set<dstate_id_t> *accel_states) {
    /* Builds the 8-bit McClellan implementation (all impl state ids fit
     * in a u8).
     *
     * Bytecode layout, in order:
     *   NFA header | mcclellan header | successor table (u8 entries)
     *   | mstate_aux array (at aux_offset) | report lists
     *   | accel structures (at accel_offset, which is stored relative to
     *     the mcclellan struct after the adjustment below).
     *
     * If accel_states is non-null it is filled with the raw dfa ids of
     * the accelerable states. */
    DEBUG_PRINTF("building mcclellan 8\n" );

    vector<u32> reports;     /* per-state report list ids */
    vector<u32> reports_eod; /* per-state eod report list ids */
    ReportID arb;            /* representative report for the single-report case */
    u8 single;               /* set if all accepts fire the same single report */

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
    map<dstate_id_t, AccelScheme> accel_escape_info
        = info.strat.getAccelInfo(cc.grey);

    /* Size each region of the bytecode. */
    size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
    size_t aux_size = sizeof(mstate_aux) * info.size();
    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t total_size = accel_offset + accel_size;

    DEBUG_PRINTF("aux_size %zu\n" , aux_size);
    DEBUG_PRINTF("aux_offset %zu\n" , aux_offset);
    DEBUG_PRINTF("rl size %u\n" , ri->getReportListSize());
    DEBUG_PRINTF("accel_size %zu\n" , accel_size);
    DEBUG_PRINTF("accel_offset %zu\n" , accel_offset);
    DEBUG_PRINTF("total_size %zu\n" , total_size);

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    char *nfa_base = (char *)nfa.get();

    mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());

    /* Assign impl ids so normal/accel/accept states form contiguous
     * ranges delimited by accel_limit_8 / accept_limit_8. */
    allocateFSN8(info, accel_escape_info, &m->accel_limit_8,
                 &m->accept_limit_8);
    populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());

    vector<u32> reportOffsets;

    /* Write the report lists just after the aux array; reportOffsets
     * maps report list ids to their offsets in the bytecode. */
    ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);

    /* copy in the state information */
    u8 *succ_table = (u8 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
    mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);

    for (size_t i = 0; i < info.size(); i++) {
        if (contains(accel_escape_info, i)) {
            u32 j = info.implId(i);

            /* Claim space for this state's accel structure and build it
             * in place; accel_offset advances as each one is consumed. */
            aux[j].accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();

            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + aux[j].accel_offset));
        }

        fillInBasicState8(info, aux, succ_table, reportOffsets, reports,
                          reports_eod, i);
    }

    assert(accel_offset + sizeof(NFA) <= total_size);

    DEBUG_PRINTF("rl size %zu\n" , ri->size());

    if (accel_states && nfa) {
        fillAccelOut(accel_escape_info, accel_states);
    }

    return nfa;
}
999 | |
1000 | #define MAX_SHERMAN_LIST_LEN 9 |
1001 | |
1002 | static |
1003 | void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, |
1004 | dstate_id_t max) { |
1005 | if (candidate < max) { |
1006 | dest.insert(candidate); |
1007 | } |
1008 | } |
1009 | |
1010 | static |
1011 | void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, |
1012 | u16 alphasize, dstate_id_t curr_id) { |
1013 | for (symbol_t s = 0; s < alphasize; s++) { |
1014 | addIfEarlier(dest, source.next[s], curr_id); |
1015 | } |
1016 | } |
1017 | |
1018 | /* \brief Returns a set of states to search for a better daddy. */ |
1019 | static |
1020 | flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, |
1021 | dstate_id_t curr_id) { |
1022 | flat_set<dstate_id_t> hinted; |
1023 | |
1024 | addIfEarlier(hinted, 0, curr_id); |
1025 | addIfEarlier(hinted, info.raw.start_anchored, curr_id); |
1026 | addIfEarlier(hinted, info.raw.start_floating, curr_id); |
1027 | |
1028 | // Add existing daddy and his successors, then search back one generation. |
1029 | const u16 alphasize = info.impl_alpha_size; |
1030 | dstate_id_t daddy = info.states[curr_id].daddy; |
1031 | for (u32 level = 0; daddy && level < 2; level++) { |
1032 | addIfEarlier(hinted, daddy, curr_id); |
1033 | addSuccessors(hinted, info.states[daddy], alphasize, curr_id); |
1034 | daddy = info.states[daddy].daddy; |
1035 | } |
1036 | |
1037 | return hinted; |
1038 | } |
1039 | |
1040 | #define MAX_SHERMAN_SELF_LOOP 20 |
1041 | |
/* \brief Attempt to pick a daddy for curr_id and mark it as a sherman
 * state when profitable.
 *
 * A sherman state is a compressed representation that stores only the
 * transitions differing from its daddy. This routine scores candidate
 * daddies by the number of transitions they share with curr_id, records
 * the best one, and sets extra[curr_id].shermanState when the remaining
 * differences fit in a sherman list. Several heuristics ban states that
 * are expected to be visited frequently (near the start states or with
 * wide self loops). */
static
void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
                       bool any_cyclic_near_anchored_state,
                       bool trust_daddy_states, const Grey &grey) {
    if (!grey.allowShermanStates) {
        return;
    }

    /* Width of one transition entry in the impl table. */
    const u16 width = using8bit ? sizeof(u8) : sizeof(u16);
    const u16 alphasize = info.impl_alpha_size;

    if (info.raw.start_anchored != DEAD_STATE
        && any_cyclic_near_anchored_state
        && curr_id < alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned\n" , curr_id);
        return;
    }

    if (info.raw.start_floating != DEAD_STATE
        && curr_id >= info.raw.start_floating
        && curr_id < info.raw.start_floating + alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned (%hu)\n" , curr_id, info.raw.start_floating);
        return;
    }

    /* Longest sherman list that still fits in the space of a full state
     * row, capped at MAX_SHERMAN_LIST_LEN. */
    const u16 full_state_size = width * alphasize;
    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
                                 (full_state_size - 2)/(width + 1));
    u16 best_score = 0;
    dstate_id_t best_daddy = 0;
    dstate &currState = info.states[curr_id];

    flat_set<dstate_id_t> hinted;
    if (trust_daddy_states) {
        // Use the daddy already set for this state so long as it isn't already
        // a Sherman state.
        dstate_id_t daddy = currState.daddy;
        if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
            hinted.insert(currState.daddy);
        } else {
            // Fall back to granddaddy, which has already been processed (due
            // to BFS ordering) and cannot be a Sherman state.
            dstate_id_t granddaddy = info.states[currState.daddy].daddy;
            if (info.is_widestate(granddaddy)) {
                return;
            }
            assert(!info.is_sherman(granddaddy));
            hinted.insert(granddaddy);
        }
    } else {
        hinted = find_daddy_candidates(info, curr_id);
    }

    /* Score each candidate: number of symbols on which donor and
     * curr_id agree about their successor. */
    for (const dstate_id_t &donor : hinted) {
        assert(donor < curr_id);
        u32 score = 0;

        if (info.is_sherman(donor) || info.is_widestate(donor)) {
            continue;
        }

        const dstate &donorState = info.states[donor];
        for (symbol_t s = 0; s < alphasize; s++) {
            if (currState.next[s] == donorState.next[s]) {
                score++;
            }
        }

        /* prefer lower ids to provide some stability amongst potential
         * siblings */
        if (score > best_score || (score == best_score && donor < best_daddy)) {
            best_daddy = donor;
            best_score = score;

            if (score == alphasize) {
                break; /* perfect match; cannot do better */
            }
        }
    }

    currState.daddy = best_daddy;
    info.extra[curr_id].daddytaken = best_score;
    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n" , curr_id, best_daddy,
                 best_score, alphasize);

    /* Too many differing transitions for a sherman list. */
    if (best_score + max_list_len < alphasize) {
        return; /* ??? */
    }

    /* Don't chain shermans off shermans. */
    if (info.is_sherman(currState.daddy)) {
        return;
    }

    /* Count how many input characters loop back to this state; a wide
     * self loop suggests a frequently-occupied state. */
    u32 self_loop_width = 0;
    const dstate &curr_raw = info.states[curr_id];
    for (unsigned i = 0; i < N_CHARS; i++) {
        if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
            self_loop_width++;
        }
    }

    if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
        DEBUG_PRINTF("%hu is banned wide self loop (%u)\n" , curr_id,
                     self_loop_width);
        return;
    }

    DEBUG_PRINTF("%hu is sherman\n" , curr_id);
    info.extra[curr_id].shermanState = true;
}
1158 | |
1159 | static |
1160 | bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { |
1161 | symbol_t alphasize = raw.getImplAlphaSize(); |
1162 | for (symbol_t s = 0; s < alphasize; s++) { |
1163 | dstate_id_t succ_id = raw.states[root].next[s]; |
1164 | if (succ_id == DEAD_STATE) { |
1165 | continue; |
1166 | } |
1167 | |
1168 | const dstate &succ = raw.states[succ_id]; |
1169 | for (symbol_t t = 0; t < alphasize; t++) { |
1170 | if (succ.next[t] == root || succ.next[t] == succ_id) { |
1171 | return true; |
1172 | } |
1173 | } |
1174 | } |
1175 | return false; |
1176 | } |
1177 | |
1178 | /* \brief Test for only-one-predecessor property. */ |
1179 | static |
1180 | bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size, |
1181 | const dstate_id_t curr_id, dstate_id_t &prev_id, |
1182 | symbol_t &prev_sym) { |
1183 | u32 num_prev = 0; |
1184 | bool test_p1 = false; |
1185 | |
1186 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
1187 | num_prev += info.states[curr_id].prev_vec[sym].size(); |
1188 | DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n" , sym, |
1189 | info.states[curr_id].prev_vec[sym].size()); |
1190 | if (num_prev == 1 && !test_p1) { |
1191 | test_p1 = true; |
1192 | prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure??? |
1193 | prev_sym = sym; |
1194 | } |
1195 | } |
1196 | |
1197 | return num_prev == 1; |
1198 | } |
1199 | |
1200 | /* \brief Test for same-failure-action property. */ |
1201 | static |
1202 | bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size, |
1203 | const dstate_id_t curr_id, const dstate_id_t prev_id, |
1204 | const symbol_t curr_sym, const symbol_t prev_sym) { |
1205 | const dstate &prevState = rdfa.states[prev_id]; |
1206 | const dstate &currState = rdfa.states[curr_id]; |
1207 | |
1208 | // Compare transition tables between currState and prevState. |
1209 | u16 score = 0; |
1210 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
1211 | if (currState.next[sym] == prevState.next[sym] |
1212 | && sym != curr_sym && sym != prev_sym) { |
1213 | score++; |
1214 | } |
1215 | } |
1216 | DEBUG_PRINTF("(Score: %u/%u)\n" , score, impl_alpha_size); |
1217 | |
1218 | // 2 cases. |
1219 | if (curr_sym != prev_sym && score >= impl_alpha_size - 2 |
1220 | && currState.next[prev_sym] == prevState.next[curr_sym]) { |
1221 | return true; |
1222 | } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) { |
1223 | return true; |
1224 | } |
1225 | return false; |
1226 | } |
1227 | |
1228 | /* \brief Check whether adding current prev_id will generate a circle.*/ |
1229 | static |
1230 | bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size, |
1231 | const vector<dstate_id_t> &chain, const dstate_id_t id) { |
1232 | const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec; |
1233 | const dstate_id_t tail = chain.front(); |
1234 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
1235 | auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail); |
1236 | if (iter != prev_vec[sym].end()) { |
1237 | // Tail is one of id's predecessors, forming a circle. |
1238 | return true; |
1239 | } |
1240 | } |
1241 | return false; |
1242 | } |
1243 | |
/* \brief Returns a chain of state ids and symbols.
 *
 * Walks backwards from curr_id through unique predecessors, appending
 * each visited state to temp_chain, and stops at the first state that
 * fails one of the chain tests. The returned id is that head state
 * (which is also temp_chain's last appended entry). */
static
dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info,
                                 const dstate_id_t curr_id,
                                 const symbol_t curr_sym,
                                 vector<dstate_id_t> &temp_chain) {
    //Record current id first.
    temp_chain.push_back(curr_id);

    const u16 size = info.impl_alpha_size;

    // Stop when entering root cloud.
    if (rdfa.start_anchored != DEAD_STATE
        && is_cyclic_near(rdfa, rdfa.start_anchored)
        && curr_id < size) {
        return curr_id;
    }
    if (rdfa.start_floating != DEAD_STATE
        && curr_id >= rdfa.start_floating
        && curr_id < rdfa.start_floating + size * 3) {
        return curr_id;
    }

    // Stop when reaching anchored or floating.
    if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) {
        return curr_id;
    }

    /* Sentinel values; overwritten by check_property1 on success. */
    dstate_id_t prev_id = 0;
    symbol_t prev_sym = ALPHABET_SIZE;

    // Check the only-one-predecessor property.
    if (!check_property1(info, size, curr_id, prev_id, prev_sym)) {
        return curr_id;
    }
    assert(prev_id != 0 && prev_sym != ALPHABET_SIZE);
    DEBUG_PRINTF("(P1 test passed.)\n" );

    // Circle testing for the prev_id that passes the P1 test.
    if (check_circle(info, size, temp_chain, prev_id)) {
        DEBUG_PRINTF("(A circle is found.)\n" );
        return curr_id;
    }

    // Check the same-failure-action property.
    if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) {
        return curr_id;
    }
    DEBUG_PRINTF("(P2 test passed.)\n" );

    /* Accept states terminate the chain; otherwise recurse backwards
     * through the unique predecessor. */
    if (!rdfa.states[prev_id].reports.empty()
        || !rdfa.states[prev_id].reports_eod.empty()) {
        return curr_id;
    } else {
        return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain);
    }
}
1301 | |
/* \brief Always store the non-extensible chains found till now.
 *
 * If head_is_new, temp_chain is appended to candidate_chain and all of
 * its states are marked in 'added'; returns true in that case. Otherwise
 * the existing chain with the same head is replaced if temp_chain is
 * longer, with the per-state marks in 'added' adjusted accordingly;
 * returns false. */
static
bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
                         vector<dstate_id_t> &temp_chain,
                         dynamic_bitset<> &added, bool head_is_new) {
    dstate_id_t head = temp_chain.front();
    u16 length = temp_chain.size();

    if (head_is_new) {
        DEBUG_PRINTF("This is a new chain!\n" );

        // Add this new chain and get it marked.
        candidate_chain.push_back(temp_chain);

        for (auto &id : temp_chain) {
            DEBUG_PRINTF("(Marking s%u ...)\n" , id);
            added.set(id);
        }

        return true;
    }

    DEBUG_PRINTF("This is a longer chain!\n" );
    assert(!candidate_chain.empty());

    /* Locate the stored chain that shares this head. */
    auto chain = find_if(candidate_chain.begin(), candidate_chain.end(),
                         [&](const vector<dstate_id_t> &it) {
                             return it.front() == head;
                         });

    // Not a valid head, just do nothing and return.
    if (chain == candidate_chain.end()) {
        return false;
    }

    u16 len = chain->size();

    if (length > len) {
        // Find out the branch node first.
        // NOTE(review): piv indexes into *chain which only has len
        // (< length) elements; if the stored chain is a strict prefix of
        // temp_chain this reads past its end — confirm that the chains
        // are guaranteed to diverge before position len.
        size_t piv = 0;
        for (; piv < length; piv++) {
            if ((*chain)[piv] != temp_chain[piv]) {
                break;
            }
        }

        /* Mark the states on the new, longer branch. */
        for (size_t j = piv + 1; j < length; j++) {
            DEBUG_PRINTF("(Marking s%u (new branch) ...)\n" , temp_chain[j]);
            added.set(temp_chain[j]);
        }

        // Unmark old unuseful nodes.
        // (Except the tail node, which is in working queue)
        for (size_t j = piv + 1; j < verify_u16(len - 1); j++) {
            DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n" , (*chain)[j]);
            added.reset((*chain)[j]);
        }

        /* Replace the stored chain with the longer one. */
        chain->assign(temp_chain.begin(), temp_chain.end());
    }

    return false;
}
1365 | |
/* \brief Generate wide_symbol_chain from wide_state_chain.
 *
 * For each stored state chain, records the symbol taken between each
 * consecutive pair of states and marks the participating states as wide
 * head/body states. chain_tail supplies the final symbol for each chain
 * (one entry per chain, pushed by find_wide_state). */
static
void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
    raw_dfa &rdfa = info.raw;
    assert(chain_tail.size() == info.wide_state_chain.size());

    for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
        vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
        vector<symbol_t> symbol_chain;

        info.extra[state_chain[0]].wideHead = true;
        /* One symbol per transition: a chain of N states has N-1 edges. */
        size_t width = state_chain.size() - 1;

        for (size_t j = 0; j < width; j++) {
            dstate_id_t curr_id = state_chain[j];
            dstate_id_t next_id = state_chain[j + 1];

            // The last state of the chain doesn't belong to a wide state.
            info.extra[curr_id].wideState = true;

            // The tail symbol comes from vector chain_tail;
            if (j == width - 1) {
                symbol_chain.push_back(chain_tail[i]);
            } else {
                /* Find the symbol that takes curr_id to next_id. By
                 * construction (only-one-predecessor chains) one exists. */
                for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
                    if (rdfa.states[curr_id].next[sym] == next_id) {
                        symbol_chain.push_back(sym);
                        break;
                    }
                }
            }
        }

        info.wide_symbol_chain.push_back(symbol_chain);
    }
}
1402 | |
/* \brief Find potential regions of states to be packed into wide states.
 *
 * Works backwards from the accept states via a BFS-style worklist: for
 * each popped state, every unmarked predecessor is used as the tail of a
 * candidate chain (built by find_chain_candidate). Chains of at least 8
 * states are stored; the head of each stored chain joins the worklist so
 * that chains can extend further back. Finally the per-chain symbol
 * sequences are derived via generate_symbol_chain. */
static
void find_wide_state(dfa_info &info) {
    DfaPrevInfo dinfo(info.raw);
    queue<dstate_id_t> work_queue;

    /* 'added' marks states already claimed by some chain or queued. */
    dynamic_bitset<> added(info.raw.states.size());
    for (auto it : dinfo.accepts) {
        work_queue.push(it);
        added.set(it);
    }

    /* Final symbol of each stored chain, parallel to wide_state_chain. */
    vector<symbol_t> chain_tail;
    while (!work_queue.empty()) {
        dstate_id_t curr_id = work_queue.front();
        work_queue.pop();
        DEBUG_PRINTF("Newly popped state: s%u\n" , curr_id);

        for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) {
            for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) {
                if (added.test(info_it)) {
                    DEBUG_PRINTF("(s%u already marked.)\n" , info_it);
                    continue;
                }

                vector<dstate_id_t> temp_chain;
                // Head is a state failing the test of the chain.
                dstate_id_t head = find_chain_candidate(info.raw, dinfo,
                                                        info_it, sym,
                                                        temp_chain);

                // A candidate chain should contain 8 substates at least.
                if (temp_chain.size() < 8) {
                    DEBUG_PRINTF("(Not enough substates, continue.)\n" );
                    continue;
                }

                bool head_is_new = !added.test(head);
                if (head_is_new) {
                    added.set(head);
                    work_queue.push(head);
                    DEBUG_PRINTF("Newly pushed state: s%u\n" , head);
                }

                /* temp_chain was built tail-to-head; flip it and append
                 * the current state as the final link. */
                reverse(temp_chain.begin(), temp_chain.end());
                temp_chain.push_back(curr_id);

                assert(head > 0 && head == temp_chain.front());
                if (store_chain_longest(info.wide_state_chain, temp_chain,
                                        added, head_is_new)) {
                    chain_tail.push_back(sym);
                }
            }
        }
    }

    generate_symbol_chain(info, chain_tail);
}
1461 | |
/* \brief Common McClellan compilation path.
 *
 * Chooses between the 8-bit and 16-bit implementations based on state
 * count, optionally detects wide-state regions and sherman states for
 * the 16-bit case, and returns the finished engine bytecode. */
bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
                                     const CompileContext &cc,
                                     bool trust_daddy_states,
                                     set<dstate_id_t> *accel_states) {
    assert(!is_dead(raw));

    dfa_info info(strat);
    /* 8-bit impl requires all state ids to fit in a u8. */
    bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }

    /* Note: queried before compilation as stripping may have changed it. */
    bool has_eod_reports = raw.hasEodReports();

    bytecode_ptr<NFA> nfa;
    if (!using8bit) {
        /* Wide-state packing is only attempted for plain, untriggered
         * McClellan builds. */
        if (cc.grey.allowWideStates && strat.getType() == McClellan
            && !is_triggered(raw.kind)) {
            find_wide_state(info);
        }

        u16 total_daddy = 0;
        bool any_cyclic_near_anchored_state
            = is_cyclic_near(raw, raw.start_anchored);

        /* Pick daddies / sherman states; wide states are left alone. */
        for (u32 i = 0; i < info.size(); i++) {
            if (info.is_widestate(i)) {
                continue;
            }
            find_better_daddy(info, i, using8bit,
                              any_cyclic_near_anchored_state,
                              trust_daddy_states, cc.grey);
            total_daddy += info.extra[i].daddytaken;
        }

        DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n" , total_daddy,
                     info.size() * info.impl_alpha_size, info.size(),
                     info.impl_alpha_size);

        nfa = mcclellanCompile16(info, cc, accel_states);
    } else {
        nfa = mcclellanCompile8(info, cc, accel_states);
    }

    if (has_eod_reports) {
        nfa->flags |= NFA_ACCEPTS_EOD;
    }

    DEBUG_PRINTF("compile done\n" );
    return nfa;
}
1515 | |
1516 | bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, |
1517 | const ReportManager &rm, |
1518 | bool only_accel_init, |
1519 | bool trust_daddy_states, |
1520 | set<dstate_id_t> *accel_states) { |
1521 | mcclellan_build_strat mbs(raw, rm, only_accel_init); |
1522 | return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); |
1523 | } |
1524 | |
1525 | size_t mcclellan_build_strat::accelSize(void) const { |
1526 | return sizeof(AccelAux); /* McClellan accel structures are just bare |
1527 | * accelaux */ |
1528 | } |
1529 | |
1530 | u32 mcclellanStartReachSize(const raw_dfa *raw) { |
1531 | if (raw->states.size() < 2) { |
1532 | return 0; |
1533 | } |
1534 | |
1535 | const dstate &ds = raw->states[raw->start_anchored]; |
1536 | |
1537 | CharReach out; |
1538 | for (unsigned i = 0; i < N_CHARS; i++) { |
1539 | if (ds.next[raw->alpha_remap[i]] != DEAD_STATE) { |
1540 | out.set(i); |
1541 | } |
1542 | } |
1543 | |
1544 | return out.count(); |
1545 | } |
1546 | |
1547 | bool has_accel_mcclellan(const NFA *nfa) { |
1548 | const mcclellan *m = (const mcclellan *)getImplNfa(nfa); |
1549 | return m->has_accel; |
1550 | } |
1551 | |
1552 | } // namespace ue2 |
1553 | |