1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** |
30 | * \file |
31 | * \brief Main NFA build code. |
32 | */ |
33 | |
34 | #include "limex_compile.h" |
35 | |
36 | #include "accel.h" |
37 | #include "accelcompile.h" |
38 | #include "grey.h" |
39 | #include "limex_internal.h" |
40 | #include "limex_limits.h" |
41 | #include "nfa_build_util.h" |
42 | #include "nfagraph/ng_dominators.h" |
43 | #include "nfagraph/ng_holder.h" |
44 | #include "nfagraph/ng_limex_accel.h" |
45 | #include "nfagraph/ng_repeat.h" |
46 | #include "nfagraph/ng_squash.h" |
47 | #include "nfagraph/ng_util.h" |
48 | #include "ue2common.h" |
49 | #include "repeatcompile.h" |
50 | #include "util/alloc.h" |
51 | #include "util/bitutils.h" |
52 | #include "util/bytecode_ptr.h" |
53 | #include "util/charreach.h" |
54 | #include "util/compile_context.h" |
55 | #include "util/container.h" |
56 | #include "util/flat_containers.h" |
57 | #include "util/graph.h" |
58 | #include "util/graph_range.h" |
59 | #include "util/graph_small_color_map.h" |
60 | #include "util/order_check.h" |
61 | #include "util/unordered.h" |
62 | #include "util/verify_types.h" |
63 | |
64 | #include <algorithm> |
#include <cassert>
#include <climits>
66 | #include <cstddef> |
67 | #include <cstdlib> |
68 | #include <cstring> |
69 | #include <map> |
70 | #include <set> |
71 | #include <vector> |
72 | |
73 | #include <boost/graph/breadth_first_search.hpp> |
74 | #include <boost/graph/depth_first_search.hpp> |
75 | #include <boost/range/adaptor/map.hpp> |
76 | |
77 | using namespace std; |
78 | using boost::adaptors::map_values; |
79 | |
80 | namespace ue2 { |
81 | |
82 | /** |
83 | * \brief Special state index value meaning that the vertex will not |
84 | * participate in an (NFA/DFA/etc) implementation. |
85 | */ |
86 | static constexpr u32 NO_STATE = ~0; |
87 | |
88 | namespace { |
89 | |
90 | struct precalcAccel { |
91 | precalcAccel() : single_offset(0), double_offset(0) {} |
92 | CharReach single_cr; |
93 | u32 single_offset; |
94 | |
95 | CharReach double_cr; |
96 | flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */ |
97 | u32 double_offset; |
98 | }; |
99 | |
100 | struct limex_accel_info { |
101 | unordered_set<NFAVertex> accelerable; |
102 | map<NFAStateSet, precalcAccel> precalc; |
103 | unordered_map<NFAVertex, flat_set<NFAVertex>> friends; |
104 | unordered_map<NFAVertex, AccelScheme> accel_map; |
105 | }; |
106 | |
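/**
 * \brief Convert a squash map whose bitsets are indexed by vertex index into
 * one whose bitsets are indexed by the assigned (implementation) state id,
 * dropping vertices that have no state assigned.
 */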
107 | static |
108 | unordered_map<NFAVertex, NFAStateSet> |
109 | reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in, |
110 | const NGHolder &g, |
111 | const unordered_map<NFAVertex, u32> &state_ids, |
112 | const u32 num_states) { |
113 | unordered_map<NFAVertex, NFAStateSet> out; |
114 | out.reserve(in.size()); |
115 | |
116 | vector<u32> indexToState(num_vertices(g), NO_STATE); |
117 | for (const auto &m : state_ids) { |
118 | u32 vert_id = g[m.first].index; |
119 | assert(vert_id < indexToState.size()); |
120 | indexToState[vert_id] = m.second; |
121 | } |
122 | |
123 | for (const auto &m : in) { |
124 | NFAVertex v = m.first; |
125 | assert(m.second.size() <= indexToState.size()); |
126 | |
127 | NFAStateSet mask(num_states); |
128 | for (size_t i = m.second.find_first(); i != m.second.npos; |
129 | i = m.second.find_next(i)) { |
130 | u32 state_id = indexToState[i]; |
131 | if (state_id == NO_STATE) { |
132 | continue; |
133 | } |
134 | mask.set(state_id); |
135 | } |
136 | out.emplace(v, mask); |
137 | } |
138 | |
139 | return out; |
140 | } |
141 | |
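/**
 * \brief Aggregates everything needed to build a LimEx NFA from an NGHolder:
 * the graph, the vertex-to-state assignment, bounded repeat data, squash maps
 * (reindexed by state id in the constructor), tops, tug trigger states and
 * compile options.
 */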
142 | struct build_info { |
143 | build_info(NGHolder &hi, |
144 | const unordered_map<NFAVertex, u32> &states_in, |
145 | const vector<BoundedRepeatData> &ri, |
146 | const unordered_map<NFAVertex, NFAStateSet> &rsmi, |
147 | const unordered_map<NFAVertex, NFAStateSet> &smi, |
148 | const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi, |
149 | bool dai, bool sci, const CompileContext &cci, u32 nsi) |
150 | : h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi), |
151 | zombies(zi), do_accel(dai), stateCompression(sci), cc(cci), |
152 | num_states(nsi) { |
153 | for (const auto &br : repeats) { |
154 | for (auto v : br.tug_triggers) { |
155 | assert(state_ids.at(v) != NO_STATE); |
156 | tugs.set(state_ids.at(v)); |
157 | } |
158 | br_cyclic[br.cyclic] = |
159 | BoundedRepeatSummary(br.repeatMin, br.repeatMax); |
160 | } |
161 | |
162 | // Convert squash maps to be indexed by state index rather than |
163 | // vertex_index. |
164 | squashMap = reindexByStateId(smi, h, state_ids, num_states); |
165 | reportSquashMap = reindexByStateId(rsmi, h, state_ids, num_states); |
166 | } |
167 | |
168 | NGHolder &h; |
169 | const unordered_map<NFAVertex, u32> &state_ids; |
170 | const vector<BoundedRepeatData> &repeats; |
171 | |
172 | // Squash maps; state sets are indexed by state_id. |
173 | unordered_map<NFAVertex, NFAStateSet> reportSquashMap; |
174 | unordered_map<NFAVertex, NFAStateSet> squashMap; |
175 | |
176 | const map<u32, set<NFAVertex>> &tops; |
177 | NFAStateSet tugs; |
178 | map<NFAVertex, BoundedRepeatSummary> br_cyclic; |
179 | const set<NFAVertex> &zombies; |
180 | bool do_accel; |
181 | bool stateCompression; |
182 | const CompileContext &cc; |
183 | u32 num_states; |
184 | limex_accel_info accel; |
185 | }; |
186 | |
187 | #define LAST_LIMEX_NFA LIMEX_NFA_512 |
188 | |
189 | // Constants for scoring mechanism |
190 | const int SHIFT_COST = 10; // limex: cost per shift mask |
191 | const int EXCEPTION_COST = 4; // limex: per exception |
192 | |
193 | template<NFAEngineType t> struct NFATraits { }; |
194 | |
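/*
 * Compile-time dispatcher: DISPATCH_BY_LIMEX_TYPE_INT instantiates itself
 * recursively for each NFAEngineType from lb up to LAST_LIMEX_NFA and calls
 * sfunc<type>::call(arg) for the engine type matching the runtime value i.
 * The terminal specialisation below only exists to stop the recursion. The
 * DISPATCH_BY_LIMEX_TYPE macro wraps the whole chain, starting the search at
 * engine type 0.
 */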
195 | template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t, |
196 | NFAEngineType lb> |
197 | struct DISPATCH_BY_LIMEX_TYPE_INT { |
198 | static rv_t doOp(NFAEngineType i, const arg_t &arg) { |
199 | if (i == lb) { |
200 | return sfunc<lb>::call(arg); |
201 | } else { |
202 | return DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t, |
203 | (NFAEngineType)(lb + 1)> |
204 | ::doOp(i, arg); |
205 | } |
206 | } |
207 | }; |
208 | |
209 | template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t> |
210 | struct DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t, |
211 | (NFAEngineType)(LAST_LIMEX_NFA + 1)> { |
212 | // dummy |
213 | static rv_t doOp(NFAEngineType, const arg_t &) { |
214 | assert(0); |
        throw std::logic_error("Unreachable");
216 | } |
217 | }; |
218 | |
219 | #define DISPATCH_BY_LIMEX_TYPE(i, op, arg) \ |
220 | DISPATCH_BY_LIMEX_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \ |
221 | decltype(arg), (NFAEngineType)0>::doOp(i, arg) |
222 | |
223 | // Given a number of states, find the size of the smallest container NFA it |
224 | // will fit in. We support NFAs of the following sizes: 32, 64, 128, 256, 384, |
225 | // 512. |
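// For example: 40 states -> 64, 96 states -> 128, 300 states -> 384.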
226 | size_t findContainerSize(size_t states) { |
227 | if (states > 256 && states <= 384) { |
228 | return 384; |
229 | } |
230 | return 1ULL << (lg2(states - 1) + 1); |
231 | } |
232 | |
233 | bool isLimitedTransition(int from, int to, int maxshift) { |
234 | int diff = to - from; |
235 | |
236 | // within our shift? |
237 | if (diff < 0 || diff > maxshift) { |
238 | return false; |
239 | } |
240 | |
    // can't jump over a bollard: both states must lie in the same 64-bit
    // chunk of the state mask
242 | return (from & ~63) == (to & ~63); |
243 | } |
244 | |
245 | // Fill a bit mask |
246 | template<class Mask> |
247 | void maskFill(Mask &m, u8 c) { |
248 | memset(&m, c, sizeof(m)); |
249 | } |
250 | |
251 | // Clear a bit mask. |
252 | template<class Mask> |
253 | void maskClear(Mask &m) { |
254 | memset(&m, 0, sizeof(m)); |
255 | } |
256 | |
257 | template<class Mask> |
258 | u8 *maskGetByte(Mask &m, u32 bit) { |
259 | assert(bit < sizeof(m)*8); |
260 | u8 *m8 = (u8 *)&m; |
261 | |
262 | return m8 + bit/8; |
263 | } |
264 | |
265 | // Set a bit in a mask, starting from the little end. |
266 | template<class Mask> |
267 | void maskSetBit(Mask &m, const unsigned int bit) { |
268 | u8 *byte = maskGetByte(m, bit); |
269 | *byte |= 1U << (bit % 8); |
270 | } |
271 | |
272 | template<class Mask> |
273 | void maskSetBits(Mask &m, const NFAStateSet &bits) { |
274 | for (size_t i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) { |
275 | maskSetBit(m, i); |
276 | } |
277 | } |
278 | |
279 | template<class Mask> |
280 | bool isMaskZero(Mask &m) { |
281 | u8 *m8 = (u8 *)&m; |
282 | for (u32 i = 0; i < sizeof(m); i++) { |
283 | if (m8[i]) { |
284 | return false; |
285 | } |
286 | } |
287 | return true; |
288 | } |
289 | |
290 | // Sets an entire byte in a mask to the given value |
291 | template<class Mask> |
292 | void maskSetByte(Mask &m, const unsigned int idx, const char val) { |
293 | assert(idx < sizeof(m)); |
294 | char *m8 = (char *)&m; |
295 | char &byte = m8[idx]; |
296 | byte = val; |
297 | } |
298 | |
299 | // Clear a bit in the mask, starting from the little end. |
300 | template<class Mask> |
301 | void maskClearBit(Mask &m, const u32 bit) { |
302 | u8 *byte = maskGetByte(m, bit); |
303 | *byte &= ~(1U << (bit % 8)); |
304 | } |
305 | |
306 | /* |
307 | * Common code: the following code operates on parts of the NFA that are common |
308 | * to both the (defunct) General and the LimEx models. |
309 | */ |
310 | |
311 | static |
312 | void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach, |
313 | vector<u8> &reachMap) { |
314 | const NGHolder &h = args.h; |
315 | const auto &state_ids = args.state_ids; |
316 | |
317 | // Build a list of vertices with a state index assigned. |
318 | vector<NFAVertex> verts; |
319 | verts.reserve(args.num_states); |
320 | for (auto v : vertices_range(h)) { |
321 | if (state_ids.at(v) != NO_STATE) { |
322 | verts.push_back(v); |
323 | } |
324 | } |
325 | |
326 | // Build a mapping from set-of-states -> reachability. |
327 | map<NFAStateSet, CharReach> mapping; |
328 | NFAStateSet states(args.num_states); |
329 | for (size_t i = 0; i < N_CHARS; i++) { |
330 | states.reset(); |
331 | for (auto v : verts) { |
332 | const CharReach &cr = h[v].char_reach; |
333 | if (cr.test(i)) { |
334 | u32 state_id = state_ids.at(v); |
335 | states.set(state_id); |
336 | } |
337 | } |
338 | mapping[states].set(i); |
339 | } |
340 | |
341 | DEBUG_PRINTF("%zu distinct reachability entries\n" , mapping.size()); |
342 | assert(!mapping.empty()); |
343 | |
344 | // Build a vector of distinct reachability entries and a mapping from every |
345 | // character to one of those entries. |
346 | |
347 | reach.reserve(mapping.size()); |
348 | reachMap.assign(N_CHARS, 0); |
349 | |
350 | u8 num = 0; |
351 | for (auto mi = mapping.begin(), me = mapping.end(); mi != me; ++mi, ++num) { |
352 | // Reach entry. |
353 | reach.push_back(mi->first); |
354 | |
355 | // Character mapping. |
356 | const CharReach &cr = mi->second; |
357 | for (size_t i = cr.find_first(); i != CharReach::npos; |
358 | i = cr.find_next(i)) { |
359 | reachMap[i] = num; |
360 | } |
361 | } |
362 | } |
363 | |
364 | struct AccelBuild { |
365 | AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {} |
366 | NFAVertex v; |
367 | u32 state; |
368 | u32 offset; // offset correction to apply |
369 | CharReach stop1; // single-byte accel stop literals |
370 | flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals |
371 | }; |
372 | |
373 | static |
374 | void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) { |
375 | u32 state = bi.state_ids.at(v); |
376 | build.v = v; |
377 | build.state = state; |
378 | NFAStateSet ss(bi.num_states); |
379 | ss.set(state); |
380 | |
381 | if (!contains(bi.accel.precalc, ss)) { |
382 | build.stop1 = CharReach::dot(); |
383 | } else { |
384 | const precalcAccel &precalc = bi.accel.precalc.at(ss); |
385 | if (precalc.double_lits.empty()) { |
386 | build.stop1 = precalc.single_cr; |
387 | build.offset = precalc.single_offset; |
388 | } else { |
389 | build.stop1 = precalc.double_cr; |
390 | build.stop2 = precalc.double_lits; |
391 | build.offset = precalc.double_offset; |
392 | } |
393 | } |
394 | |
395 | #ifdef DEBUG |
    printf("state %u stop1:", state);
    for (size_t j = build.stop1.find_first(); j != build.stop1.npos;
         j = build.stop1.find_next(j)) {
        printf(" 0x%02x", (u32)j);
    }
    printf("\n");
    printf("state %u stop2:", state);
    for (auto it = build.stop2.begin(); it != build.stop2.end(); ++it) {
        printf(" 0x%02hhx%02hhx", it->first, it->second);
    }
    printf("\n");
407 | #endif |
408 | } |
409 | |
410 | // Generate all the data we need for at most NFA_MAX_ACCEL_STATES accelerable |
411 | // states. |
412 | static |
413 | void gatherAccelStates(const build_info &bi, vector<AccelBuild> &accelStates) { |
414 | for (auto v : bi.accel.accelerable) { |
415 | DEBUG_PRINTF("state %u is accelerable\n" , bi.state_ids.at(v)); |
416 | AccelBuild a; |
417 | findStopLiterals(bi, v, a); |
418 | accelStates.push_back(a); |
419 | } |
420 | |
421 | // AccelStates should be sorted by state number, so that we build our accel |
422 | // masks correctly. |
423 | sort(accelStates.begin(), accelStates.end(), |
424 | [](const AccelBuild &a, const AccelBuild &b) { |
425 | return a.state < b.state; |
426 | }); |
427 | |
428 | // Our caller shouldn't have fed us too many accel states. |
429 | assert(accelStates.size() <= NFA_MAX_ACCEL_STATES); |
430 | if (accelStates.size() > NFA_MAX_ACCEL_STATES) { |
431 | accelStates.resize(NFA_MAX_ACCEL_STATES); |
432 | } |
433 | } |
434 | |
435 | static |
436 | void combineAccel(const AccelBuild &in, AccelBuild &out) { |
437 | // stop1 and stop2 union |
438 | out.stop1 |= in.stop1; |
439 | out.stop2.insert(in.stop2.begin(), in.stop2.end()); |
440 | // offset is maximum of the two |
441 | out.offset = max(out.offset, in.offset); |
442 | } |
443 | |
444 | static |
445 | void minimiseAccel(AccelBuild &build) { |
446 | flat_set<pair<u8, u8>> new_stop2; |
447 | // Any two-byte accels beginning with a one-byte accel should be removed |
448 | for (const auto &si : build.stop2) { |
449 | if (!build.stop1.test(si.first)) { |
450 | new_stop2.insert(si); |
451 | } |
452 | } |
453 | build.stop2 = new_stop2; |
454 | } |
455 | |
456 | struct AccelAuxCmp { |
457 | explicit AccelAuxCmp(const AccelAux &aux_in) : aux(aux_in) {} |
458 | bool operator()(const AccelAux &a) const { |
459 | return !memcmp(&a, &aux, sizeof(AccelAux)); |
460 | } |
461 | private: |
462 | const AccelAux &aux; |
463 | }; |
464 | |
465 | static |
466 | bool allow_wide_accel(NFAVertex v, const NGHolder &g, NFAVertex sds_or_proxy) { |
467 | return v == sds_or_proxy || edge(g.start, v, g).second; |
468 | } |
469 | |
470 | static |
471 | bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g, |
472 | NFAVertex sds_or_proxy) { |
473 | for (auto v : vv) { |
474 | if (allow_wide_accel(v, g, sds_or_proxy)) { |
475 | return true; |
476 | } |
477 | } |
478 | |
479 | return false; |
480 | } |
481 | |
482 | // identify and mark states that we feel are accelerable (for a limex NFA) |
483 | /* Note: leftfix nfas allow accepts to be accelerated */ |
484 | static |
485 | void nfaFindAccelSchemes(const NGHolder &g, |
486 | const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, |
487 | unordered_map<NFAVertex, AccelScheme> *out) { |
488 | vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); |
489 | |
490 | NFAVertex sds_or_proxy = get_sds_or_proxy(g); |
491 | |
492 | for (auto v : vertices_range(g)) { |
493 | // We want to skip any vertices that don't lead to at least one other |
494 | // (self-loops don't count) vertex. |
495 | if (!has_proper_successor(v, g)) { |
496 | DEBUG_PRINTF("skipping vertex %zu\n" , g[v].index); |
497 | continue; |
498 | } |
499 | |
500 | bool allow_wide = allow_wide_accel(v, g, sds_or_proxy); |
501 | |
502 | AccelScheme as; |
503 | if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) { |
504 | DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n" , |
505 | g[v].index, as.offset); |
506 | (*out)[v] = as; |
507 | } |
508 | } |
509 | } |
510 | |
511 | struct fas_visitor : public boost::default_bfs_visitor { |
512 | fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in, |
513 | unordered_map<NFAVertex, AccelScheme> *out_in) |
514 | : accel_map(am_in), out(out_in) {} |
515 | |
516 | void discover_vertex(NFAVertex v, const NGHolder &) { |
517 | if (accel_map.find(v) != accel_map.end()) { |
518 | (*out)[v] = accel_map.find(v)->second; |
519 | } |
520 | if (out->size() >= NFA_MAX_ACCEL_STATES) { |
521 | throw this; /* done */ |
522 | } |
523 | } |
524 | const unordered_map<NFAVertex, AccelScheme> &accel_map; |
525 | unordered_map<NFAVertex, AccelScheme> *out; |
526 | }; |
527 | |
528 | static |
529 | void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops, |
530 | unordered_map<NFAVertex, AccelScheme> *accel_map) { |
531 | /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything |
532 | * else should be ditched. We use a simple BFS to choose accel states near |
533 | * the start. */ |
534 | |
535 | vector<NFAEdge> tempEdges; |
536 | for (const auto &vv : tops | map_values) { |
537 | for (NFAVertex v : vv) { |
538 | if (!edge(g.start, v, g).second) { |
539 | tempEdges.push_back(add_edge(g.start, v, g).first); |
540 | } |
541 | } |
542 | } |
543 | |
544 | // Similarly, connect (start, startDs) if necessary. |
545 | if (!edge(g.start, g.startDs, g).second) { |
546 | NFAEdge e = add_edge(g.start, g.startDs, g); |
547 | tempEdges.push_back(e); // Remove edge later. |
548 | } |
549 | |
550 | unordered_map<NFAVertex, AccelScheme> out; |
551 | |
552 | try { |
553 | boost::breadth_first_search(g, g.start, |
554 | visitor(fas_visitor(*accel_map, &out)) |
555 | .color_map(make_small_color_map(g))); |
556 | } catch (fas_visitor *) { |
557 | ; /* found max accel_states */ |
558 | } |
559 | |
560 | remove_edges(tempEdges, g); |
561 | |
562 | assert(out.size() <= NFA_MAX_ACCEL_STATES); |
563 | accel_map->swap(out); |
564 | } |
565 | |
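/**
 * \brief True if removing any single state from \a state_set leaves a
 * non-empty subset (other than the effective SDS state on its own) for which
 * no accel scheme has been precalculated; if a subset failed to produce a
 * scheme, the superset will fail too.
 */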
566 | static |
567 | bool containsBadSubset(const limex_accel_info &accel, |
568 | const NFAStateSet &state_set, const u32 effective_sds) { |
569 | NFAStateSet subset(state_set.size()); |
570 | for (size_t j = state_set.find_first(); j != state_set.npos; |
571 | j = state_set.find_next(j)) { |
572 | subset = state_set; |
573 | subset.reset(j); |
574 | |
575 | if (effective_sds != NO_STATE && subset.count() == 1 && |
576 | subset.test(effective_sds)) { |
577 | continue; |
578 | } |
579 | |
580 | if (subset.any() && !contains(accel.precalc, subset)) { |
581 | return true; |
582 | } |
583 | } |
584 | return false; |
585 | } |
586 | |
587 | static |
588 | bool is_too_wide(const AccelScheme &as) { |
589 | return as.cr.count() > MAX_MERGED_ACCEL_STOPS; |
590 | } |
591 | |
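/**
 * \brief Find accelerable states and precalculate an accel scheme for every
 * viable combination of them (keyed by state set), filling in bi.accel for
 * later use by buildAccel().
 */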
592 | static |
593 | void fillAccelInfo(build_info &bi) { |
594 | if (!bi.do_accel) { |
595 | return; |
596 | } |
597 | |
598 | NGHolder &g = bi.h; |
599 | limex_accel_info &accel = bi.accel; |
600 | unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map; |
601 | const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic; |
602 | const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids; |
603 | const u32 num_states = bi.num_states; |
604 | |
605 | nfaFindAccelSchemes(g, br_cyclic, &accel_map); |
606 | filterAccelStates(g, bi.tops, &accel_map); |
607 | |
608 | assert(accel_map.size() <= NFA_MAX_ACCEL_STATES); |
609 | |
610 | vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); |
611 | |
612 | vector<NFAVertex> astates; |
613 | for (const auto &m : accel_map) { |
614 | astates.push_back(m.first); |
615 | } |
616 | |
617 | NFAStateSet useful(num_states); |
618 | NFAStateSet state_set(num_states); |
619 | vector<NFAVertex> states; |
620 | |
621 | NFAVertex sds_or_proxy = get_sds_or_proxy(g); |
622 | const u32 effective_sds = state_ids.at(sds_or_proxy); |
623 | |
624 | /* for each subset of the accel keys need to find an accel scheme */ |
625 | assert(astates.size() < 32); |
626 | sort(astates.begin(), astates.end()); |
627 | |
628 | for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) { |
629 | DEBUG_PRINTF("saving info for accel %u\n" , i); |
630 | states.clear(); |
631 | state_set.reset(); |
632 | for (u32 j = 0, j_end = astates.size(); j < j_end; j++) { |
633 | if (i & (1U << j)) { |
634 | NFAVertex v = astates[j]; |
635 | states.push_back(v); |
636 | state_set.set(state_ids.at(v)); |
637 | } |
638 | } |
639 | |
640 | if (containsBadSubset(accel, state_set, effective_sds)) { |
641 | DEBUG_PRINTF("accel %u has bad subset\n" , i); |
642 | continue; /* if a subset failed to build we would too */ |
643 | } |
644 | |
645 | const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy); |
646 | |
647 | AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic, |
648 | allow_wide, true); |
649 | if (is_too_wide(as)) { |
650 | DEBUG_PRINTF("accel %u too wide (%zu, %d)\n" , i, |
651 | as.cr.count(), MAX_MERGED_ACCEL_STOPS); |
652 | continue; |
653 | } |
654 | |
655 | DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n" , i, as.offset, |
656 | as.double_offset); |
657 | |
658 | precalcAccel &pa = accel.precalc[state_set]; |
659 | pa.single_offset = as.offset; |
660 | pa.single_cr = as.cr; |
661 | |
662 | if (as.double_byte.size() != 0) { |
663 | pa.double_offset = as.double_offset; |
664 | pa.double_lits = as.double_byte; |
665 | pa.double_cr = as.double_cr; |
666 | } |
667 | |
668 | useful |= state_set; |
669 | } |
670 | |
671 | for (const auto &m : accel_map) { |
672 | NFAVertex v = m.first; |
673 | const u32 state_id = state_ids.at(v); |
674 | |
        /* if we were unable to make a scheme out of the state in any context,
         * there is no point marking it as accelerable */
677 | if (!useful.test(state_id)) { |
678 | continue; |
679 | } |
680 | |
681 | u32 offset = 0; |
682 | state_set.reset(); |
683 | state_set.set(state_id); |
684 | |
685 | accel.accelerable.insert(v); |
686 | findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); |
687 | } |
688 | } |
689 | |
690 | /** The AccelAux structure has large alignment specified, and this makes some |
691 | * compilers do odd things unless we specify a custom allocator. */ |
692 | typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>> |
693 | AccelAuxVector; |
694 | |
695 | #define IMPOSSIBLE_ACCEL_MASK (~0U) |
696 | |
697 | static |
698 | u32 getEffectiveAccelStates(const build_info &args, |
699 | const unordered_map<NFAVertex, NFAVertex> &dom_map, |
700 | u32 active_accel_mask, |
701 | const vector<AccelBuild> &accelStates) { |
702 | /* accelStates is indexed by the acceleration bit index and contains a |
703 | * reference to the original vertex & state_id */ |
704 | |
705 | /* Cases to consider: |
706 | * |
707 | * 1: Accel states a and b are on and b can squash a |
708 | * --> we can ignore a. This will result in a no longer being accurately |
709 | * modelled - we may miss escapes turning it off and we may also miss |
710 | * its successors being activated. |
711 | * |
712 | * 2: Accel state b is on but accel state a is off and a is .* and must be |
713 | * seen before b is reached (and would not be covered by (1)) |
714 | * --> if a is squashable (or may die unexpectedly) we should continue |
715 | * as is |
716 | * --> if a is not squashable we can treat this as a+b or as a no accel, |
717 | * impossible case |
718 | * --> this case could be extended to handle non dot reaches by |
719 | * effectively creating something similar to squash masks for the |
720 | * reverse graph |
721 | * |
722 | * |
723 | * Other cases: |
724 | * |
725 | * 3: Accel states a and b are on but have incompatible reaches |
726 | * --> we should treat this as an impossible case. Actually, this case |
727 | * is unlikely to arise as we pick states with wide reaches to |
728 | * accelerate so an empty intersection is unlikely. |
729 | * |
730 | * Note: we need to be careful when dealing with accel states corresponding |
731 | * to bounded repeat cyclics - they may 'turn off' based on a max bound and |
     * so we may still require earlier states to be accurately modelled.
733 | */ |
734 | const NGHolder &h = args.h; |
735 | |
736 | /* map from accel_id to mask of accel_ids that it is dominated by */ |
737 | vector<u32> dominated_by(accelStates.size()); |
738 | |
739 | map<NFAVertex, u32> accel_id_map; |
740 | for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) { |
741 | NFAVertex v = accelStates[accel_id].v; |
742 | accel_id_map[v] = accel_id; |
743 | } |
744 | |
    /* Note: we want a slightly less strict definition of dominance here, as
     * skip edges prevent .* from 'truly' dominating */
747 | for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { |
748 | u32 accel_id = findAndClearLSB_32(&local_accel_mask); |
749 | assert(accel_id < accelStates.size()); |
750 | NFAVertex v = accelStates[accel_id].v; |
751 | while (contains(dom_map, v) && dom_map.at(v)) { |
752 | v = dom_map.at(v); |
753 | if (contains(accel_id_map, v)) { |
754 | dominated_by[accel_id] |= 1U << accel_id_map[v]; |
755 | } |
756 | /* TODO: could also look at inv_adj vertices to handle fan-in */ |
757 | for (NFAVertex a : adjacent_vertices_range(v, h)) { |
758 | if (a == v || !contains(accel_id_map, a) |
759 | || a == accelStates[accel_id].v /* not likely */) { |
760 | continue; |
761 | } |
762 | if (!is_subset_of(h[v].reports, h[a].reports)) { |
763 | continue; |
764 | } |
765 | auto v_succ = succs(v, h); |
766 | auto a_succ = succs(a, h); |
767 | if (is_subset_of(v_succ, a_succ)) { |
768 | dominated_by[accel_id] |= 1U << accel_id_map[a]; |
769 | } |
770 | } |
771 | } |
772 | } |
773 | |
774 | u32 may_turn_off = 0; /* BR with max bound, non-dots, squashed, etc */ |
775 | for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { |
776 | u32 accel_id = findAndClearLSB_32(&local_accel_mask); |
777 | NFAVertex v = accelStates[accel_id].v; |
778 | u32 state_id = accelStates[accel_id].state; |
779 | assert(contains(args.accel.accelerable, v)); |
780 | if (!h[v].char_reach.all()) { |
781 | may_turn_off |= 1U << accel_id; |
782 | continue; |
783 | } |
784 | if (contains(args.br_cyclic, v) |
785 | && args.br_cyclic.at(v).repeatMax != depth::infinity()) { |
786 | may_turn_off |= 1U << accel_id; |
787 | continue; |
788 | } |
789 | for (const auto &s_mask : args.squashMap | map_values) { |
790 | if (!s_mask.test(state_id)) { |
791 | may_turn_off |= 1U << accel_id; |
792 | break; |
793 | } |
794 | } |
795 | for (const auto &s_mask : args.reportSquashMap | map_values) { |
796 | if (!s_mask.test(state_id)) { |
797 | may_turn_off |= 1U << accel_id; |
798 | break; |
799 | } |
800 | } |
801 | } |
802 | |
803 | /* Case 1: */ |
804 | u32 ignored = 0; |
805 | for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { |
806 | u32 accel_id_b = findAndClearLSB_32(&local_accel_mask); |
807 | NFAVertex v = accelStates[accel_id_b].v; |
808 | if (!contains(args.squashMap, v)) { |
809 | continue; |
810 | } |
811 | assert(!contains(args.br_cyclic, v) |
812 | || args.br_cyclic.at(v).repeatMax == depth::infinity()); |
813 | NFAStateSet squashed = args.squashMap.at(v); |
814 | squashed.flip(); /* default sense for mask of survivors */ |
815 | |
816 | for (u32 local_accel_mask2 = active_accel_mask; local_accel_mask2; ) { |
817 | u32 accel_id_a = findAndClearLSB_32(&local_accel_mask2); |
818 | if (squashed.test(accelStates[accel_id_a].state)) { |
819 | ignored |= 1U << accel_id_a; |
820 | } |
821 | } |
822 | } |
823 | |
824 | /* Case 2: */ |
825 | for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { |
826 | u32 accel_id = findAndClearLSB_32(&local_accel_mask); |
827 | |
828 | u32 stuck_dominators = dominated_by[accel_id] & ~may_turn_off; |
829 | if ((stuck_dominators & active_accel_mask) != stuck_dominators) { |
830 | DEBUG_PRINTF("only %08x on, but we require %08x\n" , |
831 | active_accel_mask, stuck_dominators); |
832 | return IMPOSSIBLE_ACCEL_MASK; |
833 | } |
834 | } |
835 | |
836 | if (ignored) { |
837 | DEBUG_PRINTF("in %08x, ignoring %08x\n" , active_accel_mask, ignored); |
838 | } |
839 | |
840 | return active_accel_mask & ~ignored; |
841 | } |
842 | |
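/**
 * \brief Build the acceleration tables: the mask of accelerable states, the
 * mask that also includes their friends, the deduplicated vector of AccelAux
 * schemes and the table mapping each combination of active accel states to
 * an AccelAux entry.
 */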
843 | static |
844 | void buildAccel(const build_info &args, NFAStateSet &accelMask, |
845 | NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec, |
846 | vector<u8> &accelTable) { |
847 | const limex_accel_info &accel = args.accel; |
848 | |
849 | // Init, all zeroes. |
850 | accelMask.resize(args.num_states); |
851 | accelFriendsMask.resize(args.num_states); |
852 | |
853 | if (!args.do_accel) { |
854 | return; |
855 | } |
856 | |
857 | vector<AccelBuild> accelStates; |
858 | gatherAccelStates(args, accelStates); |
859 | |
860 | if (accelStates.empty()) { |
861 | DEBUG_PRINTF("no accelerable states\n" ); |
862 | return; |
863 | } |
864 | |
865 | const auto dom_map = findDominators(args.h); |
866 | |
867 | // We have 2^n different accel entries, one for each possible |
868 | // combination of accelerable states. |
869 | assert(accelStates.size() < 32); |
870 | const u32 accelCount = 1U << accelStates.size(); |
871 | assert(accelCount <= 256); |
872 | |
873 | // Set up a unioned AccelBuild for every possible combination of the set |
874 | // bits in accelStates. |
875 | vector<AccelBuild> accelOuts(accelCount); |
876 | vector<u32> effective_accel_set; |
877 | effective_accel_set.push_back(0); /* empty is effectively empty */ |
878 | |
879 | for (u32 i = 1; i < accelCount; i++) { |
880 | u32 effective_i = getEffectiveAccelStates(args, dom_map, i, |
881 | accelStates); |
882 | effective_accel_set.push_back(effective_i); |
883 | |
884 | if (effective_i == IMPOSSIBLE_ACCEL_MASK) { |
885 | DEBUG_PRINTF("this combination of accel states is not possible\n" ); |
886 | accelOuts[i].stop1 = CharReach::dot(); |
887 | continue; |
888 | } |
889 | |
890 | while (effective_i) { |
891 | u32 base_accel_state = findAndClearLSB_32(&effective_i); |
892 | combineAccel(accelStates[base_accel_state], accelOuts[i]); |
893 | } |
894 | minimiseAccel(accelOuts[i]); |
895 | } |
896 | |
897 | accelTable.resize(accelCount); |
898 | |
899 | // We dedupe our AccelAux structures here, so that we only write one copy |
900 | // of each unique accel scheme into the bytecode, using the accelTable as |
901 | // an index. |
902 | |
903 | // Start with the NONE case. |
904 | auxvec.push_back(AccelAux()); |
905 | memset(&auxvec[0], 0, sizeof(AccelAux)); |
906 | auxvec[0].accel_type = ACCEL_NONE; // no states on. |
907 | |
908 | AccelAux aux; |
909 | for (u32 i = 1; i < accelCount; i++) { |
910 | memset(&aux, 0, sizeof(aux)); |
911 | |
912 | NFAStateSet effective_states(args.num_states); |
913 | u32 effective_i = effective_accel_set[i]; |
914 | |
915 | AccelInfo ainfo; |
916 | ainfo.double_offset = accelOuts[i].offset; |
917 | ainfo.double_stop1 = accelOuts[i].stop1; |
918 | ainfo.double_stop2 = accelOuts[i].stop2; |
919 | |
920 | if (effective_i != IMPOSSIBLE_ACCEL_MASK) { |
921 | while (effective_i) { |
922 | u32 base_accel_id = findAndClearLSB_32(&effective_i); |
923 | effective_states.set(accelStates[base_accel_id].state); |
924 | } |
925 | |
926 | if (contains(accel.precalc, effective_states)) { |
927 | const auto &precalc = accel.precalc.at(effective_states); |
928 | ainfo.single_offset = precalc.single_offset; |
929 | ainfo.single_stops = precalc.single_cr; |
930 | } |
931 | } |
932 | |
933 | buildAccelAux(ainfo, &aux); |
934 | |
935 | // FIXME: We may want a faster way to find AccelAux structures that |
936 | // we've already built before. |
937 | auto it = find_if(auxvec.begin(), auxvec.end(), AccelAuxCmp(aux)); |
938 | if (it == auxvec.end()) { |
939 | accelTable[i] = verify_u8(auxvec.size()); |
940 | auxvec.push_back(aux); |
941 | } else { |
942 | accelTable[i] = verify_u8(it - auxvec.begin()); |
943 | } |
944 | } |
945 | |
946 | DEBUG_PRINTF("%zu unique accel schemes (of max %u)\n" , auxvec.size(), |
947 | accelCount); |
948 | |
949 | // XXX: ACCEL_NONE? |
950 | for (const auto &as : accelStates) { |
951 | NFAVertex v = as.v; |
952 | assert(v && args.state_ids.at(v) == as.state); |
953 | |
954 | accelMask.set(as.state); |
955 | accelFriendsMask.set(as.state); |
956 | |
957 | if (!contains(accel.friends, v)) { |
958 | continue; |
959 | } |
960 | // Add the friends of this state to the friends mask. |
961 | const flat_set<NFAVertex> &friends = accel.friends.at(v); |
962 | DEBUG_PRINTF("%u has %zu friends\n" , as.state, friends.size()); |
963 | for (auto friend_v : friends) { |
964 | u32 state_id = args.state_ids.at(friend_v); |
965 | DEBUG_PRINTF("--> %u\n" , state_id); |
966 | accelFriendsMask.set(state_id); |
967 | } |
968 | } |
969 | } |
970 | |
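/**
 * \brief Return the index of this vertex's report squash mask within
 * \a squash, appending the mask if it has not been seen before; returns
 * MO_INVALID_IDX if the vertex has no report squash mask.
 */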
971 | static |
972 | u32 addSquashMask(const build_info &args, const NFAVertex &v, |
973 | vector<NFAStateSet> &squash) { |
974 | auto sit = args.reportSquashMap.find(v); |
975 | if (sit == args.reportSquashMap.end()) { |
976 | return MO_INVALID_IDX; |
977 | } |
978 | |
979 | // This state has a squash mask. Paw through the existing vector to |
980 | // see if we've already seen it, otherwise add a new one. |
981 | auto it = find(squash.begin(), squash.end(), sit->second); |
982 | if (it != squash.end()) { |
983 | return verify_u32(std::distance(squash.begin(), it)); |
984 | } |
985 | u32 idx = verify_u32(squash.size()); |
986 | squash.push_back(sit->second); |
987 | return idx; |
988 | } |
989 | |
990 | using ReportListCache = ue2_unordered_map<vector<ReportID>, u32>; |
991 | |
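/**
 * \brief Add a MO_INVALID_IDX-terminated report list to \a reports, reusing
 * an identical existing list if one is found (via the cache or a direct
 * search); returns the offset of the list within \a reports.
 */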
992 | static |
993 | u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports, |
994 | ReportListCache &reports_cache) { |
995 | assert(!r.empty()); |
996 | |
997 | vector<ReportID> my_reports(begin(r), end(r)); |
998 | my_reports.push_back(MO_INVALID_IDX); // sentinel |
999 | |
1000 | auto cache_it = reports_cache.find(my_reports); |
1001 | if (cache_it != end(reports_cache)) { |
1002 | u32 offset = cache_it->second; |
1003 | DEBUG_PRINTF("reusing cached report list at %u\n" , offset); |
1004 | return offset; |
1005 | } |
1006 | |
1007 | auto it = search(begin(reports), end(reports), begin(my_reports), |
1008 | end(my_reports)); |
1009 | if (it != end(reports)) { |
1010 | u32 offset = verify_u32(std::distance(begin(reports), it)); |
1011 | DEBUG_PRINTF("reusing found report list at %u\n" , offset); |
1012 | return offset; |
1013 | } |
1014 | |
1015 | u32 offset = verify_u32(reports.size()); |
1016 | insert(&reports, reports.end(), my_reports); |
1017 | reports_cache.emplace(move(my_reports), offset); |
1018 | return offset; |
1019 | } |
1020 | |
1021 | static |
1022 | void buildAcceptsList(const build_info &args, ReportListCache &reports_cache, |
1023 | vector<NFAVertex> &verts, vector<NFAAccept> &accepts, |
1024 | vector<ReportID> &reports, vector<NFAStateSet> &squash) { |
1025 | if (verts.empty()) { |
1026 | return; |
1027 | } |
1028 | |
1029 | DEBUG_PRINTF("building accept lists for %zu states\n" , verts.size()); |
1030 | |
1031 | auto cmp_state_id = [&args](NFAVertex a, NFAVertex b) { |
1032 | u32 a_state = args.state_ids.at(a); |
1033 | u32 b_state = args.state_ids.at(b); |
1034 | assert(a_state != b_state || a == b); |
1035 | return a_state < b_state; |
1036 | }; |
1037 | |
1038 | sort(begin(verts), end(verts), cmp_state_id); |
1039 | |
1040 | const NGHolder &h = args.h; |
1041 | for (const auto &v : verts) { |
1042 | DEBUG_PRINTF("state=%u, reports: [%s]\n" , args.state_ids.at(v), |
1043 | as_string_list(h[v].reports).c_str()); |
1044 | NFAAccept a; |
1045 | memset(&a, 0, sizeof(a)); |
1046 | assert(!h[v].reports.empty()); |
1047 | if (h[v].reports.size() == 1) { |
1048 | a.single_report = 1; |
1049 | a.reports = *h[v].reports.begin(); |
1050 | } else { |
1051 | a.single_report = 0; |
1052 | a.reports = addReports(h[v].reports, reports, reports_cache); |
1053 | } |
1054 | a.squash = addSquashMask(args, v, squash); |
1055 | accepts.push_back(move(a)); |
1056 | } |
1057 | } |
1058 | |
1059 | static |
1060 | void buildAccepts(const build_info &args, ReportListCache &reports_cache, |
1061 | NFAStateSet &acceptMask, NFAStateSet &acceptEodMask, |
1062 | vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod, |
1063 | vector<ReportID> &reports, vector<NFAStateSet> &squash) { |
1064 | const NGHolder &h = args.h; |
1065 | |
1066 | acceptMask.resize(args.num_states); |
1067 | acceptEodMask.resize(args.num_states); |
1068 | |
1069 | vector<NFAVertex> verts_accept, verts_accept_eod; |
1070 | |
1071 | for (auto v : vertices_range(h)) { |
1072 | u32 state_id = args.state_ids.at(v); |
1073 | |
1074 | if (state_id == NO_STATE || !is_match_vertex(v, h)) { |
1075 | continue; |
1076 | } |
1077 | |
1078 | if (edge(v, h.accept, h).second) { |
1079 | acceptMask.set(state_id); |
1080 | verts_accept.push_back(v); |
1081 | } else { |
1082 | assert(edge(v, h.acceptEod, h).second); |
1083 | acceptEodMask.set(state_id); |
1084 | verts_accept_eod.push_back(v); |
1085 | } |
1086 | } |
1087 | |
1088 | buildAcceptsList(args, reports_cache, verts_accept, accepts, reports, |
1089 | squash); |
1090 | buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports, |
1091 | squash); |
1092 | } |
1093 | |
1094 | static |
1095 | void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) { |
1096 | if (args.tops.empty()) { |
1097 | return; // No tops, probably an outfix NFA. |
1098 | } |
1099 | |
1100 | u32 numMasks = args.tops.rbegin()->first + 1; // max mask index |
1101 | DEBUG_PRINTF("we have %u top masks\n" , numMasks); |
1102 | |
1103 | topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes |
1104 | |
1105 | for (const auto &m : args.tops) { |
1106 | u32 mask_idx = m.first; |
1107 | for (NFAVertex v : m.second) { |
1108 | u32 state_id = args.state_ids.at(v); |
1109 | DEBUG_PRINTF("state %u is in top mask %u\n" , state_id, mask_idx); |
1110 | |
1111 | assert(mask_idx < numMasks); |
1112 | assert(state_id != NO_STATE); |
1113 | |
1114 | topMasks[mask_idx].set(state_id); |
1115 | } |
1116 | } |
1117 | } |
1118 | |
1119 | static |
1120 | u32 uncompressedStateSize(u32 num_states) { |
1121 | // Number of bytes required to store all our states. |
1122 | return ROUNDUP_N(num_states, 8)/8; |
1123 | } |
1124 | |
1125 | static |
1126 | u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates, |
1127 | const unordered_map<NFAVertex, u32> &state_ids) { |
1128 | // Shrink state requirement to enough to fit the compressed largest reach. |
1129 | vector<u32> allreach(N_CHARS, 0); |
1130 | |
1131 | for (auto v : vertices_range(h)) { |
1132 | u32 i = state_ids.at(v); |
1133 | if (i == NO_STATE || maskedStates.test(i)) { |
1134 | continue; |
1135 | } |
1136 | const CharReach &cr = h[v].char_reach; |
1137 | for (size_t j = cr.find_first(); j != cr.npos; j = cr.find_next(j)) { |
1138 | allreach[j]++; // state 'i' can reach character 'j'. |
1139 | } |
1140 | } |
1141 | |
1142 | u32 maxreach = *max_element(allreach.begin(), allreach.end()); |
1143 | DEBUG_PRINTF("max reach is %u\n" , maxreach); |
1144 | return (maxreach + 7) / 8; |
1145 | } |
1146 | |
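/**
 * \brief True if any squash mask (normal or report) is able to switch off
 * the startDs state.
 */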
1147 | static |
1148 | bool hasSquashableInitDs(const build_info &args) { |
1149 | const NGHolder &h = args.h; |
1150 | |
1151 | if (args.squashMap.empty()) { |
1152 | DEBUG_PRINTF("squash map is empty\n" ); |
1153 | return false; |
1154 | } |
1155 | |
1156 | NFAStateSet initDs(args.num_states); |
1157 | u32 sds_state = args.state_ids.at(h.startDs); |
1158 | if (sds_state == NO_STATE) { |
1159 | DEBUG_PRINTF("no states in initds\n" ); |
1160 | return false; |
1161 | } |
1162 | |
1163 | initDs.set(sds_state); |
1164 | |
1165 | /* TODO: simplify */ |
1166 | |
1167 | // Check normal squash map. |
1168 | for (const auto &m : args.squashMap) { |
1169 | DEBUG_PRINTF("checking squash mask for state %u\n" , |
1170 | args.state_ids.at(m.first)); |
1171 | NFAStateSet squashed = ~(m.second); // flip mask |
1172 | assert(squashed.size() == initDs.size()); |
1173 | if (squashed.intersects(initDs)) { |
1174 | DEBUG_PRINTF("state %u squashes initds states\n" , |
1175 | args.state_ids.at(m.first)); |
1176 | return true; |
1177 | } |
1178 | } |
1179 | |
1180 | // Check report squash map. |
1181 | for (const auto &m : args.reportSquashMap) { |
1182 | DEBUG_PRINTF("checking report squash mask for state %u\n" , |
1183 | args.state_ids.at(m.first)); |
1184 | NFAStateSet squashed = ~(m.second); // flip mask |
1185 | assert(squashed.size() == initDs.size()); |
1186 | if (squashed.intersects(initDs)) { |
1187 | DEBUG_PRINTF("state %u squashes initds states\n" , |
1188 | args.state_ids.at(m.first)); |
1189 | return true; |
1190 | } |
1191 | } |
1192 | |
1193 | return false; |
1194 | } |
1195 | |
1196 | static |
1197 | bool hasInitDsStates(const NGHolder &h, |
1198 | const unordered_map<NFAVertex, u32> &state_ids) { |
1199 | if (state_ids.at(h.startDs) != NO_STATE) { |
1200 | return true; |
1201 | } |
1202 | |
1203 | if (is_triggered(h) && state_ids.at(h.start) != NO_STATE) { |
1204 | return true; |
1205 | } |
1206 | |
1207 | return false; |
1208 | } |
1209 | |
1210 | static |
1211 | void findMaskedCompressionStates(const build_info &args, |
1212 | NFAStateSet &maskedStates) { |
1213 | const NGHolder &h = args.h; |
1214 | if (!generates_callbacks(h)) { |
1215 | // Rose leftfixes can mask out initds, which is worth doing if it will |
1216 | // stay on forever (i.e. it's not squashable). |
1217 | u32 sds_i = args.state_ids.at(h.startDs); |
1218 | if (sds_i != NO_STATE && !hasSquashableInitDs(args)) { |
1219 | maskedStates.set(sds_i); |
1220 | DEBUG_PRINTF("masking out initds state\n" ); |
1221 | } |
1222 | } |
1223 | |
1224 | // Suffixes and outfixes can mask out leaf states, which should all be |
1225 | // accepts. Right now we can only do this when there is nothing in initDs, |
1226 | // as we switch that on unconditionally in the expand call. |
1227 | if (!inspects_states_for_accepts(h) |
1228 | && !hasInitDsStates(h, args.state_ids)) { |
1229 | NFAStateSet nonleaf(args.num_states); |
1230 | for (const auto &e : edges_range(h)) { |
1231 | u32 from = args.state_ids.at(source(e, h)); |
1232 | u32 to = args.state_ids.at(target(e, h)); |
1233 | if (from == NO_STATE) { |
1234 | continue; |
1235 | } |
1236 | |
1237 | // We cannot mask out EOD accepts, as they have to perform an |
1238 | // action after they're switched on that may be delayed until the |
1239 | // next stream write. |
1240 | if (to == NO_STATE && target(e, h) != h.acceptEod) { |
1241 | continue; |
1242 | } |
1243 | |
1244 | nonleaf.set(from); |
1245 | } |
1246 | |
1247 | for (u32 i = 0; i < args.num_states; i++) { |
1248 | if (!nonleaf.test(i)) { |
1249 | maskedStates.set(i); |
1250 | } |
1251 | } |
1252 | |
1253 | DEBUG_PRINTF("masking out %zu leaf states\n" , maskedStates.count()); |
1254 | } |
1255 | } |
1256 | |
1257 | /** \brief Sets a given flag in the LimEx structure. */ |
1258 | template<class implNFA_t> |
1259 | static |
1260 | void setLimexFlag(implNFA_t *limex, u32 flag) { |
1261 | assert(flag); |
1262 | assert((flag & (flag - 1)) == 0); |
1263 | limex->flags |= flag; |
1264 | } |
1265 | |
1266 | /** \brief Sets a given flag in the NFA structure */ |
1267 | static |
1268 | void setNfaFlag(NFA *nfa, u32 flag) { |
1269 | assert(flag); |
1270 | assert((flag & (flag - 1)) == 0); |
1271 | nfa->flags |= flag; |
1272 | } |
1273 | |
1274 | // Some of our NFA types support compressing the state down if we're not using |
1275 | // all of it. |
1276 | template<class implNFA_t> |
1277 | static |
1278 | void findStateSize(const build_info &args, implNFA_t *limex) { |
1279 | // Nothing is masked off by default. |
1280 | maskFill(limex->compressMask, 0xff); |
1281 | |
1282 | u32 sizeUncompressed = uncompressedStateSize(args.num_states); |
1283 | assert(sizeUncompressed <= sizeof(limex->compressMask)); |
1284 | |
1285 | if (!args.stateCompression) { |
1286 | DEBUG_PRINTF("compression disabled, uncompressed state size %u\n" , |
1287 | sizeUncompressed); |
1288 | limex->stateSize = sizeUncompressed; |
1289 | return; |
1290 | } |
1291 | |
1292 | NFAStateSet maskedStates(args.num_states); |
1293 | findMaskedCompressionStates(args, maskedStates); |
1294 | |
1295 | u32 sizeCompressed = compressedStateSize(args.h, maskedStates, args.state_ids); |
1296 | assert(sizeCompressed <= sizeof(limex->compressMask)); |
1297 | |
1298 | DEBUG_PRINTF("compressed=%u, uncompressed=%u\n" , sizeCompressed, |
1299 | sizeUncompressed); |
1300 | |
1301 | // Must be at least a 10% saving. |
1302 | if ((sizeCompressed * 100) <= (sizeUncompressed * 90)) { |
1303 | DEBUG_PRINTF("using compression, state size %u\n" , |
1304 | sizeCompressed); |
1305 | setLimexFlag(limex, LIMEX_FLAG_COMPRESS_STATE); |
1306 | limex->stateSize = sizeCompressed; |
1307 | |
1308 | if (maskedStates.any()) { |
1309 | DEBUG_PRINTF("masking %zu states\n" , maskedStates.count()); |
1310 | setLimexFlag(limex, LIMEX_FLAG_COMPRESS_MASKED); |
1311 | for (size_t i = maskedStates.find_first(); i != NFAStateSet::npos; |
1312 | i = maskedStates.find_next(i)) { |
1313 | maskClearBit(limex->compressMask, i); |
1314 | } |
1315 | } |
1316 | } else { |
1317 | DEBUG_PRINTF("not using compression, state size %u\n" , |
1318 | sizeUncompressed); |
1319 | limex->stateSize = sizeUncompressed; |
1320 | } |
1321 | } |
1322 | |
1323 | /* |
1324 | * LimEx NFA: code for building NFAs in the Limited+Exceptional model. Most |
1325 | * transitions are limited, with transitions outside the constraints of our |
1326 | * shifts taken care of as 'exceptions'. Exceptions are also used to handle |
1327 | * accepts and squash behaviour. |
1328 | */ |
1329 | |
1330 | /** |
1331 | * \brief Prototype exception class. |
1332 | * |
1333 | * Used to build up the map of exceptions before being converted to real |
1334 | * NFAException32 (etc) structures. |
1335 | */ |
1336 | struct ExceptionProto { |
1337 | u32 reports_index = MO_INVALID_IDX; |
1338 | NFAStateSet succ_states; |
1339 | NFAStateSet squash_states; |
1340 | u32 repeat_index = MO_INVALID_IDX; |
1341 | enum LimExTrigger trigger = LIMEX_TRIGGER_NONE; |
1342 | enum LimExSquash squash = LIMEX_SQUASH_NONE; |
1343 | |
1344 | explicit ExceptionProto(u32 num_states) |
1345 | : succ_states(num_states), squash_states(num_states) { |
1346 | // Squash states are represented as the set of states to leave on, |
1347 | // so we start with all-ones. |
1348 | squash_states.set(); |
1349 | } |
1350 | |
1351 | bool operator<(const ExceptionProto &b) const { |
1352 | const ExceptionProto &a = *this; |
1353 | |
1354 | ORDER_CHECK(reports_index); |
1355 | ORDER_CHECK(repeat_index); |
1356 | ORDER_CHECK(trigger); |
1357 | ORDER_CHECK(squash); |
1358 | ORDER_CHECK(succ_states); |
1359 | ORDER_CHECK(squash_states); |
1360 | |
1361 | return false; |
1362 | } |
1363 | }; |
1364 | |
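/**
 * \brief Build an ExceptionProto for every state that requires exceptional
 * handling: accept states in graphs that generate callbacks, bounded repeat
 * pos/tug triggers, sources of non-limited transitions and states with a
 * successor that carries a squash mask. Identical prototypes are grouped
 * together in \a exceptionMap; the total number of exceptional states is
 * returned.
 */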
1365 | static |
1366 | u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, |
1367 | const unordered_set<NFAEdge> &exceptional, |
1368 | map<ExceptionProto, vector<u32>> &exceptionMap, |
1369 | vector<ReportID> &reportList) { |
1370 | const NGHolder &h = args.h; |
1371 | const u32 num_states = args.num_states; |
1372 | u32 exceptionCount = 0; |
1373 | |
1374 | unordered_map<NFAVertex, u32> pos_trigger; |
1375 | unordered_map<NFAVertex, u32> tug_trigger; |
1376 | |
1377 | for (u32 i = 0; i < args.repeats.size(); i++) { |
1378 | const BoundedRepeatData &br = args.repeats[i]; |
1379 | assert(!contains(pos_trigger, br.pos_trigger)); |
1380 | pos_trigger[br.pos_trigger] = i; |
1381 | for (auto v : br.tug_triggers) { |
1382 | assert(!contains(tug_trigger, v)); |
1383 | tug_trigger[v] = i; |
1384 | } |
1385 | } |
1386 | |
1387 | for (auto v : vertices_range(h)) { |
1388 | const u32 i = args.state_ids.at(v); |
1389 | |
1390 | if (i == NO_STATE) { |
1391 | continue; |
1392 | } |
1393 | |
1394 | bool addMe = false; |
1395 | ExceptionProto e(num_states); |
1396 | |
1397 | if (edge(v, h.accept, h).second && generates_callbacks(h)) { |
1398 | /* if nfa is never used to produce callbacks, no need to mark |
1399 | * states as exceptional */ |
1400 | const auto &reports = h[v].reports; |
1401 | |
1402 | DEBUG_PRINTF("state %u is exceptional due to accept " |
1403 | "(%zu reports)\n" , i, reports.size()); |
1404 | |
1405 | if (reports.empty()) { |
1406 | e.reports_index = MO_INVALID_IDX; |
1407 | } else { |
1408 | e.reports_index = |
1409 | addReports(reports, reportList, reports_cache); |
1410 | } |
1411 | |
1412 | // We may be applying a report squash too. |
1413 | auto mi = args.reportSquashMap.find(v); |
1414 | if (mi != args.reportSquashMap.end()) { |
1415 | DEBUG_PRINTF("report squashes states\n" ); |
1416 | assert(e.squash_states.size() == mi->second.size()); |
1417 | e.squash_states = mi->second; |
1418 | e.squash = LIMEX_SQUASH_REPORT; |
1419 | } |
1420 | |
1421 | addMe = true; |
1422 | } |
1423 | |
1424 | if (contains(pos_trigger, v)) { |
1425 | u32 repeat_index = pos_trigger[v]; |
1426 | assert(e.trigger == LIMEX_TRIGGER_NONE); |
1427 | e.trigger = LIMEX_TRIGGER_POS; |
1428 | e.repeat_index = repeat_index; |
1429 | DEBUG_PRINTF("state %u has pos trigger for repeat %u\n" , i, |
1430 | repeat_index); |
1431 | addMe = true; |
1432 | } |
1433 | |
1434 | if (contains(tug_trigger, v)) { |
1435 | u32 repeat_index = tug_trigger[v]; |
1436 | assert(e.trigger == LIMEX_TRIGGER_NONE); |
1437 | e.trigger = LIMEX_TRIGGER_TUG; |
1438 | e.repeat_index = repeat_index; |
1439 | |
1440 | // TUG triggers can squash the preceding cyclic state. |
1441 | u32 cyclic = args.state_ids.at(args.repeats[repeat_index].cyclic); |
1442 | e.squash_states.reset(cyclic); |
1443 | e.squash = LIMEX_SQUASH_TUG; |
1444 | DEBUG_PRINTF("state %u has tug trigger for repeat %u, can squash " |
1445 | "state %u\n" , i, repeat_index, cyclic); |
1446 | addMe = true; |
1447 | } |
1448 | |
1449 | // are we a non-limited transition? |
1450 | for (const auto &oe : out_edges_range(v, h)) { |
1451 | if (contains(exceptional, oe)) { |
1452 | NFAVertex w = target(oe, h); |
1453 | u32 w_idx = args.state_ids.at(w); |
1454 | assert(w_idx != NO_STATE); |
1455 | e.succ_states.set(w_idx); |
1456 | DEBUG_PRINTF("exceptional transition %u->%u\n" , i, w_idx); |
1457 | addMe = true; |
1458 | } |
1459 | } |
1460 | |
1461 | // do we lead SOLELY to a squasher state? (we use the successors as |
1462 | // a proxy for the out-edge here, so there must be only one for us |
1463 | // to do this safely) |
1464 | /* The above comment is IMHO bogus and would result in all squashing |
1465 | * being disabled around stars */ |
1466 | if (e.trigger != LIMEX_TRIGGER_TUG) { |
1467 | for (auto w : adjacent_vertices_range(v, h)) { |
1468 | if (w == v) { |
1469 | continue; |
1470 | } |
1471 | u32 j = args.state_ids.at(w); |
1472 | if (j == NO_STATE) { |
1473 | continue; |
1474 | } |
1475 | DEBUG_PRINTF("we are checking if succ %u is a squasher\n" , j); |
1476 | auto mi = args.squashMap.find(w); |
1477 | if (mi != args.squashMap.end()) { |
1478 | DEBUG_PRINTF("squasher edge (%u, %u)\n" , i, j); |
1479 | DEBUG_PRINTF("e.squash_states.size() == %zu, " |
1480 | "mi->second.size() = %zu\n" , |
1481 | e.squash_states.size(), mi->second.size()); |
1482 | assert(e.squash_states.size() == mi->second.size()); |
1483 | e.squash_states = mi->second; |
1484 | |
1485 | // NOTE: this might be being combined with the report |
1486 | // squashing above. |
1487 | |
1488 | e.squash = LIMEX_SQUASH_CYCLIC; |
1489 | DEBUG_PRINTF("squashing succ %u (turns off %zu states)\n" , |
1490 | j, mi->second.size() - mi->second.count()); |
1491 | addMe = true; |
1492 | } |
1493 | } |
1494 | } |
1495 | |
1496 | if (addMe) { |
1497 | // Add 'e' if it isn't in the map, and push state i on to its list |
1498 | // of states. |
1499 | assert(e.succ_states.size() == num_states); |
1500 | assert(e.squash_states.size() == num_states); |
1501 | exceptionMap[e].push_back(i); |
1502 | exceptionCount++; |
1503 | } |
1504 | } |
1505 | |
1506 | DEBUG_PRINTF("%u exceptions found (%zu unique)\n" , exceptionCount, |
1507 | exceptionMap.size()); |
1508 | return exceptionCount; |
1509 | } |
1510 | |
1511 | static |
1512 | u32 depth_to_u32(const depth &d) { |
1513 | assert(d.is_reachable()); |
1514 | if (d.is_infinite()) { |
1515 | return REPEAT_INF; |
1516 | } |
1517 | |
1518 | u32 d_val = d; |
1519 | assert(d_val < REPEAT_INF); |
1520 | return d_val; |
1521 | } |
1522 | |
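/**
 * \brief True if the transition from state \a from to state \a to cannot be
 * handled by the shift masks: either it is not a limited transition (wrong
 * distance, or it crosses a 64-bit boundary) or it leaves a tug trigger
 * state, whose outgoing transitions are always exceptional.
 */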
1523 | static |
1524 | bool isExceptionalTransition(u32 from, u32 to, const build_info &args, |
1525 | u32 maxShift) { |
1526 | if (!isLimitedTransition(from, to, maxShift)) { |
1527 | return true; |
1528 | } |
1529 | |
1530 | // All transitions out of a tug trigger are exceptional. |
1531 | if (args.tugs.test(from)) { |
1532 | return true; |
1533 | } |
1534 | return false; |
1535 | } |
1536 | |
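/**
 * \brief Given a budget of \a nShifts distinct shift amounts, return the
 * largest shift that will actually be used: the shift distances of all
 * limited transitions are collected and the nShifts smallest distinct values
 * are retained.
 */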
1537 | static |
1538 | u32 findMaxVarShift(const build_info &args, u32 nShifts) { |
1539 | const NGHolder &h = args.h; |
1540 | u32 shiftMask = 0; |
1541 | for (const auto &e : edges_range(h)) { |
1542 | u32 from = args.state_ids.at(source(e, h)); |
1543 | u32 to = args.state_ids.at(target(e, h)); |
1544 | if (from == NO_STATE || to == NO_STATE) { |
1545 | continue; |
1546 | } |
1547 | if (!isExceptionalTransition(from, to, args, MAX_SHIFT_AMOUNT)) { |
1548 | shiftMask |= (1UL << (to - from)); |
1549 | } |
1550 | } |
1551 | |
1552 | u32 maxVarShift = 0; |
1553 | for (u32 shiftCnt = 0; shiftMask != 0 && shiftCnt < nShifts; shiftCnt++) { |
1554 | maxVarShift = findAndClearLSB_32(&shiftMask); |
1555 | } |
1556 | |
1557 | return maxVarShift; |
1558 | } |
1559 | |
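/**
 * \brief Cost model for a scheme using \a nShifts variable shifts: SHIFT_COST
 * per shift mask plus EXCEPTION_COST per state with at least one exceptional
 * outgoing transition. Lower scores are better.
 */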
1560 | static |
1561 | int getLimexScore(const build_info &args, u32 nShifts) { |
1562 | const NGHolder &h = args.h; |
1563 | u32 maxVarShift = nShifts; |
1564 | int score = 0; |
1565 | |
1566 | score += SHIFT_COST * nShifts; |
1567 | maxVarShift = findMaxVarShift(args, nShifts); |
1568 | |
1569 | NFAStateSet exceptionalStates(args.num_states); |
1570 | for (const auto &e : edges_range(h)) { |
1571 | u32 from = args.state_ids.at(source(e, h)); |
1572 | u32 to = args.state_ids.at(target(e, h)); |
1573 | if (from == NO_STATE || to == NO_STATE) { |
1574 | continue; |
1575 | } |
1576 | if (isExceptionalTransition(from, to, args, maxVarShift)) { |
1577 | exceptionalStates.set(from); |
1578 | } |
1579 | } |
1580 | score += EXCEPTION_COST * exceptionalStates.count(); |
1581 | return score; |
1582 | } |
1583 | |
// This function finds the best shift scheme, i.e. the one with the lowest
// cost score (see getLimexScore). It returns the number of variable shifts
// for that scheme and writes the score to bestScoreRet if provided; it
// returns zero if no appropriate scheme was found.
1587 | static |
1588 | u32 findBestNumOfVarShifts(const build_info &args, |
1589 | int *bestScoreRet = nullptr) { |
1590 | u32 bestNumOfVarShifts = 0; |
1591 | int bestScore = INT_MAX; |
1592 | for (u32 shiftCount = 1; shiftCount <= MAX_SHIFT_COUNT; shiftCount++) { |
1593 | int score = getLimexScore(args, shiftCount); |
1594 | if (score < bestScore) { |
1595 | bestScore = score; |
1596 | bestNumOfVarShifts = shiftCount; |
1597 | } |
1598 | } |
1599 | if (bestScoreRet != nullptr) { |
1600 | *bestScoreRet = bestScore; |
1601 | } |
1602 | return bestNumOfVarShifts; |
1603 | } |
1604 | |
1605 | static |
1606 | bool cannotDie(const build_info &args, const set<NFAVertex> &tops) { |
1607 | const auto &h = args.h; |
1608 | |
    // When this top is activated, all of the vertices in 'tops' are switched
    // on. If any of them leads to a subgraph that cannot die, then this top
    // cannot die.

    // For each top, we use a depth-first search to traverse the graph from
    // the top, looking for a cyclic path consisting of vertices with dot
    // reach. If one exists, then the NFA cannot die after this top is
    // triggered.
1616 | |
1617 | auto colour_map = make_small_color_map(h); |
1618 | |
1619 | struct CycleFound {}; |
1620 | struct CannotDieVisitor : public boost::default_dfs_visitor { |
1621 | void back_edge(const NFAEdge &e, const NGHolder &g) const { |
1622 | DEBUG_PRINTF("back-edge %zu,%zu\n" , g[source(e, g)].index, |
1623 | g[target(e, g)].index); |
1624 | if (g[target(e, g)].char_reach.all()) { |
1625 | assert(g[source(e, g)].char_reach.all()); |
1626 | throw CycleFound(); |
1627 | } |
1628 | } |
1629 | }; |
1630 | |
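    // Note: the visitor throws CycleFound to abort the depth-first search as
    // soon as a qualifying (dot-reach) cycle has been found.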
1631 | try { |
1632 | for (const auto &top : tops) { |
1633 | DEBUG_PRINTF("checking top vertex %zu\n" , h[top].index); |
1634 | |
            // Constrain the search to the top vertex and any dot vertices it
            // can reach.
1637 | auto term_func = [&](NFAVertex v, const NGHolder &g) { |
1638 | if (v == top) { |
1639 | return false; |
1640 | } |
1641 | if (!g[v].char_reach.all()) { |
1642 | return true; |
1643 | } |
1644 | if (contains(args.br_cyclic, v) && |
1645 | args.br_cyclic.at(v).repeatMax != depth::infinity()) { |
1646 | // Bounded repeat vertices without inf max can be turned |
1647 | // off. |
1648 | return true; |
1649 | } |
1650 | return false; |
1651 | }; |
1652 | |
1653 | boost::depth_first_visit(h, top, CannotDieVisitor(), colour_map, |
1654 | term_func); |
1655 | } |
1656 | } catch (const CycleFound &) { |
1657 | DEBUG_PRINTF("cycle found\n" ); |
1658 | return true; |
1659 | } |
1660 | |
1661 | return false; |
1662 | } |
1663 | |
1664 | /** \brief True if this NFA cannot ever be in no states at all. */ |
1665 | static |
1666 | bool cannotDie(const build_info &args) { |
1667 | const auto &h = args.h; |
1668 | const auto &state_ids = args.state_ids; |
1669 | |
1670 | // If we have a startDs we're actually using, we can't die. |
1671 | if (state_ids.at(h.startDs) != NO_STATE) { |
1672 | DEBUG_PRINTF("is using startDs\n" ); |
1673 | return true; |
1674 | } |
1675 | |
1676 | return all_of_in(args.tops | map_values, [&](const set<NFAVertex> &verts) { |
1677 | return cannotDie(args, verts); |
1678 | }); |
1679 | } |
1680 | |
1681 | template<NFAEngineType dtype> |
1682 | struct Factory { |
1683 | // typedefs for readability, for types derived from traits |
1684 | typedef typename NFATraits<dtype>::exception_t exception_t; |
1685 | typedef typename NFATraits<dtype>::implNFA_t implNFA_t; |
1686 | typedef typename NFATraits<dtype>::tableRow_t tableRow_t; |
1687 | |
1688 | static |
1689 | void allocState(NFA *nfa, u32 repeatscratchStateSize, |
1690 | u32 repeatStreamState) { |
1691 | implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa); |
1692 | |
1693 | // LimEx NFAs now store the following in state: |
1694 | // 1. state bitvector (always present) |
1695 | // 2. space associated with repeats |
1696 | // This function just needs to size these correctly. |
1697 | |
1698 | u32 stateSize = limex->stateSize; |
1699 | |
1700 | DEBUG_PRINTF("bitvector=%zu/%u, repeat full=%u, stream=%u\n" , |
1701 | sizeof(limex->init), stateSize, repeatscratchStateSize, |
1702 | repeatStreamState); |
1703 | |
1704 | size_t scratchStateSize = NFATraits<dtype>::scratch_state_size; |
1705 | |
1706 | if (repeatscratchStateSize) { |
1707 | scratchStateSize |
1708 | = ROUNDUP_N(scratchStateSize, alignof(RepeatControl)); |
1709 | scratchStateSize += repeatscratchStateSize; |
1710 | } |
1711 | size_t streamStateSize = stateSize + repeatStreamState; |
1712 | |
1713 | nfa->scratchStateSize = verify_u32(scratchStateSize); |
1714 | nfa->streamStateSize = verify_u32(streamStateSize); |
1715 | } |
1716 | |
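    /**
     * \brief Size (rounded up to a cache line) of a single NFARepeatInfo
     * block: the NFARepeatInfo and RepeatInfo headers, an optional sparse
     * lookup table (REPEAT_SPARSE_OPTIMAL_P only) and a tug mask. The offsets
     * of the table and tug mask are returned via tableOffset and
     * tugMaskOffset.
     */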
1717 | static |
1718 | size_t repeatAllocSize(const BoundedRepeatData &br, u32 *tableOffset, |
1719 | u32 *tugMaskOffset) { |
1720 | size_t len = sizeof(NFARepeatInfo) + sizeof(RepeatInfo); |
1721 | |
1722 | // sparse lookup table. |
1723 | if (br.type == REPEAT_SPARSE_OPTIMAL_P) { |
1724 | len = ROUNDUP_N(len, alignof(u64a)); |
1725 | *tableOffset = verify_u32(len); |
1726 | len += sizeof(u64a) * (br.repeatMax + 1); |
1727 | } else { |
1728 | *tableOffset = 0; |
1729 | } |
1730 | |
1731 | // tug mask. |
1732 | len = ROUNDUP_N(len, alignof(tableRow_t)); |
1733 | *tugMaskOffset = verify_u32(len); |
1734 | len += sizeof(tableRow_t); |
1735 | |
1736 | // to simplify layout. |
1737 | len = ROUNDUP_CL(len); |
1738 | |
1739 | return len; |
1740 | } |
1741 | |
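    /**
     * \brief Build one NFARepeatInfo block per bounded repeat, accumulating
     * the scratch (RepeatControl) and stream (packed control plus repeat
     * state) space that the runtime will need for them.
     */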
1742 | static |
1743 | void buildRepeats(const build_info &args, |
1744 | vector<bytecode_ptr<NFARepeatInfo>> &out, |
1745 | u32 *scratchStateSize, u32 *streamState) { |
1746 | out.reserve(args.repeats.size()); |
1747 | |
1748 | u32 repeat_idx = 0; |
1749 | for (auto it = args.repeats.begin(), ite = args.repeats.end(); |
1750 | it != ite; ++it, ++repeat_idx) { |
1751 | const BoundedRepeatData &br = *it; |
1752 | assert(args.state_ids.at(br.cyclic) != NO_STATE); |
1753 | |
1754 | u32 tableOffset, tugMaskOffset; |
1755 | size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset); |
1756 | auto info = make_zeroed_bytecode_ptr<NFARepeatInfo>(len); |
1757 | char *info_ptr = (char *)info.get(); |
1758 | |
1759 | // Collect state space info. |
1760 | RepeatStateInfo rsi(br.type, br.repeatMin, br.repeatMax, br.minPeriod); |
1761 | u32 streamStateLen = rsi.packedCtrlSize + rsi.stateSize; |
1762 | |
1763 | // Fill the NFARepeatInfo structure. |
1764 | info->cyclicState = args.state_ids.at(br.cyclic); |
1765 | info->ctrlIndex = repeat_idx; |
1766 | info->packedCtrlOffset = *streamState; |
1767 | info->stateOffset = *streamState + rsi.packedCtrlSize; |
1768 | info->stateSize = streamStateLen; |
1769 | info->tugMaskOffset = tugMaskOffset; |
1770 | |
1771 | // Fill the RepeatInfo structure. |
1772 | RepeatInfo *repeat = |
1773 | (RepeatInfo *)(info_ptr + sizeof(NFARepeatInfo)); |
1774 | repeat->type = br.type; |
1775 | repeat->repeatMin = depth_to_u32(br.repeatMin); |
1776 | repeat->repeatMax = depth_to_u32(br.repeatMax); |
1777 | repeat->horizon = rsi.horizon; |
1778 | repeat->packedCtrlSize = rsi.packedCtrlSize; |
1779 | repeat->stateSize = rsi.stateSize; |
1780 | copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes); |
1781 | repeat->patchCount = rsi.patchCount; |
1782 | repeat->patchSize = rsi.patchSize; |
1783 | repeat->encodingSize = rsi.encodingSize; |
1784 | repeat->patchesOffset = rsi.patchesOffset; |
1785 | |
1786 | u32 repeat_len = sizeof(RepeatInfo); |
1787 | if (br.type == REPEAT_SPARSE_OPTIMAL_P) { |
1788 | repeat_len += sizeof(u64a) * (rsi.patchSize + 1); |
1789 | } |
1790 | repeat->length = repeat_len; |
1791 | |
1792 | // Copy in the sparse lookup table. |
1793 | if (br.type == REPEAT_SPARSE_OPTIMAL_P) { |
1794 | assert(!rsi.table.empty()); |
1795 | copy_bytes(info_ptr + tableOffset, rsi.table); |
1796 | } |
1797 | |
1798 | // Fill the tug mask. |
1799 | tableRow_t *tugMask = (tableRow_t *)(info_ptr + tugMaskOffset); |
1800 | for (auto v : br.tug_triggers) { |
1801 | u32 state_id = args.state_ids.at(v); |
1802 | assert(state_id != NO_STATE); |
1803 | maskSetBit(*tugMask, state_id); |
1804 | } |
1805 | |
1806 | assert(streamStateLen); |
1807 | *streamState += streamStateLen; |
1808 | *scratchStateSize += sizeof(RepeatControl); |
1809 | |
1810 | out.emplace_back(move(info)); |
1811 | } |
1812 | } |
1813 | |
1814 | static |
1815 | void writeLimexMasks(const build_info &args, implNFA_t *limex) { |
1816 | const NGHolder &h = args.h; |
1817 | |
1818 | // Init masks. |
1819 | u32 s_i = args.state_ids.at(h.start); |
1820 | u32 sds_i = args.state_ids.at(h.startDs); |
1821 | |
1822 | if (s_i != NO_STATE) { |
1823 | maskSetBit(limex->init, s_i); |
1824 | if (is_triggered(h)) { |
1825 | maskSetBit(limex->initDS, s_i); |
1826 | } |
1827 | } |
1828 | |
1829 | if (sds_i != NO_STATE) { |
1830 | maskSetBit(limex->init, sds_i); |
1831 | maskSetBit(limex->initDS, sds_i); |
1832 | } |
1833 | |
1834 | // Zombie mask. |
1835 | for (auto v : args.zombies) { |
1836 | u32 state_id = args.state_ids.at(v); |
1837 | assert(state_id != NO_STATE); |
1838 | maskSetBit(limex->zombieMask, state_id); |
1839 | } |
1840 | |
1841 | // Repeat cyclic mask. |
1842 | for (const auto &br : args.repeats) { |
1843 | u32 cyclic = args.state_ids.at(br.cyclic); |
1844 | assert(cyclic != NO_STATE); |
1845 | maskSetBit(limex->repeatCyclicMask, cyclic); |
1846 | } |
1847 | /* also include tugs in repeat cyclic mask */ |
1848 | for (size_t i = args.tugs.find_first(); i != args.tugs.npos; |
1849 | i = args.tugs.find_next(i)) { |
1850 | maskSetBit(limex->repeatCyclicMask, i); |
1851 | } |
1852 | } |
1853 | |
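    /**
     * \brief For each distinct shift distance used by a limited transition,
     * record the distance in shiftAmount[] and set the source state's bit in
     * the corresponding shift mask.
     */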
1854 | static |
1855 | void writeShiftMasks(const build_info &args, implNFA_t *limex) { |
1856 | const NGHolder &h = args.h; |
1857 | u32 maxShift = findMaxVarShift(args, limex->shiftCount); |
1858 | u32 shiftMask = 0; |
1859 | int shiftMaskIdx = 0; |
1860 | |
1861 | for (const auto &e : edges_range(h)) { |
1862 | u32 from = args.state_ids.at(source(e, h)); |
1863 | u32 to = args.state_ids.at(target(e, h)); |
1864 | if (from == NO_STATE || to == NO_STATE) { |
1865 | continue; |
1866 | } |
1867 | |
1868 | // We check for exceptional transitions here, as we don't want tug |
1869 | // trigger transitions emitted as limited transitions (even if they |
1870 | // could be in this model). |
1871 | if (!isExceptionalTransition(from, to, args, maxShift)) { |
1872 | u32 shift = to - from; |
1873 | if ((shiftMask & (1UL << shift)) == 0UL) { |
1874 | shiftMask |= (1UL << shift); |
1875 | limex->shiftAmount[shiftMaskIdx++] = (u8)shift; |
1876 | } |
1877 | assert(limex->shiftCount <= MAX_SHIFT_COUNT); |
1878 | for (u32 i = 0; i < limex->shiftCount; i++) { |
1879 | if (limex->shiftAmount[i] == (u8)shift) { |
1880 | maskSetBit(limex->shift[i], from); |
1881 | break; |
1882 | } |
1883 | } |
1884 | } |
1885 | } |
1886 | if (maxShift && limex->shiftCount > 1) { |
1887 | for (u32 i = 0; i < limex->shiftCount; i++) { |
1888 | assert(!isMaskZero(limex->shift[i])); |
1889 | } |
1890 | } |
1891 | } |
1892 | |
1893 | static |
1894 | void findExceptionalTransitions(const build_info &args, |
1895 | unordered_set<NFAEdge> &exceptional, |
1896 | u32 maxShift) { |
1897 | const NGHolder &h = args.h; |
1898 | |
1899 | for (const auto &e : edges_range(h)) { |
1900 | u32 from = args.state_ids.at(source(e, h)); |
1901 | u32 to = args.state_ids.at(target(e, h)); |
1902 | if (from == NO_STATE || to == NO_STATE) { |
1903 | continue; |
1904 | } |
1905 | |
1906 | if (isExceptionalTransition(from, to, args, maxShift)) { |
1907 | exceptional.insert(e); |
1908 | } |
1909 | } |
1910 | } |
1911 | |
1912 | static |
1913 | void writeExceptions(const map<ExceptionProto, vector<u32>> &exceptionMap, |
1914 | const vector<u32> &repeatOffsets, implNFA_t *limex, |
1915 | const u32 exceptionsOffset, |
1916 | const u32 reportListOffset) { |
1917 | DEBUG_PRINTF("exceptionsOffset=%u\n" , exceptionsOffset); |
1918 | |
1919 | exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); |
1920 | assert(ISALIGNED(etable)); |
1921 | |
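        // Invert the proto -> states mapping so that exceptions are written
        // out in ascending state id order, matching the bit order used for
        // exceptionMask below.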
1922 | map<u32, ExceptionProto> exception_by_state; |
1923 | for (const auto &m : exceptionMap) { |
1924 | const ExceptionProto &proto = m.first; |
1925 | const vector<u32> &states = m.second; |
1926 | for (u32 i : states) { |
1927 | assert(!contains(exception_by_state, i)); |
1928 | exception_by_state.emplace(i, proto); |
1929 | } |
1930 | } |
1931 | |
1932 | u32 ecount = 0; |
1933 | for (const auto &m : exception_by_state) { |
1934 | const ExceptionProto &proto = m.second; |
1935 | u32 state_id = m.first; |
1936 | DEBUG_PRINTF("exception %u, triggered by state %u\n" , ecount, |
1937 | state_id); |
1938 | |
1939 | // Write the exception entry. |
1940 | exception_t &e = etable[ecount]; |
1941 | maskSetBits(e.squash, proto.squash_states); |
1942 | maskSetBits(e.successors, proto.succ_states); |
1943 | if (proto.reports_index == MO_INVALID_IDX) { |
1944 | e.reports = MO_INVALID_IDX; |
1945 | } else { |
1946 | e.reports = reportListOffset + |
1947 | proto.reports_index * sizeof(ReportID); |
1948 | } |
1949 | e.hasSquash = verify_u8(proto.squash); |
1950 | e.trigger = verify_u8(proto.trigger); |
1951 | u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX |
1952 | ? MO_INVALID_IDX |
1953 | : repeatOffsets[proto.repeat_index]; |
1954 | e.repeatOffset = repeat_offset; |
1955 | |
            // Set this state's bit in the exception mask so the runtime knows
            // that this state has an exception entry to process.
1958 | maskSetBit(limex->exceptionMask, state_id); |
1959 | |
1960 | ecount++; |
1961 | } |
1962 | |
1963 | limex->exceptionOffset = exceptionsOffset; |
1964 | limex->exceptionCount = ecount; |
1965 | } |
1966 | |
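    /**
     * \brief Copy the byte -> reach class map into the LimEx structure and
     * write one reach mask row (a set of states) per class immediately after
     * it.
     */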
1967 | static |
1968 | void writeReachMapping(const vector<NFAStateSet> &reach, |
1969 | const vector<u8> &reachMap, implNFA_t *limex, |
1970 | const u32 reachOffset) { |
1971 | DEBUG_PRINTF("reachOffset=%u\n" , reachOffset); |
1972 | |
1973 | // Reach mapping is inside the LimEx structure. |
1974 | copy(reachMap.begin(), reachMap.end(), &limex->reachMap[0]); |
1975 | |
1976 | // Reach table is right after the LimEx structure. |
1977 | tableRow_t *reachMask = (tableRow_t *)((char *)limex + reachOffset); |
1978 | assert(ISALIGNED(reachMask)); |
1979 | for (size_t i = 0, end = reach.size(); i < end; i++) { |
1980 | maskSetBits(reachMask[i], reach[i]); |
1981 | } |
1982 | limex->reachSize = verify_u32(reach.size()); |
1983 | } |
1984 | |
1985 | static |
1986 | void writeTopMasks(const vector<NFAStateSet> &tops, implNFA_t *limex, |
1987 | const u32 topsOffset) { |
1988 | DEBUG_PRINTF("topsOffset=%u\n" , topsOffset); |
1989 | |
1990 | limex->topOffset = topsOffset; |
1991 | tableRow_t *topMasks = (tableRow_t *)((char *)limex + topsOffset); |
1992 | assert(ISALIGNED(topMasks)); |
1993 | |
1994 | for (size_t i = 0, end = tops.size(); i < end; i++) { |
1995 | maskSetBits(topMasks[i], tops[i]); |
1996 | } |
1997 | |
1998 | limex->topCount = verify_u32(tops.size()); |
1999 | } |
2000 | |
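    /**
     * \brief Write the PSHUFB permute/compare masks for accelerable states:
     * byte 'num' of the permute mask selects the byte (within its 128-bit
     * lane) of the state vector holding that state's bit, and byte 'num' of
     * the compare mask holds the complement of that bit.
     */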
2001 | static |
2002 | void writeAccelSsse3Masks(const NFAStateSet &accelMask, implNFA_t *limex) { |
2003 | char *perm_base = (char *)&limex->accelPermute; |
2004 | char *comp_base = (char *)&limex->accelCompare; |
2005 | |
2006 | u32 num = 0; // index in accel table. |
2007 | for (size_t i = accelMask.find_first(); i != accelMask.npos; |
2008 | i = accelMask.find_next(i), ++num) { |
2009 | u32 state_id = verify_u32(i); |
2010 | DEBUG_PRINTF("accel num=%u, state=%u\n" , num, state_id); |
2011 | |
2012 | // PSHUFB permute and compare masks |
2013 | size_t mask_idx = sizeof(u_128) * (state_id / 128U); |
2014 | DEBUG_PRINTF("mask_idx=%zu\n" , mask_idx); |
2015 | u_128 *perm = (u_128 *)(perm_base + mask_idx); |
2016 | u_128 *comp = (u_128 *)(comp_base + mask_idx); |
2017 | maskSetByte(*perm, num, ((state_id % 128U) / 8U)); |
2018 | maskSetByte(*comp, num, ~(1U << (state_id % 8U))); |
2019 | } |
2020 | } |
2021 | |
2022 | static |
2023 | void writeAccel(const NFAStateSet &accelMask, |
2024 | const NFAStateSet &accelFriendsMask, |
2025 | const AccelAuxVector &accelAux, |
2026 | const vector<u8> &accelTable, implNFA_t *limex, |
2027 | const u32 accelTableOffset, const u32 accelAuxOffset) { |
2028 | DEBUG_PRINTF("accelTableOffset=%u, accelAuxOffset=%u\n" , |
2029 | accelTableOffset, accelAuxOffset); |
2030 | |
2031 | // Write accel lookup table. |
2032 | limex->accelTableOffset = accelTableOffset; |
2033 | copy(accelTable.begin(), accelTable.end(), |
2034 | (u8 *)((char *)limex + accelTableOffset)); |
2035 | |
2036 | // Write accel aux structures. |
2037 | limex->accelAuxOffset = accelAuxOffset; |
2038 | AccelAux *auxTable = (AccelAux *)((char *)limex + accelAuxOffset); |
2039 | assert(ISALIGNED(auxTable)); |
2040 | copy(accelAux.begin(), accelAux.end(), auxTable); |
2041 | |
2042 | // Write LimEx structure members. |
2043 | limex->accelCount = verify_u32(accelTable.size()); |
2044 | // FIXME: accelAuxCount is unused? |
2045 | limex->accelAuxCount = verify_u32(accelAux.size()); |
2046 | |
2047 | // Write LimEx masks. |
2048 | maskSetBits(limex->accel, accelMask); |
2049 | maskSetBits(limex->accel_and_friends, accelFriendsMask); |
2050 | |
2051 | // We can use PSHUFB-based shuffles for models >= 128 states. These |
2052 | // require some additional masks in the bytecode. |
2053 | maskClear(limex->accelCompare); |
2054 | maskFill(limex->accelPermute, (char)0x80); |
2055 | if (NFATraits<dtype>::maxStates >= 128) { |
2056 | writeAccelSsse3Masks(accelMask, limex); |
2057 | } |
2058 | } |
2059 | |
2060 | static |
2061 | void writeAccepts(const NFAStateSet &acceptMask, |
2062 | const NFAStateSet &acceptEodMask, |
2063 | const vector<NFAAccept> &accepts, |
2064 | const vector<NFAAccept> &acceptsEod, |
2065 | const vector<NFAStateSet> &squash, implNFA_t *limex, |
2066 | const u32 acceptsOffset, const u32 acceptsEodOffset, |
2067 | const u32 squashOffset, const u32 reportListOffset) { |
2068 | char *limex_base = (char *)limex; |
2069 | |
2070 | DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n" , |
2071 | acceptsOffset, acceptsEodOffset, squashOffset); |
2072 | |
2073 | // LimEx masks (in structure) |
2074 | maskSetBits(limex->accept, acceptMask); |
2075 | maskSetBits(limex->acceptAtEOD, acceptEodMask); |
2076 | |
2077 | // Transforms the indices (report list, squash mask) into offsets |
2078 | // relative to the base of the limex. |
2079 | auto transform_offset_fn = [&](NFAAccept a) { |
2080 | if (!a.single_report) { |
2081 | a.reports = reportListOffset + a.reports * sizeof(ReportID); |
2082 | } |
2083 | a.squash = squashOffset + a.squash * sizeof(tableRow_t); |
2084 | return a; |
2085 | }; |
2086 | |
2087 | // Write accept table. |
2088 | limex->acceptOffset = acceptsOffset; |
2089 | limex->acceptCount = verify_u32(accepts.size()); |
2090 | DEBUG_PRINTF("NFA has %zu accepts\n" , accepts.size()); |
2091 | NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset); |
2092 | assert(ISALIGNED(acceptsTable)); |
2093 | transform(accepts.begin(), accepts.end(), acceptsTable, |
2094 | transform_offset_fn); |
2095 | |
2096 | // Write eod accept table. |
2097 | limex->acceptEodOffset = acceptsEodOffset; |
2098 | limex->acceptEodCount = verify_u32(acceptsEod.size()); |
2099 | DEBUG_PRINTF("NFA has %zu EOD accepts\n" , acceptsEod.size()); |
2100 | NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset); |
2101 | assert(ISALIGNED(acceptsEodTable)); |
2102 | transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable, |
2103 | transform_offset_fn); |
2104 | |
2105 | // Write squash mask table. |
2106 | limex->squashCount = verify_u32(squash.size()); |
2107 | limex->squashOffset = squashOffset; |
2108 | DEBUG_PRINTF("NFA has %zu report squash masks\n" , squash.size()); |
2109 | tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset); |
2110 | assert(ISALIGNED(mask)); |
2111 | for (size_t i = 0, end = squash.size(); i < end; i++) { |
2112 | maskSetBits(mask[i], squash[i]); |
2113 | } |
2114 | } |
2115 | |
2116 | static |
2117 | void writeRepeats(const vector<bytecode_ptr<NFARepeatInfo>> &repeats, |
2118 | vector<u32> &repeatOffsets, implNFA_t *limex, |
2119 | const u32 repeatOffsetsOffset, const u32 repeatOffset) { |
2120 | const u32 num_repeats = verify_u32(repeats.size()); |
2121 | |
2122 | DEBUG_PRINTF("repeatOffsetsOffset=%u, repeatOffset=%u\n" , |
2123 | repeatOffsetsOffset, repeatOffset); |
2124 | |
2125 | repeatOffsets.resize(num_repeats); |
2126 | u32 offset = repeatOffset; |
2127 | |
2128 | for (u32 i = 0; i < num_repeats; i++) { |
2129 | repeatOffsets[i] = offset; |
2130 | assert(repeats[i]); |
2131 | memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size()); |
2132 | offset += repeats[i].size(); |
2133 | } |
2134 | |
2135 | // Write repeat offset lookup table. |
2136 | assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32))); |
2137 | copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets); |
2138 | |
2139 | limex->repeatOffset = repeatOffsetsOffset; |
2140 | limex->repeatCount = num_repeats; |
2141 | } |
2142 | |
2143 | static |
2144 | void writeReportList(const vector<ReportID> &reports, implNFA_t *limex, |
2145 | const u32 reportListOffset) { |
2146 | DEBUG_PRINTF("reportListOffset=%u\n" , reportListOffset); |
2147 | assert(ISALIGNED_N((char *)limex + reportListOffset, |
2148 | alignof(ReportID))); |
2149 | copy_bytes((char *)limex + reportListOffset, reports); |
2150 | } |
2151 | |
2152 | static |
2153 | bytecode_ptr<NFA> generateNfa(const build_info &args) { |
2154 | if (args.num_states > NFATraits<dtype>::maxStates) { |
2155 | return nullptr; |
2156 | } |
2157 | |
2158 | // Build bounded repeat structures. |
2159 | vector<bytecode_ptr<NFARepeatInfo>> repeats; |
2160 | u32 repeats_full_state = 0; |
2161 | u32 repeats_stream_state = 0; |
2162 | buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state); |
2163 | size_t repeatSize = 0; |
2164 | for (size_t i = 0; i < repeats.size(); i++) { |
2165 | repeatSize += repeats[i].size(); |
2166 | } |
2167 | |
2168 | // We track report lists that have already been written into the global |
2169 | // list in case we can reuse them. |
2170 | ReportListCache reports_cache; |
2171 | |
2172 | unordered_set<NFAEdge> exceptional; |
2173 | u32 shiftCount = findBestNumOfVarShifts(args); |
2174 | assert(shiftCount); |
2175 | u32 maxShift = findMaxVarShift(args, shiftCount); |
2176 | findExceptionalTransitions(args, exceptional, maxShift); |
2177 | |
2178 | map<ExceptionProto, vector<u32>> exceptionMap; |
2179 | vector<ReportID> reportList; |
2180 | |
2181 | u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, |
2182 | exceptionMap, reportList); |
2183 | |
2184 | assert(exceptionCount <= args.num_states); |
2185 | |
2186 | // Build reach table and character mapping. |
2187 | vector<NFAStateSet> reach; |
2188 | vector<u8> reachMap; |
2189 | buildReachMapping(args, reach, reachMap); |
2190 | |
2191 | // Build top masks. |
2192 | vector<NFAStateSet> tops; |
2193 | buildTopMasks(args, tops); |
2194 | |
2195 | // Build all our accept info. |
2196 | NFAStateSet acceptMask, acceptEodMask; |
2197 | vector<NFAAccept> accepts, acceptsEod; |
2198 | vector<NFAStateSet> squash; |
2199 | buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts, |
2200 | acceptsEod, reportList, squash); |
2201 | |
2202 | // Build all our accel info. |
2203 | NFAStateSet accelMask, accelFriendsMask; |
2204 | AccelAuxVector accelAux; |
2205 | vector<u8> accelTable; |
2206 | buildAccel(args, accelMask, accelFriendsMask, accelAux, accelTable); |
2207 | |
2208 | // Compute the offsets in the bytecode for this LimEx NFA for all of |
2209 | // our structures. First, the NFA and LimEx structures. All other |
2210 | // offsets are relative to the start of the LimEx struct, starting with |
2211 | // the reach table. |
2212 | u32 offset = sizeof(implNFA_t); |
2213 | |
2214 | const u32 reachOffset = offset; |
2215 | offset += sizeof(tableRow_t) * reach.size(); |
2216 | |
2217 | const u32 topsOffset = offset; |
2218 | offset += sizeof(tableRow_t) * tops.size(); |
2219 | |
2220 | const u32 accelTableOffset = offset; |
2221 | offset += sizeof(u8) * accelTable.size(); |
2222 | |
2223 | offset = ROUNDUP_N(offset, alignof(AccelAux)); |
2224 | const u32 accelAuxOffset = offset; |
2225 | offset += sizeof(AccelAux) * accelAux.size(); |
2226 | |
2227 | offset = ROUNDUP_N(offset, alignof(NFAAccept)); |
2228 | const u32 acceptsOffset = offset; |
2229 | offset += sizeof(NFAAccept) * accepts.size(); |
2230 | const u32 acceptsEodOffset = offset; |
2231 | offset += sizeof(NFAAccept) * acceptsEod.size(); |
2232 | |
2233 | offset = ROUNDUP_CL(offset); |
2234 | const u32 squashOffset = offset; |
2235 | offset += sizeof(tableRow_t) * squash.size(); |
2236 | |
2237 | offset = ROUNDUP_CL(offset); |
2238 | const u32 exceptionsOffset = offset; |
2239 | offset += sizeof(exception_t) * exceptionCount; |
2240 | |
2241 | const u32 reportListOffset = offset; |
2242 | offset += sizeof(ReportID) * reportList.size(); |
2243 | |
2244 | const u32 repeatOffsetsOffset = offset; |
2245 | offset += sizeof(u32) * args.repeats.size(); |
2246 | |
2247 | offset = ROUNDUP_CL(offset); |
2248 | const u32 repeatsOffset = offset; |
2249 | offset += repeatSize; |
2250 | |
2251 | // Now we can allocate space for the NFA and get to work on layout. |
2252 | |
2253 | size_t nfaSize = sizeof(NFA) + offset; |
2254 | DEBUG_PRINTF("nfa size %zu\n" , nfaSize); |
2255 | auto nfa = make_zeroed_bytecode_ptr<NFA>(nfaSize); |
2256 | assert(nfa); // otherwise we would have thrown std::bad_alloc |
2257 | |
2258 | implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get()); |
2259 | assert(ISALIGNED(limex)); |
2260 | |
2261 | writeReachMapping(reach, reachMap, limex, reachOffset); |
2262 | |
2263 | writeTopMasks(tops, limex, topsOffset); |
2264 | |
2265 | writeAccel(accelMask, accelFriendsMask, accelAux, accelTable, |
2266 | limex, accelTableOffset, accelAuxOffset); |
2267 | |
2268 | writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, |
2269 | limex, acceptsOffset, acceptsEodOffset, squashOffset, |
2270 | reportListOffset); |
2271 | |
2272 | limex->shiftCount = shiftCount; |
2273 | writeShiftMasks(args, limex); |
2274 | |
2275 | if (cannotDie(args)) { |
2276 | DEBUG_PRINTF("nfa cannot die\n" ); |
2277 | setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE); |
2278 | } |
2279 | |
2280 | // Determine the state required for our state vector. |
2281 | findStateSize(args, limex); |
2282 | |
2283 | writeReportList(reportList, limex, reportListOffset); |
2284 | |
2285 | // Repeat structures and offset table. |
2286 | vector<u32> repeatOffsets; |
2287 | writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset, |
2288 | repeatsOffset); |
2289 | |
2290 | writeExceptions(exceptionMap, repeatOffsets, limex, exceptionsOffset, |
2291 | reportListOffset); |
2292 | |
2293 | writeLimexMasks(args, limex); |
2294 | |
2295 | allocState(nfa.get(), repeats_full_state, repeats_stream_state); |
2296 | |
2297 | nfa->type = dtype; |
2298 | nfa->length = verify_u32(nfaSize); |
2299 | nfa->nPositions = args.num_states; |
2300 | |
2301 | if (!args.zombies.empty()) { |
2302 | setNfaFlag(nfa.get(), NFA_ZOMBIE); |
2303 | } |
2304 | if (!acceptsEod.empty()) { |
2305 | setNfaFlag(nfa.get(), NFA_ACCEPTS_EOD); |
2306 | } |
2307 | |
2308 | return nfa; |
2309 | } |
2310 | |
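    /**
     * \brief Score this model for the given graph. Returns -1 if the required
     * container size (or the grey-box forced size) does not match this
     * model's width; otherwise returns the shift/exception cost from
     * findBestNumOfVarShifts, where lower is better.
     */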
2311 | static int score(const build_info &args) { |
        // LimEx NFAs are available in sizes from 32 to 512 bits.
2313 | size_t num_states = args.num_states; |
2314 | |
2315 | size_t sz = findContainerSize(num_states); |
2316 | if (sz < 32) { |
2317 | sz = 32; |
2318 | } |
2319 | |
2320 | if (args.cc.grey.nfaForceSize) { |
2321 | sz = args.cc.grey.nfaForceSize; |
2322 | } |
2323 | |
2324 | if (sz != NFATraits<dtype>::maxStates) { |
2325 | return -1; // fail, size not appropriate |
2326 | } |
2327 | |
        // We are the right size; calculate a score based on the number of
        // exceptions and the number of shifts used by this LimEx.
2330 | int score; |
2331 | u32 shiftCount = findBestNumOfVarShifts(args, &score); |
2332 | if (shiftCount == 0) { |
2333 | return -1; |
2334 | } |
2335 | return score; |
2336 | } |
2337 | }; |
2338 | |
2339 | template<NFAEngineType dtype> |
2340 | struct generateNfa { |
2341 | static bytecode_ptr<NFA> call(const build_info &args) { |
2342 | return Factory<dtype>::generateNfa(args); |
2343 | } |
2344 | }; |
2345 | |
2346 | template<NFAEngineType dtype> |
2347 | struct scoreNfa { |
2348 | static int call(const build_info &args) { |
2349 | return Factory<dtype>::score(args); |
2350 | } |
2351 | }; |
2352 | |
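// Instantiate NFATraits for each supported LimEx width. Note that the 64-bit
// model reserves an m128-sized scratch state row.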
2353 | #define MAKE_LIMEX_TRAITS(mlt_size) \ |
2354 | template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \ |
2355 | typedef LimExNFA##mlt_size implNFA_t; \ |
2356 | typedef u_##mlt_size tableRow_t; \ |
2357 | typedef NFAException##mlt_size exception_t; \ |
2358 | static const size_t maxStates = mlt_size; \ |
2359 | static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \ |
2360 | : sizeof(tableRow_t); \ |
2361 | }; |
2362 | |
2363 | MAKE_LIMEX_TRAITS(32) |
2364 | MAKE_LIMEX_TRAITS(64) |
2365 | MAKE_LIMEX_TRAITS(128) |
2366 | MAKE_LIMEX_TRAITS(256) |
2367 | MAKE_LIMEX_TRAITS(384) |
2368 | MAKE_LIMEX_TRAITS(512) |
2369 | |
2370 | } // namespace |
2371 | |
2372 | #ifndef NDEBUG |
2373 | // Some sanity tests, called by an assertion in generate(). |
2374 | static UNUSED |
2375 | bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, |
2376 | const unordered_map<NFAVertex, u32> &state_ids, |
2377 | u32 num_states) { |
2378 | unordered_set<u32> seen; |
2379 | unordered_set<NFAVertex> top_starts; |
2380 | for (const auto &vv : tops | map_values) { |
2381 | insert(&top_starts, vv); |
2382 | } |
2383 | |
2384 | for (auto v : vertices_range(h)) { |
2385 | if (!contains(state_ids, v)) { |
2386 | DEBUG_PRINTF("no entry for vertex %zu in state map\n" , h[v].index); |
2387 | return false; |
2388 | } |
2389 | const u32 i = state_ids.at(v); |
2390 | if (i == NO_STATE) { |
2391 | continue; |
2392 | } |
2393 | |
2394 | DEBUG_PRINTF("checking vertex %zu (state %u)\n" , h[v].index, i); |
2395 | |
2396 | if (i >= num_states || contains(seen, i)) { |
2397 | DEBUG_PRINTF("vertex %u/%u has invalid state\n" , i, num_states); |
2398 | return false; |
2399 | } |
2400 | seen.insert(i); |
2401 | |
        // Every vertex that is assigned a state must have non-empty
        // reachability.
2403 | if (h[v].char_reach.none()) { |
2404 | DEBUG_PRINTF("vertex %zu has empty reachability\n" , h[v].index); |
2405 | return false; |
2406 | } |
2407 | |
2408 | // Every state that isn't a start state (or top, in triggered NFAs) |
2409 | // must have at least one predecessor that is not itself. |
2410 | if (v != h.start && v != h.startDs && !contains(top_starts, v) |
2411 | && !proper_in_degree(v, h)) { |
2412 | DEBUG_PRINTF("vertex %zu has no pred\n" , h[v].index); |
2413 | return false; |
2414 | } |
2415 | } |
2416 | |
2417 | if (seen.size() != num_states) { |
2418 | return false; |
2419 | } |
2420 | |
2421 | return true; |
2422 | } |
2423 | #endif // NDEBUG |
2424 | |
2425 | static |
2426 | u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) { |
2427 | u32 rv = 0; |
2428 | for (const auto &m : state_ids) { |
2429 | DEBUG_PRINTF("state %u\n" , m.second); |
2430 | if (m.second != NO_STATE) { |
2431 | rv = max(m.second, rv); |
2432 | } |
2433 | } |
2434 | DEBUG_PRINTF("max %u\n" , rv); |
2435 | return rv; |
2436 | } |
2437 | |
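/**
 * \brief Build a LimEx NFA implementation for the given graph: score every
 * eligible model (or just the hinted one), then attempt to build them in
 * ascending score order and return the first that succeeds.
 */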
2438 | bytecode_ptr<NFA> generate(NGHolder &h, |
2439 | const unordered_map<NFAVertex, u32> &states, |
2440 | const vector<BoundedRepeatData> &repeats, |
2441 | const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, |
2442 | const unordered_map<NFAVertex, NFAStateSet> &squashMap, |
2443 | const map<u32, set<NFAVertex>> &tops, |
2444 | const set<NFAVertex> &zombies, bool do_accel, |
2445 | bool stateCompression, u32 hint, |
2446 | const CompileContext &cc) { |
2447 | const u32 num_states = max_state(states) + 1; |
2448 | DEBUG_PRINTF("total states: %u\n" , num_states); |
2449 | |
2450 | if (!cc.grey.allowLimExNFA) { |
2451 | DEBUG_PRINTF("limex not allowed\n" ); |
2452 | return nullptr; |
2453 | } |
2454 | |
    // If the caller asks for a particular engine type, it had better be a
    // LimEx NFA.
2456 | assert(hint == INVALID_NFA || hint <= LAST_LIMEX_NFA); |
2457 | DEBUG_PRINTF("hint=%u\n" , hint); |
2458 | |
2459 | // Sanity check the input data. |
2460 | assert(isSane(h, tops, states, num_states)); |
2461 | |
2462 | // Build arguments used in the rest of this file. |
2463 | build_info arg(h, states, repeats, reportSquashMap, squashMap, tops, |
2464 | zombies, do_accel, stateCompression, cc, num_states); |
2465 | |
2466 | // Acceleration analysis. |
2467 | fillAccelInfo(arg); |
2468 | |
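    // Candidate (score, engine type) pairs: either just the hinted model, or
    // every LimEx model that produces a valid score for this graph.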
2469 | vector<pair<int, NFAEngineType>> scores; |
2470 | |
2471 | if (hint != INVALID_NFA) { |
2472 | // The caller has told us what to (attempt to) build. |
2473 | scores.emplace_back(0, (NFAEngineType)hint); |
2474 | } else { |
2475 | for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) { |
2476 | NFAEngineType ntype = (NFAEngineType)i; |
2477 | int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg); |
2478 | if (score >= 0) { |
2479 | DEBUG_PRINTF("%s scores %d\n" , nfa_type_name(ntype), score); |
2480 | scores.emplace_back(score, ntype); |
2481 | } |
2482 | } |
2483 | } |
2484 | |
2485 | if (scores.empty()) { |
2486 | DEBUG_PRINTF("No NFA returned a valid score for this case.\n" ); |
2487 | return nullptr; |
2488 | } |
2489 | |
2490 | // Sort acceptable models in priority order, lowest score first. |
2491 | sort(scores.begin(), scores.end()); |
2492 | |
2493 | for (const auto &elem : scores) { |
2494 | assert(elem.first >= 0); |
2495 | NFAEngineType limex_model = elem.second; |
2496 | auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg); |
2497 | if (nfa) { |
2498 | DEBUG_PRINTF("successful build with NFA engine: %s\n" , |
2499 | nfa_type_name(limex_model)); |
2500 | return nfa; |
2501 | } |
2502 | } |
2503 | |
2504 | DEBUG_PRINTF("NFA build failed.\n" ); |
2505 | return nullptr; |
2506 | } |
2507 | |
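/**
 * \brief Count the number of accelerable states that acceleration analysis
 * finds for this graph, without building an NFA.
 */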
2508 | u32 countAccelStates(NGHolder &h, |
2509 | const unordered_map<NFAVertex, u32> &states, |
2510 | const vector<BoundedRepeatData> &repeats, |
2511 | const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, |
2512 | const unordered_map<NFAVertex, NFAStateSet> &squashMap, |
2513 | const map<u32, set<NFAVertex>> &tops, |
2514 | const set<NFAVertex> &zombies, |
2515 | const CompileContext &cc) { |
2516 | const u32 num_states = max_state(states) + 1; |
2517 | DEBUG_PRINTF("total states: %u\n" , num_states); |
2518 | |
2519 | if (!cc.grey.allowLimExNFA) { |
2520 | DEBUG_PRINTF("limex not allowed\n" ); |
2521 | return 0; |
2522 | } |
2523 | |
2524 | // Sanity check the input data. |
2525 | assert(isSane(h, tops, states, num_states)); |
2526 | |
2527 | const bool do_accel = true; |
2528 | const bool state_compression = false; |
2529 | |
2530 | // Build arguments used in the rest of this file. |
2531 | build_info bi(h, states, repeats, reportSquashMap, squashMap, tops, zombies, |
2532 | do_accel, state_compression, cc, num_states); |
2533 | |
2534 | // Acceleration analysis. |
2535 | nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); |
2536 | |
2537 | u32 num_accel = verify_u32(bi.accel.accel_map.size()); |
2538 | DEBUG_PRINTF("found %u accel states\n" , num_accel); |
2539 | return num_accel; |
2540 | } |
2541 | |
2542 | } // namespace ue2 |
2543 | |