/*
 * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "mcsheng_compile.h"

#include "accel.h"
#include "accelcompile.h"
#include "grey.h"
#include "mcclellancompile.h"
#include "mcclellancompile_util.h"
#include "mcsheng_internal.h"
#include "nfa_internal.h"
#include "rdfa_graph.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/flat_containers.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/unaligned.h"
#include "util/unordered.h"
#include "util/verify_types.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <map>
#include <memory>
#include <set>
#include <vector>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_keys;

namespace ue2 {

namespace /* anon */ {

#define MIN_SHENG_SIZE 6
#define INVALID_SHENG_ID 255

struct dstate_extra {
    u16 daddytaken = 0;
    bool shermanState = false;
    bool sheng_succ = false;
    u8 sheng_id = INVALID_SHENG_ID;
};

struct dfa_info {
    accel_dfa_build_strat &strat;
    raw_dfa &raw;
    vector<dstate> &states;
    vector<dstate_extra> extra;
    const u16 alpha_size; /* including special symbols */
    const array<u16, ALPHABET_SIZE> &alpha_remap;
    vector<CharReach> rev_alpha;
    const u16 impl_alpha_size;

    u8 getAlphaShift() const;

    explicit dfa_info(accel_dfa_build_strat &s)
        : strat(s),
          raw(s.get_raw()),
          states(raw.states),
          extra(raw.states.size()),
          alpha_size(raw.alpha_size),
          alpha_remap(raw.alpha_remap),
          impl_alpha_size(raw.getImplAlphaSize()) {
        rev_alpha.resize(impl_alpha_size);
        for (u32 i = 0; i < N_CHARS; i++) {
            rev_alpha[alpha_remap[i]].set(i);
        }
    }

    dstate_id_t implId(dstate_id_t raw_id) const {
        return states[raw_id].impl_id;
    }

    bool is_sherman(dstate_id_t raw_id) const {
        return extra[raw_id].shermanState;
    }

    bool is_sheng(dstate_id_t raw_id) const {
        return extra[raw_id].sheng_id != INVALID_SHENG_ID;
    }

    bool is_sheng_succ(dstate_id_t raw_id) const {
        return extra[raw_id].sheng_succ;
    }

    /* states which use the normal transition/successor table */
    bool is_normal(dstate_id_t raw_id) const {
        return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id);
    }
    size_t size(void) const { return states.size(); }
};

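/* The alphabet shift is the log2-rounded-up width of a state's row in the
 * successor table. For example, impl_alpha_size = 40 gives
 * 32 - clz32(39) = 32 - 26 = 6, so each row spans 1 << 6 = 64 entries, the
 * smallest power of two able to index all 40 symbols. */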
u8 dfa_info::getAlphaShift() const {
    if (impl_alpha_size < 2) {
        return 1;
    } else {
        /* log2 round up */
        return 32 - clz32(impl_alpha_size - 1);
    }
}

} // namespace

static
mstate_aux *getAux(NFA *n, dstate_id_t i) {
    mcsheng *m = (mcsheng *)getMutableImplNfa(n);
    mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset);

    mstate_aux *aux = aux_base + i;
    assert((const char *)aux < (const char *)n + m->length);
    return aux;
}

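/* Value encoding used in the shuffle masks built below: a sheng state with
 * impl_id k is stored as k - 1 (the dead state consumes no lane), the dead
 * state itself is stored as sheng_end - 1, and a transition out of the sheng
 * region keeps its impl_id unchanged. Any mask value >= sheng_end - 1
 * therefore signals an exit from the sheng, and every such value must fit in
 * a u8, which is what verify_u8() asserts. */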
static
void createShuffleMasks(mcsheng *m, const dfa_info &info,
                        dstate_id_t sheng_end,
                        const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
    assert(sheng_end > DEAD_STATE + 1);
    assert(sheng_end <= sizeof(m128) + 1);
    vector<array<u8, sizeof(m128)>> masks;
    masks.resize(info.alpha_size);
    /* -1 to avoid wasting a slot as we do not include dead state */
    vector<dstate_id_t> raw_ids;
    raw_ids.resize(sheng_end - 1);
    for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
        assert(info.implId(s)); /* should not map to DEAD_STATE */
        if (info.is_sheng(s)) {
            raw_ids[info.extra[s].sheng_id] = s;
        }
    }
    for (u32 i = 0; i < info.alpha_size; i++) {
        if (i == info.alpha_remap[TOP]) {
            continue;
        }
        auto &mask = masks[i];
        assert(sizeof(mask) == sizeof(m128));
        mask.fill(0);

        for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
            dstate_id_t raw_id = raw_ids[sheng_id];
            dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
            if (next_id == DEAD_STATE) {
                next_id = sheng_end - 1;
            } else if (next_id < sheng_end) {
                next_id--;
            }
            DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
            mask[sheng_id] = verify_u8(next_id);
        }
    }
    for (u32 i = 0; i < N_CHARS; i++) {
        assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
        memcpy((u8 *)&m->sheng_masks[i],
               (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128));
    }
    m->sheng_end = sheng_end;
    m->sheng_accel_limit = sheng_end - 1;

    for (dstate_id_t s : raw_ids) {
        if (contains(accel_escape_info, s)) {
            LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
        }
    }
}

static
void populateBasicInfo(size_t state_size, const dfa_info &info,
                       u32 total_size, u32 aux_offset, u32 accel_offset,
                       u32 accel_count, ReportID arb, bool single, NFA *nfa) {
    assert(state_size == sizeof(u16) || state_size == sizeof(u8));

    nfa->length = total_size;
    nfa->nPositions = info.states.size();

    nfa->scratchStateSize = verify_u32(state_size);
    nfa->streamStateSize = verify_u32(state_size);

    if (state_size == sizeof(u8)) {
        nfa->type = MCSHENG_NFA_8;
    } else {
        nfa->type = MCSHENG_NFA_16;
    }

    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    for (u32 i = 0; i < 256; i++) {
        m->remap[i] = verify_u8(info.alpha_remap[i]);
    }
    m->alphaShift = info.getAlphaShift();
    m->length = total_size;
    m->aux_offset = aux_offset;
    m->accel_offset = accel_offset;
    m->arb_report = arb;
    m->state_count = verify_u16(info.size());
    m->start_anchored = info.implId(info.raw.start_anchored);
    m->start_floating = info.implId(info.raw.start_floating);
    m->has_accel = accel_count ? 1 : 0;

    if (single) {
        m->flags |= MCSHENG_FLAG_SINGLE;
    }
}

static
size_t calcShermanRegionSize(const dfa_info &info) {
    size_t rv = 0;

    for (size_t i = 0; i < info.size(); i++) {
        if (info.is_sherman(i)) {
            rv += SHERMAN_FIXED_SIZE;
        }
    }

    return ROUNDUP_16(rv);
}

static
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
               const vector<u32> &reports, const vector<u32> &reports_eod,
               const vector<u32> &reportOffsets) {
    const dstate &raw_state = info.states[i];
    aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
    aux->accept_eod = raw_state.reports_eod.empty()
                          ? 0 : reportOffsets[reports_eod[i]];
    aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
                             : info.raw.start_floating);
}

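/* Layout of 16-bit impl ids, as assigned below: 0 is the dead state, ids in
 * [1, sheng_end) were already claimed by the sheng region, normal states
 * (sheng successors first) occupy [sheng_end, *sherman_base), and sherman
 * states follow from *sherman_base. Sheng successors are placed first so
 * that their ids stay within a u8, as the sheng shuffle masks require. */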
/* returns false on error */
static
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
                      dstate_id_t *sherman_base) {
    info.states[0].impl_id = 0; /* dead is always 0 */

    vector<dstate_id_t> norm;
    vector<dstate_id_t> sherm;
    vector<dstate_id_t> norm_sheng_succ;
    vector<dstate_id_t> sherm_sheng_succ;

    if (info.size() > (1 << 16)) {
        DEBUG_PRINTF("too many states\n");
        *sherman_base = 0;
        return false;
    }

    for (u32 i = 1; i < info.size(); i++) {
        if (info.is_sheng(i)) {
            continue; /* sheng impl ids have already been allocated */
        } else if (info.is_sherman(i)) {
            if (info.is_sheng_succ(i)) {
                sherm_sheng_succ.push_back(i);
            } else {
                sherm.push_back(i);
            }
        } else {
            if (info.is_sheng_succ(i)) {
                norm_sheng_succ.push_back(i);
            } else {
                norm.push_back(i);
            }
        }
    }

    dstate_id_t next_norm = sheng_end;
    for (dstate_id_t s : norm_sheng_succ) {
        info.states[s].impl_id = next_norm++;
    }
    if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) {
        /* we need to give sheng_succs ids which fit into a u8 -- demote these
         * sherman states to normal states */
        for (dstate_id_t s : sherm_sheng_succ) {
            info.states[s].impl_id = next_norm++;
            info.extra[s].shermanState = false;
        }
        sherm_sheng_succ.clear();
    }
    for (dstate_id_t s : norm) {
        info.states[s].impl_id = next_norm++;
    }

    *sherman_base = next_norm;
    dstate_id_t next_sherman = next_norm;

    for (dstate_id_t s : sherm_sheng_succ) {
        info.states[s].impl_id = next_sherman++;
    }

    for (dstate_id_t s : sherm) {
        info.states[s].impl_id = next_sherman++;
    }

    /* Check that we have not over-allocated our states */
    DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman,
                 (dstate_id_t)(next_sherman & STATE_MASK));
    return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
}

typedef RdfaGraph::vertex_descriptor RdfaVertex;

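/* Mark every state reachable directly from the sheng region. These
 * successors must later receive impl ids that fit in a u8, since exits from
 * the sheng are encoded as byte values in the shuffle masks; if the region
 * plus its exits cannot fit in a byte, the sheng scheme is abandoned. */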
static
bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info,
                      const flat_set<RdfaVertex> &sheng_states) {
    u32 exit_count = 0;

    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        for (u32 i = 0; i != info.alpha_size; i++) {
            if (i == info.alpha_remap[TOP]) {
                continue;
            }
            dstate_id_t next = info.states[s].next[i];
            if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) {
                continue;
            }
            exit_count++;
            info.extra[next].sheng_succ = true;
        }
    }

    if (exit_count + sheng_states.size() < UINT8_MAX) {
        return true;
    } else {
        DEBUG_PRINTF("fail: unable to fit %u exits in a byte\n", exit_count);
        return false;
    }
}

static
CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) {
    CharReach rv;
    for (u32 i = 0; i < info.impl_alpha_size; i++) {
        if (info.raw.states[u].next[i] == v) {
            assert(info.rev_alpha[i].any());
            rv |= info.rev_alpha[i];
        }
    }
    assert(rv.any());
    return rv;
}

#define MAX_SHENG_STATES 16
#define MAX_SHENG_LEAKINESS 0.05

using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>;

/**
 * Returns the proportion of strings of length 'depth' which will leave the
 * sheng region when starting at state 'u'.
 */
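/* Equivalently, with reach(u, v) the set of bytes taking u to v, this
 * memoised recursion computes
 *   leak(u, d) = sum over live successors v of (|reach(u, v)| / 256) * w(v)
 * where w(v) = 1 if v lies outside the sheng region, w(v) = leak(v, d - 1)
 * if v is inside the region and d > 1, and w(v) = 0 otherwise. */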
static
double leakiness(const RdfaGraph &g, dfa_info &info,
                 const flat_set<RdfaVertex> &sheng_states, RdfaVertex u,
                 u32 depth, LeakinessCache &cache) {
    double rv = 0;
    if (contains(cache, make_pair(u, depth))) {
        return cache[make_pair(u, depth)];
    }
    for (RdfaVertex v : adjacent_vertices_range(u, g)) {
        if (g[v].index == DEAD_STATE) {
            continue;
        }
        double width = get_edge_reach(g[u].index, g[v].index, info).count();
        width /= N_CHARS;

        double weight;
        if (!contains(sheng_states, v)) {
            weight = 1;
        } else if (depth > 1) {
            weight = leakiness(g, info, sheng_states, v, depth - 1, cache);
        } else {
            continue; /* weight = 0 */
        }
        rv += width * weight;
    }

    cache[make_pair(u, depth)] = rv;
    DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv);
    return rv;
}

/**
 * Returns the proportion of 8 byte strings which will leave the sheng region
 * when starting at state 'u'.
 */
static
double leakiness(const RdfaGraph &g, dfa_info &info,
                 const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) {
    LeakinessCache cache;
    double rv = leakiness(g, info, sheng_states, u, 8, cache);
    return rv;
}

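/* Select the states that will form the sheng region. In outline: pick a
 * 'sticky' cyclic state (one whose estimated back-edge reach covers at least
 * 30 characters), grow the region from it by BFS up to MAX_SHENG_STATES
 * states, skipping report-raising states where callbacks are generated, then
 * insist that the region contains at least MIN_SHENG_SIZE states, includes a
 * back edge to the cyclic, and leaks at most MAX_SHENG_LEAKINESS of 8-byte
 * strings. Returns the first impl id after the sheng region, or DEAD_STATE
 * on failure. */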
static
dstate_id_t find_sheng_states(dfa_info &info,
                              map<dstate_id_t, AccelScheme> &accel_escape_info) {
    RdfaGraph g(info.raw);
    auto cyclics = find_vertices_in_cycles(g);

    auto base_cyclic = RdfaGraph::null_vertex();
    for (const auto &v : cyclics) {
        if (g[v].index == DEAD_STATE) {
            continue;
        }
        DEBUG_PRINTF("considering cyclic %zu\n", g[v].index);
        /* get an estimate of the stickiness of the cyclic: assume any edges
         * from states with larger state ids are back edges */
        CharReach est_back_reach;
        for (const auto &u : inv_adjacent_vertices_range(v, g)) {
            if (g[u].index < g[v].index) {
                continue;
            }
            est_back_reach |= get_edge_reach(g[u].index, g[v].index, info);
        }

        if (est_back_reach.count() < 30) {
            continue;
        }
        base_cyclic = v;
        break;
    }
    if (!base_cyclic) {
        return DEAD_STATE;
    }

    flat_set<RdfaVertex> sheng_states;
    deque<RdfaVertex> to_consider = { base_cyclic };
    flat_set<dstate_id_t> considered = { DEAD_STATE };
    bool seen_back_edge = false;
    while (!to_consider.empty()
           && sheng_states.size() < MAX_SHENG_STATES) {
        auto v = to_consider.front();
        to_consider.pop_front();
        if (!considered.insert(g[v].index).second) {
            continue;
        }

        assert(!contains(sheng_states, v));

        if (generates_callbacks(info.raw.kind)
            && !info.states[g[v].index].reports.empty()) {
            /* cannot raise callbacks from sheng region */
            continue;
        }

        sheng_states.insert(v);
        for (const auto &t : adjacent_vertices_range(v, g)) {
            if (!contains(considered, g[t].index)) {
                to_consider.push_back(t);
            }
            if (t == base_cyclic) {
                seen_back_edge = true;
            }
        }
    }

    /* allocate normal ids */
    dstate_id_t sheng_end = DEAD_STATE + 1;
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (!contains(accel_escape_info, s)) {
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    /* allocate accel ids */
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (contains(accel_escape_info, s)) {
            assert(!info.states[s].impl_id);
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    if (sheng_states.size() < MIN_SHENG_SIZE) {
        DEBUG_PRINTF("sheng region too small\n");
        return DEAD_STATE;
    }

    if (!seen_back_edge) {
        DEBUG_PRINTF("did not include cyclic\n");
        return DEAD_STATE;
    }

    double leak = leakiness(g, info, sheng_states, base_cyclic);
    if (leak > MAX_SHENG_LEAKINESS) {
        DEBUG_PRINTF("too leaky (%g)\n", leak);
        return DEAD_STATE;
    }

    if (!mark_sheng_succs(g, info, sheng_states)) {
        return DEAD_STATE;
    }

    /* TODO: ensure sufficiently 'sticky' */
    /* TODO: check not all states accel */
    DEBUG_PRINTF("sheng_end = %hu\n", sheng_end);
    return sheng_end;
}

static
void fill_in_aux_info(NFA *nfa, const dfa_info &info,
                      const map<dstate_id_t, AccelScheme> &accel_escape_info,
                      u32 accel_offset, UNUSED u32 accel_end_offset,
                      const vector<u32> &reports,
                      const vector<u32> &reports_eod,
                      u32 report_base_offset,
                      const raw_report_info &ri) {
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);

    vector<u32> reportOffsets;

    ri.fillReportLists(nfa, report_base_offset, reportOffsets);

    for (u32 i = 0; i < info.size(); i++) {
        u16 impl_id = info.implId(i);
        mstate_aux *this_aux = getAux(nfa, impl_id);

        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
        if (contains(accel_escape_info, i)) {
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset <= accel_end_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }
    }
}

static
u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) {
    mstate_aux *aux = getAux(nfa, target_impl_id);
    u16 flags = 0;

    if (aux->accept) {
        flags |= ACCEPT_FLAG;
    }

    if (aux->accel_offset) {
        flags |= ACCEL_FLAG;
    }

    return flags;
}

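/* Each 16-bit successor table entry packs the target's impl id together with
 * ACCEPT_FLAG/ACCEL_FLAG in the high bits; the runtime recovers the bare id
 * with STATE_MASK, which is why allocateImplId16() insists all ids fit under
 * that mask. Only normal states get rows here: sheng states live in the
 * shuffle masks and sherman states in the sherman region. */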
static
void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
                           dstate_id_t sheng_end,
                           UNUSED dstate_id_t sherman_base) {
    u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_normal(i)) {
            assert(info.implId(i) < sheng_end || info.is_sherman(i));
            continue;
        }

        assert(info.implId(i) < sherman_base);
        u16 normal_id = verify_u16(info.implId(i) - sheng_end);

        for (size_t s = 0; s < info.impl_alpha_size; s++) {
            dstate_id_t raw_succ = info.states[i].next[s];
            u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];

            entry = info.implId(raw_succ);
            entry |= get_edge_flags(nfa, entry);
        }
    }
}

#define MAX_SHERMAN_LIST_LEN 8

static
void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate,
                  dstate_id_t max) {
    if (candidate < max) {
        dest.insert(candidate);
    }
}

static
void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source,
                   u16 alphasize, dstate_id_t curr_id) {
    for (symbol_t s = 0; s < alphasize; s++) {
        addIfEarlier(dest, source.next[s], curr_id);
    }
}

/** \brief Returns a set of states to search for a better daddy. */
static
flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
                                            dstate_id_t curr_id) {
    flat_set<dstate_id_t> hinted;

    addIfEarlier(hinted, 0, curr_id);
    addIfEarlier(hinted, info.raw.start_anchored, curr_id);
    addIfEarlier(hinted, info.raw.start_floating, curr_id);

    // Add existing daddy and his successors, then search back one generation.
    const u16 alphasize = info.impl_alpha_size;
    dstate_id_t daddy = info.states[curr_id].daddy;
    for (u32 level = 0; daddy && level < 2; level++) {
        addIfEarlier(hinted, daddy, curr_id);
        addSuccessors(hinted, info.states[daddy], alphasize, curr_id);
        daddy = info.states[daddy].daddy;
    }

    return hinted;
}

#define MAX_SHERMAN_SELF_LOOP 20

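/* Sherman states trade speed for space: instead of a full successor row,
 * each one stores its 'daddy' state's id plus only the transitions that
 * differ from the daddy's, in a fixed SHERMAN_FIXED_SIZE slot. As a rough
 * example, with a 64-symbol implementation alphabet a full 16-bit row costs
 * 2 * 64 = 128 bytes, so a state that shares most of a daddy's row is far
 * cheaper to store as a list of at most MAX_SHERMAN_LIST_LEN overrides. */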
static
void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
                       bool any_cyclic_near_anchored_state, const Grey &grey) {
    if (!grey.allowShermanStates) {
        return;
    }

    const u16 width = sizeof(u16);
    const u16 alphasize = info.impl_alpha_size;

    if (info.raw.start_anchored != DEAD_STATE
        && any_cyclic_near_anchored_state
        && curr_id < alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed:
         * relies on the fact that states are currently numbered in BFS
         * order */
        DEBUG_PRINTF("%hu is banned\n", curr_id);
        return;
    }

    if (info.raw.start_floating != DEAD_STATE
        && curr_id >= info.raw.start_floating
        && curr_id < info.raw.start_floating + alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed:
         * relies on the fact that states are currently numbered in BFS
         * order */
        DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
        return;
    }

    const u16 full_state_size = width * alphasize;
    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
                                 (full_state_size - 2) / (width + 1));
    u16 best_score = 0;
    dstate_id_t best_daddy = 0;
    dstate &currState = info.states[curr_id];

    flat_set<dstate_id_t> hinted = find_daddy_candidates(info, curr_id);

    for (const dstate_id_t &donor : hinted) {
        assert(donor < curr_id);
        u32 score = 0;

        if (!info.is_normal(donor)) {
            continue;
        }

        const dstate &donorState = info.states[donor];
        for (symbol_t s = 0; s < alphasize; s++) {
            if (currState.next[s] == donorState.next[s]) {
                score++;
            }
        }

        /* prefer lower ids to provide some stability amongst potential
         * siblings */
        if (score > best_score || (score == best_score && donor < best_daddy)) {
            best_daddy = donor;
            best_score = score;

            if (score == alphasize) {
                break;
            }
        }
    }

    currState.daddy = best_daddy;
    info.extra[curr_id].daddytaken = best_score;
    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
                 best_score, alphasize);

    if (best_daddy == DEAD_STATE) {
        return; /* No good daddy */
    }

    if (best_score + max_list_len < alphasize) {
        return; /* too many transitions differ to fit in a sherman list */
    }

    assert(info.is_normal(currState.daddy));

    u32 self_loop_width = 0;
    const dstate &curr_raw = info.states[curr_id];
    for (unsigned i = 0; i < N_CHARS; i++) {
        if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
            self_loop_width++;
        }
    }

    if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
        DEBUG_PRINTF("%hu is banned: wide self loop (%u)\n", curr_id,
                     self_loop_width);
        return;
    }

    if (info.is_sheng(curr_id)) {
        return;
    }

    DEBUG_PRINTF("%hu is sherman\n", curr_id);
    info.extra[curr_id].shermanState = true;
}

static
bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) {
    symbol_t alphasize = raw.getImplAlphaSize();
    for (symbol_t s = 0; s < alphasize; s++) {
        dstate_id_t succ_id = raw.states[root].next[s];
        if (succ_id == DEAD_STATE) {
            continue;
        }

        const dstate &succ = raw.states[succ_id];
        for (symbol_t t = 0; t < alphasize; t++) {
            if (succ.next[t] == root || succ.next[t] == succ_id) {
                return true;
            }
        }
    }
    return false;
}

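/* On-disk layout of a sherman entry, matching the stores below: a type byte
 * (SHERMAN_STATE), a length byte giving the number of overriding transitions,
 * the daddy's impl id as a u16, then the list of overridden chars followed by
 * the corresponding target ids (with edge flags folded in). */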
static
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
    char *nfa_base = (char *)nfa;
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    char *sherman_table = nfa_base + m->sherman_offset;

    assert(ISALIGNED_16(sherman_table));
    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_sherman(i)) {
            continue;
        }
        u16 fs = verify_u16(info.implId(i));
        DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);

        assert(fs >= sherman_limit);

        char *curr_sherman_entry
            = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
        assert(curr_sherman_entry <= nfa_base + m->length);

        u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
        assert(len <= 9);
        dstate_id_t d = info.states[i].daddy;

        *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
        *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
        *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
        u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);

        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                *(chars++) = (u8)s;
            }
        }

        u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
                             info.implId(d),
                             info.implId(info.states[i].next[s]));
                u16 entry_val = info.implId(info.states[i].next[s]);
                entry_val |= get_edge_flags(nfa, entry_val);
                unaligned_store_u16((u8 *)states++, entry_val);
            }
        }
    }
}

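/* Bytecode layout produced below, in offset order: the NFA header, the
 * mcsheng header (with the sheng shuffle masks), the 16-bit successor table
 * for normal states, the aux structures, the report lists, the accel
 * structures, and finally the sherman region. */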
static
bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
                                   const map<dstate_id_t, AccelScheme> &accel_escape_info,
                                   const Grey &grey) {
    DEBUG_PRINTF("building mcsheng 16\n");

    vector<u32> reports; /* index in ri for the appropriate report list */
    vector<u32> reports_eod; /* as above */
    ReportID arb;
    u8 single;

    assert(info.getAlphaShift() <= 8);

    u16 total_daddy = 0;
    for (u32 i = 0; i < info.size(); i++) {
        find_better_daddy(info, i,
                          is_cyclic_near(info.raw, info.raw.start_anchored),
                          grey);
        total_daddy += info.extra[i].daddytaken;
    }

    DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
                 info.size() * info.impl_alpha_size, info.size(),
                 info.impl_alpha_size);

    u16 sherman_limit;
    if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
        DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
                     info.size());
        return nullptr;
    }
    u16 count_real_states = sherman_limit - sheng_end;

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
                     * count_real_states;

    size_t aux_size = sizeof(mstate_aux) * info.size();

    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
    size_t sherman_size = calcShermanRegionSize(info);

    size_t total_size = sherman_offset + sherman_size;

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    /* copy in the mc header information */
    m->sherman_offset = sherman_offset;
    m->sherman_end = total_size;
    m->sherman_limit = sherman_limit;

    DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
                 count_real_states, info.size());

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     sherman_offset - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit);

    fill_in_sherman(nfa.get(), info, sherman_limit);

    return nfa;
}

static
void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
                          dstate_id_t sheng_end) {
    u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng);

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t i = 0; i < info.size(); i++) {
        assert(!info.is_sherman(i));
        if (!info.is_normal(i)) {
            assert(info.implId(i) < sheng_end);
            continue;
        }
        u8 normal_id = verify_u8(info.implId(i) - sheng_end);

        for (size_t s = 0; s < info.impl_alpha_size; s++) {
            dstate_id_t raw_succ = info.states[i].next[s];
            succ_table[((size_t)normal_id << alphaShift) + s]
                = info.implId(raw_succ);
        }
    }
}

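/* 8-bit impl ids are handed out so that a state's class can be read off from
 * two comparisons at runtime: ids below *accel_limit are plain states, ids in
 * [*accel_limit, *accept_limit) are accelerable, and ids from *accept_limit
 * upwards are accept states. Sheng ids, allocated earlier, sit below
 * sheng_end. */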
static
void allocateImplId8(dfa_info &info, dstate_id_t sheng_end,
                     const map<dstate_id_t, AccelScheme> &accel_escape_info,
                     u16 *accel_limit, u16 *accept_limit) {
    info.states[0].impl_id = 0; /* dead is always 0 */

    vector<dstate_id_t> norm;
    vector<dstate_id_t> accel;
    vector<dstate_id_t> accept;

    assert(info.size() <= (1 << 8));

    for (u32 i = 1; i < info.size(); i++) {
        if (info.is_sheng(i)) {
            continue; /* already allocated */
        } else if (!info.states[i].reports.empty()) {
            accept.push_back(i);
        } else if (contains(accel_escape_info, i)) {
            accel.push_back(i);
        } else {
            norm.push_back(i);
        }
    }

    u32 j = sheng_end;
    for (const dstate_id_t &s : norm) {
        assert(j <= 256);
        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
        info.states[s].impl_id = j++;
    }
    *accel_limit = j;
    for (const dstate_id_t &s : accel) {
        assert(j <= 256);
        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
        info.states[s].impl_id = j++;
    }
    *accept_limit = j;
    for (const dstate_id_t &s : accept) {
        assert(j <= 256);
        DEBUG_PRINTF("mapping state %u to %u\n", s, j);
        info.states[s].impl_id = j++;
    }
}

static
bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
                                  const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    DEBUG_PRINTF("building mcsheng 8\n");

    vector<u32> reports;
    vector<u32> reports_eod;
    ReportID arb;
    u8 single;

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    size_t normal_count = info.size() - sheng_end;

    size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
    size_t aux_size = sizeof(mstate_aux) * info.size();
    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t total_size = accel_offset + accel_size;

    DEBUG_PRINTF("aux_size %zu\n", aux_size);
    DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
    DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
    DEBUG_PRINTF("accel_size %zu\n", accel_size);
    DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
    DEBUG_PRINTF("total_size %zu\n", total_size);

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
                    &m->accept_limit_8);

    populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     total_size - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_8(nfa.get(), info, sheng_end);

    DEBUG_PRINTF("rl size %zu\n", ri->size());

    return nfa;
}

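/* Top-level entry point: build an mcsheng implementation of 'raw', or return
 * nullptr if the scheme is disabled, no suitable sheng region can be found,
 * or 16-bit state allocation fails. */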
bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc,
                                 const ReportManager &rm) {
    if (!cc.grey.allowMcSheng) {
        return nullptr;
    }

    mcclellan_build_strat mbs(raw, rm, false);
    dfa_info info(mbs);
    bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }

    bool has_eod_reports = raw.hasEodReports();

    map<dstate_id_t, AccelScheme> accel_escape_info
        = info.strat.getAccelInfo(cc.grey);

    dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info);
    if (sheng_end <= DEAD_STATE + 1) {
        return nullptr;
    }

    bytecode_ptr<NFA> nfa;
    if (!using8bit) {
        nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey);
    } else {
        nfa = mcshengCompile8(info, sheng_end, accel_escape_info);
    }

    if (!nfa) {
        return nfa;
    }

    if (has_eod_reports) {
        nfa->flags |= NFA_ACCEPTS_EOD;
    }

    DEBUG_PRINTF("compile done\n");
    return nfa;
}

bool has_accel_mcsheng(const NFA *) {
    return true; /* consider the sheng region as accelerated */
}

} // namespace ue2