ng_limex_accel.cpp source code [ClickHouse/contrib/hyperscan/src/nfagraph/ng_limex_accel.cpp]

1	/*
2	* Copyright (c) 2015-2017, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/* \file*
30	* \brief NFA acceleration analysis code.
31	*/
32	#include "ng_limex_accel.h"
33
34	#include "ng_holder.h"
35	#include "ng_misc_opt.h"
36	#include "ng_util.h"
37	#include "ue2common.h"
38
39	#include "nfa/accel.h"
40
41	#include "util/bitutils.h" // for CASE_CLEAR
42	#include "util/charreach.h"
43	#include "util/compile_context.h"
44	#include "util/container.h"
45	#include "util/dump_charclass.h"
46	#include "util/graph_range.h"
47	#include "util/small_vector.h"
48	#include "util/target_info.h"
49
50	#include <algorithm>
51	#include <map>
52
53	#include <boost/range/adaptor/map.hpp>
54
55	using namespace std;
56	using boost::adaptors::map_keys;
57
58	namespace ue2 {
59
60	#define WIDE_FRIEND_MIN 200
61
62	static
63	void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
64	const flat_set<NFAVertex> &cands,
65	const flat_set<NFAVertex> &preds,
66	flat_set<NFAVertex> *next_cands,
67	flat_set<NFAVertex> *next_preds,
68	flat_set<NFAVertex> *friends) {
69	for (auto v : cands) {
70	if (contains(preds, v)) {
71	continue;
72	}
73
74	const CharReach &acr = g [v].char_reach;
75	DEBUG_PRINTF("checking %zu\n", g[v].index);
76
77	if (acr.count() < WIDE_FRIEND_MIN \|\| !acr.isSubsetOf(cr)) {
78	DEBUG_PRINTF("bad reach %zu\n", acr.count());
79	continue;
80	}
81
82	for (auto u : inv_adjacent_vertices_range(v, g)) {
83	if (!contains(preds, u)) {
84	DEBUG_PRINTF("bad pred\n");
85	goto next_cand;
86	}
87	}
88
89	next_preds->insert(v);
90	insert(next_cands, adjacent_vertices(v, g));
91
92	DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
93	friends->insert(v);
94	next_cand:;
95	}
96	}
97
98	void findAccelFriends(const NGHolder &g, NFAVertex v,
99	const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
100	u32 offset, flat_set<NFAVertex> *friends) {
101	/ A friend of an accel state is a successor state which can only be on when*
102	* the accel is on. This requires that it has a subset of the accel state's
103	* preds and a charreach which is a subset of the accel state.
104	*
105	* A friend can be safely ignored when accelerating provided there is
106	* sufficient back-off. A friend is useful if it has a wide reach.
107	*/
108
109	/ BR cyclic states which may go stale cannot have friends as they may*
110	* suddenly turn off leading their so-called friends stranded and alone.
111	* TODO: restrict to only stale going BR cyclics
112	*/
113	if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
114	return;
115	}
116
117	u32 friend_depth = offset + `1`;
118
119	flat_set<NFAVertex> preds;
120	insert(&preds, inv_adjacent_vertices(v, g));
121	const CharReach &cr = g [v].char_reach;
122
123	flat_set<NFAVertex> cands;
124	insert(&cands, adjacent_vertices(v, g));
125
126	flat_set<NFAVertex> next_preds;
127	flat_set<NFAVertex> next_cands;
128	for (u32 i = `0`; i < friend_depth; i++) {
129	findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
130	friends);
131	preds.insert(next_preds.begin(), next_preds.end());
132	next_preds.clear();
133	cands.swap(next_cands);
134	next_cands.clear();
135	}
136	}
137
138	static
139	void findPaths(const NGHolder &g, NFAVertex v,
140	const vector<CharReach> &refined_cr,
141	vector<vector<CharReach>> *paths,
142	const flat_set<NFAVertex> &forbidden, u32 depth) {
143	static const u32 MAGIC_TOO_WIDE_NUMBER = `16`;
144	if (!depth) {
145	paths->push_back({});
146	return;
147	}
148	if (v == g.accept \|\| v == g.acceptEod) {
149	paths->push_back({});
150	if (!generates_callbacks(g) \|\| v == g.acceptEod) {
151	paths->back().push_back(CharReach ()); / red tape options /
152	}
153	return;
154	}
155
156	/ for the escape 'literals' we want to use the minimal cr so we*
157	* can be more selective */
158	const CharReach &cr = refined_cr [g [v].index];
159
160	if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
161	\|\| hasSelfLoop(v, g)) {
162	/ give up on pushing past this point /
163	paths->push_back({cr});
164	return;
165	}
166
167	vector<vector<CharReach>> curr;
168	for (auto w : adjacent_vertices_range(v, g)) {
169	if (contains(forbidden, w)) {
170	/ path has looped back to one of the active+boring acceleration*
171	* states. We can ignore this path if we have sufficient back-
172	* off. */
173	paths->push_back({CharReach ()});
174	continue;
175	}
176
177	u32 new_depth = depth - `1`;
178	do {
179	curr.clear();
180	findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
181	} while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
182
183	for (auto &c : curr) {
184	c.push_back(cr);
185	paths->push_back(std::move(c));
186	}
187	}
188	}
189
190	namespace {
191	struct SAccelScheme {
192	SAccelScheme(CharReach cr_in, u32 offset_in)
193	: cr (std::move(cr_in)), offset(offset_in) {
194	assert(offset <= MAX_ACCEL_DEPTH);
195	}
196
197	SAccelScheme() {}
198
199	bool operator<(const SAccelScheme &b) const {
200	const SAccelScheme &a = *this;
201
202	const size_t a_count = cr.count(), b_count = b.cr.count();
203	if (a_count != b_count) {
204	return a_count < b_count;
205	}
206
207	/ TODO: give bonus if one is a 'caseless' character /
208	ORDER_CHECK(offset);
209	ORDER_CHECK(cr);
210	return false;
211	}
212
213	CharReach cr = CharReach::dot();
214	u32 offset = MAX_ACCEL_DEPTH + `1`;
215	};
216	}
217
218	/**
219	* \brief Limit on the number of (recursive) calls to findBestInternal().
220	*/
221	static constexpr size_t MAX_FINDBEST_CALLS = `1000000`;
222
223	static
224	void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
225	vector<vector<CharReach>>::const_iterator pe,
226	size_t num_calls, const* SAccelScheme &curr,
227	SAccelScheme *best) {
228	assert(curr.offset <= MAX_ACCEL_DEPTH);
229
230	if (++(*num_calls) > MAX_FINDBEST_CALLS) {
231	DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
232	return;
233	}
234
235	DEBUG_PRINTF("paths left %zu\n", pe - pb);
236	if (pb == pe) {
237	if (curr < *best) {
238	*best = curr;
239	DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
240	best->cr.count(), describeClass(best->cr).c_str(),
241	best->offset);
242	}
243	return;
244	}
245
246	DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
247
248	small_vector<SAccelScheme, `10`> priority_path;
249	priority_path.reserve(pb ->size());
250	u32 i = `0`;
251	for (auto p = pb ->begin(); p != pb ->end(); ++p, i++) {
252	SAccelScheme as(*p \| curr.cr, max(i, curr.offset));
253	if (*best < as) {
254	DEBUG_PRINTF("worse\n");
255	continue;
256	}
257	priority_path.push_back(move(as));
258	}
259
260	sort(priority_path.begin(), priority_path.end());
261	for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
262	auto jt = next(it);
263	for (; jt != priority_path.end(); ++jt) {
264	if (!it ->cr.isSubsetOf(jt ->cr)) {
265	break;
266	}
267	}
268	priority_path.erase(next(it), jt);
269	DEBUG_PRINTF("\|\|%zu\n", it->cr.count());
270	}
271	DEBUG_PRINTF("---\n");
272
273	for (const SAccelScheme &in : priority_path) {
274	DEBUG_PRINTF("in: count %zu\n", in.cr.count());
275	if (*best < in) {
276	DEBUG_PRINTF("worse\n");
277	continue;
278	}
279	findBestInternal(pb + `1`, pe, num_calls, in, best);
280
281	if (curr.cr == best->cr) {
282	return; / could only get better by offset /
283	}
284	}
285	}
286
287	static
288	SAccelScheme findBest(const vector<vector<CharReach>> &paths,
289	const CharReach &terminating) {
290	SAccelScheme curr(terminating, `0U`);
291	SAccelScheme best;
292	size_t num_calls = `0`;
293	findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
294	DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
295	DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
296	best.cr.count(), describeClass(best.cr).c_str(), best.offset);
297	return best;
298	}
299
300	namespace {
301	struct DAccelScheme {
302	DAccelScheme(CharReach cr_in, u32 offset_in)
303	: double_cr (std::move(cr_in)), double_offset(offset_in) {
304	assert(double_offset <= MAX_ACCEL_DEPTH);
305	}
306
307	bool operator<(const DAccelScheme &b) const {
308	const DAccelScheme &a = *this;
309
310	size_t a_dcount = a.double_cr.count();
311	size_t b_dcount = b.double_cr.count();
312
313	assert(!a.double_byte.empty() \|\| a_dcount \|\| a.double_offset);
314	assert(!b.double_byte.empty() \|\| b_dcount \|\| b.double_offset);
315
316	if (a_dcount != b_dcount) {
317	return a_dcount < b_dcount;
318	}
319
320	if (!a_dcount) {
321	bool cd_a = buildDvermMask(a.double_byte);
322	bool cd_b = buildDvermMask(b.double_byte);
323	if (cd_a != cd_b) {
324	return cd_a > cd_b;
325	}
326	}
327
328	ORDER_CHECK(double_byte.size());
329	ORDER_CHECK(double_offset);
330
331	/ TODO: give bonus if one is a 'caseless' character /
332	ORDER_CHECK(double_byte);
333	ORDER_CHECK(double_cr);
334
335	return false;
336	}
337
338	flat_set<pair<u8, u8>> double_byte;
339	CharReach double_cr;
340	u32 double_offset = `0`;
341	};
342	}
343
344	static
345	DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
346	const CharReach &cr_2_in, u32 offset_in) {
347	cr_1 &= ~as.double_cr;
348	CharReach cr_2 = cr_2_in & ~as.double_cr;
349	u32 offset = offset_in;
350
351	if (cr_1.none()) {
352	DEBUG_PRINTF("empty first element\n");
353	ENSURE_AT_LEAST(&as.double_offset, offset);
354	return as;
355	}
356
357	if (cr_2_in != cr_2 \|\| cr_2.none()) {
358	offset = offset_in + `1`;
359	}
360
361	size_t two_count = cr_1.count() * cr_2.count();
362
363	DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);
364
365	if (!two_count) {
366	DEBUG_PRINTF("empty element\n");
367	ENSURE_AT_LEAST(&as.double_offset, offset);
368	return as;
369	}
370
371	if (two_count > DOUBLE_SHUFTI_LIMIT) {
372	if (cr_2.count() < cr_1.count()) {
373	as.double_cr \|= cr_2;
374	offset = offset_in + `1`;
375	} else {
376	as.double_cr \|= cr_1;
377	}
378	} else {
379	for (auto i = cr_1.find_first(); i != CharReach::npos;
380	i = cr_1.find_next(i)) {
381	for (auto j = cr_2.find_first(); j != CharReach::npos;
382	j = cr_2.find_next(j)) {
383	as.double_byte.emplace(i, j);
384	}
385	}
386	}
387
388	ENSURE_AT_LEAST(&as.double_offset, offset);
389	DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
390	as.double_byte.size(), as.double_cr.count(), as.double_offset);
391	return as;
392	}
393
394	static
395	void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
396	vector<vector<CharReach> >::const_iterator pe,
397	const DAccelScheme &curr, DAccelScheme *best) {
398	assert(curr.double_offset <= MAX_ACCEL_DEPTH);
399	DEBUG_PRINTF("paths left %zu\n", pe - pb);
400	DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
401	curr.double_byte.size(), curr.double_cr.count(),
402	curr.double_offset);
403	if (pb == pe) {
404	if (curr < *best) {
405	*best = curr;
406	DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
407	best->double_byte.size(), best->double_cr.count(),
408	best->double_offset);
409	}
410	return;
411	}
412
413	DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
414
415	small_vector<DAccelScheme, `10`> priority_path;
416	priority_path.reserve(pb ->size());
417	u32 i = `0`;
418	for (auto p = pb ->begin(); p != pb ->end() && next(p) != pb ->end();
419	++p, i++) {
420	DAccelScheme as = make_double_accel(curr, p, next(p), i);
421	if (*best < as) {
422	DEBUG_PRINTF("worse\n");
423	continue;
424	}
425	priority_path.push_back(move(as));
426	}
427
428	sort(priority_path.begin(), priority_path.end());
429	DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
430	DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
431	best->double_byte.size(), best->double_cr.count(),
432	best->double_offset);
433
434	for (const DAccelScheme &in : priority_path) {
435	DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
436	in.double_byte.size(), in.double_cr.count(),
437	in.double_offset);
438	if (*best < in) {
439	DEBUG_PRINTF("worse\n");
440	continue;
441	}
442	findDoubleBest(pb + `1`, pe, in, best);
443	}
444	}
445
446	#ifdef DEBUG
447	static
448	void dumpPaths(const vector<vector<CharReach>> &paths) {
449	for (const auto &path : paths) {
450	DEBUG_PRINTF("path: [");
451	for (const auto &cr : path) {
452	printf(" [");
453	describeClass(stdout, cr, `20`, CC_OUT_TEXT);
454	printf("]");
455	}
456	printf(" ]\n");
457	}
458	}
459	#endif
460
461	static
462	void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
463	/ paths segments which are a superset of an earlier segment should never be*
464	* picked as an acceleration segment -> to improve processing just replace
465	* with dot */
466	for (auto &p : paths) {
467	for (auto it = p.begin(); it != p.end(); ++it) {
468	for (auto jt = next(it); jt != p.end(); ++jt) {
469	if (it ->isSubsetOf(*jt)) {
470	*jt = CharReach::dot();
471	}
472	}
473	}
474	}
475	}
476
477	static
478	void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
479	/ try to unify paths which only differ in the last segment /
480	for (vector<vector<CharReach> >::iterator p = paths.begin();
481	p != paths.end() && p + `1` != paths.end();) {
482	vector<CharReach> &a = *p;
483	vector<CharReach> &b = *(p + `1`);
484
485	if (a.size() != b.size()) {
486	++p;
487	continue;
488	}
489
490	u32 i = `0`;
491	for (; i < a.size() - `1`; i++) {
492	if (a [i] != b [i]) {
493	break;
494	}
495	}
496	if (i == a.size() - `1`) {
497	/ we can unify these paths /
498	a [i] \|= b [i];
499	paths.erase(p + `1`);
500	} else {
501	++p;
502	}
503	}
504	}
505
506	static
507	void improvePaths(vector<vector<CharReach> > &paths) {
508	#ifdef DEBUG
509	DEBUG_PRINTF("orig paths\n");
510	dumpPaths(paths);
511	#endif
512	blowoutPathsLessStrictSegment(paths);
513
514	sort(paths.begin(), paths.end());
515
516	unifyPathsLastSegment(paths);
517
518	#ifdef DEBUG
519	DEBUG_PRINTF("opt paths\n");
520	dumpPaths(paths);
521	#endif
522	}
523
524	#define MAX_DOUBLE_ACCEL_PATHS 10
525
526	static
527	DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
528	const CharReach &terminating) {
529	DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
530	terminating.count());
531	unifyPathsLastSegment(paths);
532
533	#ifdef DEBUG
534	DEBUG_PRINTF("paths:\n");
535	dumpPaths(paths);
536	#endif
537
538	/ if there are too many paths, shorten the paths to reduce the number of*
539	* distinct paths we have to consider */
540	while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
541	for (auto &p : paths) {
542	if (p.empty()) {
543	return DAccelScheme (terminating, `0U`);
544	}
545	p.pop_back();
546	}
547	unifyPathsLastSegment(paths);
548	}
549
550	if (paths.empty()) {
551	return DAccelScheme (terminating, `0U`);
552	}
553
554	DAccelScheme curr(terminating, `0U`);
555	DAccelScheme best(CharReach::dot(), `0U`);
556	findDoubleBest(paths.begin(), paths.end(), curr, &best);
557	DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
558	best.double_cr.count());
559	return best;
560	}
561
562	#define MAX_EXPLORE_PATHS 40
563
564	AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
565	const CharReach &terminating,
566	bool look_for_double_byte) {
567	AccelScheme rv;
568	if (look_for_double_byte) {
569	DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
570	if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
571	rv.double_byte = std::move(da.double_byte);
572	rv.double_cr = move(da.double_cr);
573	rv.double_offset = da.double_offset;
574	}
575	}
576
577	improvePaths(paths);
578
579	DEBUG_PRINTF("we have %zu paths\n", paths.size());
580	if (paths.size() > MAX_EXPLORE_PATHS) {
581	return rv; / too many paths to explore /
582	}
583
584	/ if we were smart we would do something netflowy on the paths to find the*
585	* best cut. But we aren't, so we will just brute force it.
586	*/
587	SAccelScheme best = findBest(paths, terminating);
588
589	/ find best is a bit lazy in terms of minimising the offset, see if we can*
590	* make it better. need to find the min max offset that we need.*/
591	u32 offset = `0`;
592	for (const auto &path : paths) {
593	u32 i = `0`;
594	for (const auto &cr : path) {
595	if (cr.isSubsetOf(best.cr)) {
596	break;
597	}
598	i++;
599	}
600	offset = MAX(offset, i);
601	}
602	assert(offset <= best.offset);
603	best.offset = offset;
604
605	rv.offset = best.offset;
606	rv.cr = best.cr;
607	if (rv.cr.count() < rv.double_cr.count()) {
608	rv.double_byte.clear();
609	}
610
611	return rv;
612	}
613
614	AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
615	const vector<CharReach> &refined_cr,
616	const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
617	bool allow_wide, bool look_for_double_byte) {
618	CharReach terminating;
619	for (auto v : verts) {
620	if (!hasSelfLoop(v, g)) {
621	DEBUG_PRINTF("no self loop\n");
622	return AccelScheme (); / invalid scheme /
623	}
624
625	// check that this state is reachable on most characters
626	terminating \|= ~g [v].char_reach;
627	}
628
629	DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
630	size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
631	: ACCEL_MAX_STOP_CHAR;
632	if (terminating.count() > limit) {
633	return AccelScheme (); / invalid scheme /
634	}
635
636	vector<vector<CharReach>> paths;
637	flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
638
639	/ Note: we can not in general (TODO: ignore when possible) ignore entries*
640	* into the bounded repeat cyclic states as that is when the magic happens
641	*/
642	for (auto v : br_cyclic \| map_keys) {
643	/ TODO: can allow if repeatMin <= 1 ? /
644	ignore_vert_set.erase(v);
645	}
646
647	for (auto v : verts) {
648	for (auto w : adjacent_vertices_range(v, g)) {
649	if (w != v) {
650	findPaths(g, w, refined_cr, &paths, ignore_vert_set,
651	MAX_ACCEL_DEPTH);
652	}
653	}
654	}
655
656	/ paths built wrong: reverse them /
657	for (auto &path : paths) {
658	reverse(path.begin(), path.end());
659	}
660
661	return findBestAccelScheme(std::move(paths), terminating,
662	look_for_double_byte);
663	}
664
665	NFAVertex get_sds_or_proxy(const NGHolder &g) {
666	DEBUG_PRINTF("looking for sds proxy\n");
667	if (proper_out_degree(g.startDs, g)) {
668	return g.startDs;
669	}
670
671	NFAVertex v = NGHolder::null_vertex();
672	for (auto w : adjacent_vertices_range(g.start, g)) {
673	if (w != g.startDs) {
674	if (!v) {
675	v = w;
676	} else {
677	return g.startDs;
678	}
679	}
680	}
681
682	if (!v) {
683	return g.startDs;
684	}
685
686	while (true) {
687	if (hasSelfLoop(v, g)) {
688	DEBUG_PRINTF("woot %zu\n", g[v].index);
689	return v;
690	}
691	if (out_degree(v, g) != `1`) {
692	break;
693	}
694	NFAVertex u = getSoleDestVertex(g, v);
695	if (!g [u].char_reach.all()) {
696	break;
697	}
698	v = u;
699	}
700
701	return g.startDs;
702	}
703
704	/* \brief Check if vertex \a v is an accelerable state (for a limex NFA). /
705	bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
706	const vector<CharReach> &refined_cr,
707	const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
708	AccelScheme as, bool* allow_wide) {
709	// For a state to be accelerable, our current criterion is that it be a
710	// large character class with a self-loop and narrow set of possible other
711	// successors (i.e. no special successors, union of successor reachability
712	// is small).
713	if (!hasSelfLoop(v, g)) {
714	return false;
715	}
716
717	// check that this state is reachable on most characters
718	/ we want to use the maximal reach here (in the graph) /
719	CharReach terminating = g [v].char_reach;
720	terminating.flip();
721
722	DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
723	g[v].index, terminating.count(),
724	allow_wide ? " (w)" : "");
725
726	size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
727	: ACCEL_MAX_STOP_CHAR;
728	if (terminating.count() > limit) {
729	DEBUG_PRINTF("too leaky\n");
730	return false;
731	}
732
733	flat_set<NFAVertex> curr, next;
734
735	insert(&curr, adjacent_vertices(v, g));
736	curr.erase(v); // erase self-loop
737
738	// We consider offsets of zero through three; this is fairly arbitrary at
739	// present and could probably be increased (FIXME)
740	/ WARNING: would/could do horrible things to compile time /
741	bool stop = false;
742	vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
743	unsigned int depth;
744	for (depth = `0`; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
745	CharReach &cr = depthReach [depth];
746	for (auto t : curr) {
747	if (is_special(t, g)) {
748	// We've bumped into the edge of the graph, so we should stop
749	// searching.
750	// Exception: iff our cyclic state is not a dot, than we can
751	// safely accelerate towards an EOD accept.
752
753	/ Exception: nfas that don't generate callbacks so accepts are*
754	* fine too */
755	if (t == g.accept && !generates_callbacks(g)) {
756	stop = true; // don't search beyond this depth
757	continue;
758	} else if (t == g.accept) {
759	goto depth_done;
760	}
761
762	assert(t == g.acceptEod);
763	stop = true; // don't search beyond this depth
764	} else {
765	// Non-special vertex
766	insert(&next, adjacent_vertices(t, g));
767	/ for the escape 'literals' we want to use the minimal cr so we*
768	* can be more selective */
769	cr \|= refined_cr [g [t].index];
770	}
771	}
772
773	cr \|= terminating;
774	DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());
775
776	curr.swap(next);
777	next.clear();
778	}
779
780	depth_done:
781
782	if (depth == `0`) {
783	return false;
784	}
785
786	DEBUG_PRINTF("selecting from depth 0..%u\n", depth);
787
788	/ Look for the most awesome acceleration evar /
789	for (unsigned int i = `0`; i < depth; i++) {
790	if (depthReach [i].none()) {
791	DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
792	*as = AccelScheme ();
793	as->offset = i;
794	as->cr = CharReach ();
795	return true;
796	}
797	}
798
799	// First, loop over our depths and see if we have a suitable 2-byte
800	// caseful vermicelli option: this is the (second) fastest accel we have
801	if (depth > `1`) {
802	for (unsigned int i = `0`; i < (depth - `1`); i++) {
803	const CharReach &cra = depthReach [i];
804	const CharReach &crb = depthReach [i + `1`];
805	if ((cra.count() == `1` && crb.count() == `1`)
806	\|\| (cra.count() == `2` && crb.count() == `2`
807	&& cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
808	DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
809	*as = AccelScheme ();
810	as->offset = i;
811	return true;
812	}
813	}
814	}
815
816	// Second option: a two-byte shufti (i.e. less than eight 2-byte
817	// literals)
818	if (depth > `1`) {
819	for (unsigned int i = `0`; i < (depth - `1`); i++) {
820	if (depthReach [i].count() * depthReach [i+`1`].count()
821	<= DOUBLE_SHUFTI_LIMIT) {
822	DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
823	*as = AccelScheme ();
824	as->offset = i;
825	return true;
826	}
827	}
828	}
829
830	// Look for offset accel schemes verm/shufti;
831	vector<NFAVertex> verts(`1`, v);
832	as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true*);
833	DEBUG_PRINTF("as width %zu\n", as->cr.count());
834	return as->cr.count() <= ACCEL_MAX_STOP_CHAR \|\| allow_wide;
835	}
836
837	} // namespace ue2
838

Browse the source code of ClickHouse/contrib/hyperscan/src/nfagraph/ng_limex_accel.cpp