/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "accel_dfa_build_strat.h"

#include "accel.h"
#include "grey.h"
#include "nfagraph/ng_limex_accel.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/accel_scheme.h"
#include "util/charreach.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/small_vector.h"
#include "util/verify_types.h"

#include <sstream>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#define PATHS_LIMIT 500

using namespace std;

namespace ue2 {

namespace {
struct path {
    small_vector<CharReach, MAX_ACCEL_DEPTH + 1> reach;
    dstate_id_t dest = DEAD_STATE;
    explicit path(dstate_id_t base) : dest(base) {}
};
} // namespace

template<typename Container>
void dump_paths(const Container &paths) {
    for (UNUSED const path &p : paths) {
        DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
    }
    DEBUG_PRINTF("%zu paths\n", paths.size());
}

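/* Builds the inverse of the DFA's alphabet remapping: for each remapped
 * symbol (TOP excluded), the CharReach of raw byte values that map to it. */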
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
    vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */

    for (u32 i = 0; i < N_CHARS; i++) {
        rv.at(rdfa.alpha_remap[i]).set(i);
    }

    return rv;
}

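/* Returns false if some path already recorded in |good| (all paths there end
 * at the same state as |p|) dominates |p|: comparing reach classes from the
 * back, every class of |p| is a subset of the corresponding class of the
 * existing path, so |p| adds nothing new. */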
static
bool is_useful_path(const vector<path> &good, const path &p) {
    for (const auto &g : good) {
        assert(g.dest == p.dest);
        assert(g.reach.size() <= p.reach.size());
        auto git = g.reach.rbegin();
        auto pit = p.reach.rbegin();

        for (; git != g.reach.rend(); ++git, ++pit) {
            if (!pit->isSubsetOf(*git)) {
                goto next;
            }
        }
        DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
                     g.dest);

        return false;
    next:;
    }

    return true;
}

static
path append(const path &orig, const CharReach &cr, u32 new_dest) {
    path p(new_dest);
    p.reach = orig.reach;
    p.reach.push_back(cr);

    return p;
}

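/* Extends path |p| by one symbol class towards each distinct successor of its
 * destination state, recording the new paths in |all| (keyed by destination)
 * and |out| and pruning any path already dominated per is_useful_path().
 * Paths whose last reach is empty stop here; report states either terminate
 * the path (when the DFA generates callbacks) or add an empty-reach capped
 * copy alongside the normal extensions. */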
static
void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map,
            const path &p, unordered_map<u32, vector<path>> &all,
            vector<path> &out) {
    const dstate &s = rdfa.states[p.dest];

    if (!p.reach.empty() && p.reach.back().none()) {
        out.push_back(p);
        return;
    }

    if (!s.reports.empty()) {
        if (generates_callbacks(rdfa.kind)) {
            out.push_back(p);
            return;
        } else {
            path pp = append(p, CharReach(), p.dest);
            all[p.dest].push_back(pp);
            out.push_back(std::move(pp));
        }
    }

    if (!s.reports_eod.empty()) {
        path pp = append(p, CharReach(), p.dest);
        all[p.dest].push_back(pp);
        out.push_back(std::move(pp));
    }

    flat_map<u32, CharReach> dest;
    for (u32 i = 0; i < rev_map.size(); i++) {
        u32 succ = s.next[i];
        dest[succ] |= rev_map[i];
    }

    for (const auto &e : dest) {
        path pp = append(p, e.second, e.first);
        if (!is_useful_path(all[e.first], pp)) {
            DEBUG_PRINTF("not useful: [%s] -> %u\n",
                         describeClasses(pp.reach).c_str(), pp.dest);
            continue;
        }

        DEBUG_PRINTF("----good: [%s] -> %u\n",
                     describeClasses(pp.reach).c_str(), pp.dest);
        all[e.first].push_back(pp);
        out.push_back(std::move(pp));
    }
}

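/* Generates the sequences of byte classes seen along paths of length |len|
 * leading out of |base|, stopping early once PATHS_LIMIT paths are in flight.
 * The result is fed to findBestAccelScheme(). */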
static
vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
                                         dstate_id_t base, u32 len) {
    const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
    vector<path> paths{path(base)};
    unordered_map<u32, vector<path>> all;
    all[base].push_back(path(base));
    for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
        vector<path> next_gen;
        for (const auto &p : paths) {
            extend(rdfa, rev_map, p, all, next_gen);
        }

        paths = std::move(next_gen);
    }

    dump_paths(paths);

    vector<vector<CharReach>> rv;
    rv.reserve(paths.size());
    for (auto &p : paths) {
        rv.push_back(vector<CharReach>(std::make_move_iterator(p.reach.begin()),
                                       std::make_move_iterator(p.reach.end())));
    }
    return rv;
}

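/* Looks for an acceleration scheme that applies at an offset from |base| by
 * examining the paths of up to max_allowed_accel_offset + 1 byte classes
 * leading out of it. */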
static
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
                                  u32 max_allowed_accel_offset) {
    DEBUG_PRINTF("looking for accel for %hu\n", base);
    vector<vector<CharReach>> paths =
        generate_paths(rdfa, base, max_allowed_accel_offset + 1);
    AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
    DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
    return as;
}

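/* Returns true if scheme |a| is preferable to |b|: a double-byte scheme beats
 * a single-byte one, otherwise the smaller stop class wins. */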
static UNUSED
bool better(const AccelScheme &a, const AccelScheme &b) {
    if (!a.double_byte.empty() && b.double_byte.empty()) {
        return true;
    }

    if (!b.double_byte.empty()) {
        return false;
    }

    return a.cr.count() < b.cr.count();
}

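/* True if the scheme's double-byte component is usable: there are literal
 * pairs to search for and the single-byte remainder (double_cr) is small
 * enough (at most two characters, fewer than the number of pairs). */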
static
bool double_byte_ok(const AccelScheme &info) {
    return !info.double_byte.empty() &&
           info.double_cr.count() < info.double_byte.size() &&
           info.double_cr.count() <= 2;
}

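/* True if state |s| has a transition back to itself on any symbol other than
 * TOP. */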
static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
    u16 top_remap = raw.alpha_remap[TOP];
    for (u32 i = 0; i < raw.states[s].next.size(); i++) {
        if (i != top_remap && raw.states[s].next[i] == s) {
            return true;
        }
    }
    return false;
}

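/* Returns the remapped symbols for the bytes outside the escape class, i.e.
 * the symbols that do not stop acceleration. */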
static
flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa,
                                   const CharReach &escape) {
    flat_set<u16> rv;
    CharReach nonexit = ~escape;
    for (auto i = nonexit.find_first(); i != nonexit.npos;
         i = nonexit.find_next(i)) {
        rv.insert(rdfa.alpha_remap[i]);
    }

    return rv;
}

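/* Returns the floating start state if the DFA has one; otherwise walks out
 * from the anchored start looking for a nearby state with a (non-TOP)
 * self-loop to act as a proxy for the start-of-data state. Returns DEAD_STATE
 * if no suitable proxy exists. */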
static
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
    if (raw.start_floating != DEAD_STATE) {
        DEBUG_PRINTF("has floating start\n");
        return raw.start_floating;
    }

    DEBUG_PRINTF("looking for SDS proxy\n");

    dstate_id_t s = raw.start_anchored;

    if (has_self_loop(s, raw)) {
        return s;
    }

    u16 top_remap = raw.alpha_remap[TOP];

    std::unordered_set<dstate_id_t> seen;
    while (true) {
        seen.insert(s);
        DEBUG_PRINTF("basis %hu\n", s);

        /* check if we are connected to a state with a self loop */
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t t = raw.states[s].next[i];
            if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
                return t;
            }
        }

        /* find a neighbour to use as a basis for looking for the sds proxy */
        dstate_id_t t = DEAD_STATE;
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t tt = raw.states[s].next[i];
            if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
                t = tt;
                break;
            }
        }

        if (t == DEAD_STATE) {
            /* we were unable to find a state to use as a SDS proxy */
            return DEAD_STATE;
        }

        s = t;
    }
}

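/* Returns the set of states reachable from |base| using only non-exit symbols
 * of the single-byte scheme |ei|; these states can share |base|'s
 * acceleration. For a double-byte scheme only |base| itself is returned. */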
static
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
                             const AccelScheme &ei) {
    DEBUG_PRINTF("looking for region around %hu\n", base);

    set<dstate_id_t> region = {base};

    if (!ei.double_byte.empty()) {
        return region;
    }

    DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset);

    const CharReach &escape = ei.cr;
    auto nonexit_symbols = find_nonexit_symbols(rdfa, escape);

    vector<dstate_id_t> pending = {base};
    while (!pending.empty()) {
        dstate_id_t curr = pending.back();
        pending.pop_back();
        for (auto s : nonexit_symbols) {
            dstate_id_t t = rdfa.states[curr].next[s];
            if (contains(region, t)) {
                continue;
            }

            DEBUG_PRINTF(" %hu is in region\n", t);
            region.insert(t);
            pending.push_back(t);
        }
    }

    return region;
}

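/* Computes an acceleration scheme for the given state. The single-byte stop
 * class is the union of the symbols that leave the state; a double-byte
 * scheme is assembled from the state's successors as long as the resulting
 * set of byte pairs stays small. For the floating start state of an
 * untriggered DFA, offset acceleration is also considered and used if it
 * looks better. */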
AccelScheme
accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
    AccelScheme rv;
    const raw_dfa &rdfa = get_raw();
    rv.cr.clear();
    rv.offset = 0;
    const dstate &raw = rdfa.states[this_idx];
    const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
    bool outs2_broken = false;
    flat_map<dstate_id_t, CharReach> succs;

    for (u32 i = 0; i < rev_map.size(); i++) {
        if (raw.next[i] == this_idx) {
            continue;
        }

        const CharReach &cr_i = rev_map.at(i);

        rv.cr |= cr_i;
        dstate_id_t next_id = raw.next[i];

        DEBUG_PRINTF("next is %hu\n", next_id);
        const dstate &raw_next = rdfa.states[next_id];

        if (outs2_broken) {
            continue;
        }

        if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
            DEBUG_PRINTF("leads to report\n");
            outs2_broken = true; /* cannot accelerate over reports */
            continue;
        }
        succs[next_id] |= cr_i;
    }

    if (!outs2_broken) {
        for (const auto &e : succs) {
            const CharReach &cr_i = e.second;
            const dstate &raw_next = rdfa.states[e.first];

            CharReach cr_all_j;
            for (u32 j = 0; j < rev_map.size(); j++) {
                if (raw_next.next[j] == raw.next[j]) {
                    continue;
                }

                DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2\n", e.first,
                             j, raw_next.next[j]);
                cr_all_j |= rev_map.at(j);
            }

            if (cr_i.count() * cr_all_j.count() > 8) {
                DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count());
                rv.double_cr |= cr_i;
            } else {
                for (auto ii = cr_i.find_first(); ii != CharReach::npos;
                     ii = cr_i.find_next(ii)) {
                    for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
                         jj = cr_all_j.find_next(jj)) {
                        rv.double_byte.emplace((u8)ii, (u8)jj);
                        if (rv.double_byte.size() > 8) {
                            DEBUG_PRINTF("outs2 too big\n");
                            outs2_broken = true;
                            goto done;
                        }
                    }
                }
            }
        }

    done:
        assert(outs2_broken || rv.double_byte.size() <= 8);
        if (outs2_broken) {
            rv.double_byte.clear();
        }
    }

    DEBUG_PRINTF("this %hu, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
    DEBUG_PRINTF("broken %d\n", outs2_broken);
    if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
        this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
        DEBUG_PRINTF("looking for offset accel at %hu\n", this_idx);
        auto offset =
            look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
        DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
        if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
            DEBUG_PRINTF("using offset accel\n");
            rv = offset;
        }
    }

    return rv;
}

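/* Translates an AccelScheme into the runtime AccelAux structure, picking the
 * most specific applicable engine: double vermicelli (exact, caseless or
 * masked), double shufti, vermicelli, caseless vermicelli, shufti and finally
 * truffle. ACCEL_RED_TAPE is used for states with no escape at all and
 * ACCEL_NONE when the stop class is too broad to be worth accelerating. */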
void
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
                                  const AccelScheme &info,
                                  void *accel_out) {
    AccelAux *accel = (AccelAux *)accel_out;

    DEBUG_PRINTF("acceleration scheme has offset s%u/d%u\n", info.offset,
                 info.double_offset);
    accel->generic.offset = verify_u8(info.offset);

    if (double_byte_ok(info) && info.double_cr.none() &&
        info.double_byte.size() == 1) {
        accel->accel_type = ACCEL_DVERM;
        accel->dverm.c1 = info.double_byte.begin()->first;
        accel->dverm.c2 = info.double_byte.begin()->second;
        accel->dverm.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
        return;
    }

    if (double_byte_ok(info) && info.double_cr.none() &&
        (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
        bool ok = true;

        assert(!info.double_byte.empty());
        u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
        u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;

        for (const pair<u8, u8> &p : info.double_byte) {
            if ((p.first & CASE_CLEAR) != firstC ||
                (p.second & CASE_CLEAR) != secondC) {
                ok = false;
                break;
            }
        }

        if (ok) {
            accel->accel_type = ACCEL_DVERM_NOCASE;
            accel->dverm.c1 = firstC;
            accel->dverm.c2 = secondC;
            accel->dverm.offset = verify_u8(info.double_offset);
            DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
            return;
        }

        u8 m1;
        u8 m2;
        if (buildDvermMask(info.double_byte, &m1, &m2)) {
            accel->accel_type = ACCEL_DVERM_MASKED;
            accel->dverm.offset = verify_u8(info.double_offset);
            accel->dverm.c1 = info.double_byte.begin()->first & m1;
            accel->dverm.c2 = info.double_byte.begin()->second & m2;
            accel->dverm.m1 = m1;
            accel->dverm.m2 = m2;
            DEBUG_PRINTF(
                "building masked double-vermicelli for 0x%02hhx%02hhx\n",
                accel->dverm.c1, accel->dverm.c2);
            return;
        }
    }

    if (double_byte_ok(info) &&
        shuftiBuildDoubleMasks(
            info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
            (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
            (u8 *)&accel->dshufti.hi2)) {
        accel->accel_type = ACCEL_DSHUFTI;
        accel->dshufti.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
        return;
    }

    if (info.cr.none()) {
        accel->accel_type = ACCEL_RED_TAPE;
        DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
                     " from which there is no escape\n",
                     this_idx);
        return;
    }

    if (info.cr.count() == 1) {
        accel->accel_type = ACCEL_VERM;
        accel->verm.c = info.cr.find_first();
        DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
        return;
    }

    if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
        accel->accel_type = ACCEL_VERM_NOCASE;
        accel->verm.c = info.cr.find_first() & CASE_CLEAR;
        DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
        return;
    }

    if (info.cr.count() > max_floating_stop_char()) {
        accel->accel_type = ACCEL_NONE;
        DEBUG_PRINTF("state %hu is too broad\n", this_idx);
        return;
    }

    accel->accel_type = ACCEL_SHUFTI;
    if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
                               (u8 *)&accel->shufti.hi)) {
        DEBUG_PRINTF("state %hu is shufti\n", this_idx);
        return;
    }

    assert(!info.cr.none());
    accel->accel_type = ACCEL_TRUFFLE;
    truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
                      (u8 *)&accel->truffle.mask2);
    DEBUG_PRINTF("state %hu is truffle\n", this_idx);
}

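/* Computes acceleration schemes for the DFA's states (all of them, or only
 * the init states when only_accel_init is set), skipping report states when
 * the engine generates callbacks. The single-byte scheme found for the SDS
 * proxy is then propagated to the other states in its region wherever it
 * beats what they already have. */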
map<dstate_id_t, AccelScheme>
accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
    map<dstate_id_t, AccelScheme> rv;
    raw_dfa &rdfa = get_raw();
    if (!grey.accelerateDFA) {
        return rv;
    }

    dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
    DEBUG_PRINTF("sds %hu\n", sds_proxy);

    /* Find accel info for a single state. */
    auto do_state = [&](size_t i) {
        if (i == DEAD_STATE) {
            return;
        }

        /* Note on report acceleration states: While we can't accelerate while
         * we are spamming out callbacks, the QR code paths don't raise reports
         * during scanning so they can accelerate report states. */
        if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
            return;
        }

        size_t single_limit =
            i == sds_proxy ? max_floating_stop_char() : max_stop_char();
        DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);

        AccelScheme ei = find_escape_strings(i);
        if (ei.cr.count() > single_limit) {
            DEBUG_PRINTF("state %zu is not accelerable, has %zu\n", i,
                         ei.cr.count());
            return;
        }

        DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());

        rv[i] = ei;
    };

    if (only_accel_init) {
        DEBUG_PRINTF("only computing accel for init states\n");
        do_state(rdfa.start_anchored);
        if (rdfa.start_floating != rdfa.start_anchored) {
            do_state(rdfa.start_floating);
        }
    } else {
        DEBUG_PRINTF("computing accel for all states\n");
        for (size_t i = 0; i < rdfa.states.size(); i++) {
            do_state(i);
        }
    }

    /* provide acceleration states to states in the region of sds */
    if (contains(rv, sds_proxy)) {
        AccelScheme sds_ei = rv[sds_proxy];
        sds_ei.double_byte.clear(); /* region based on single byte scheme
                                     * may differ from double byte */
        DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
                     sds_ei.cr.count());
        auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
        for (auto s : sds_region) {
            if (!contains(rv, s) || better(sds_ei, rv[s])) {
                rv[s] = sds_ei;
            }
        }
    }

    return rv;
}

} // namespace ue2