teddy_compile.cpp source code [ClickHouse/contrib/hyperscan/src/fdr/teddy_compile.cpp]

1	/*
2	* Copyright (c) 2015-2017, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/**
30	* \file
31	* \brief FDR literal matcher: Teddy build code.
32	*/
33
34	#include "teddy_compile.h"
35
36	#include "fdr.h"
37	#include "fdr_internal.h"
38	#include "fdr_compile_internal.h"
39	#include "fdr_confirm.h"
40	#include "fdr_engine_description.h"
41	#include "teddy_internal.h"
42	#include "teddy_engine_description.h"
43	#include "grey.h"
44	#include "ue2common.h"
45	#include "hwlm/hwlm_build.h"
46	#include "util/alloc.h"
47	#include "util/compare.h"
48	#include "util/container.h"
49	#include "util/make_unique.h"
50	#include "util/noncopyable.h"
51	#include "util/popcount.h"
52	#include "util/small_vector.h"
53	#include "util/target_info.h"
54	#include "util/verify_types.h"
55
56	#include <algorithm>
57	#include <cassert>
58	#include <cctype>
59	#include <cstdio>
60	#include <cstdlib>
61	#include <cstring>
62	#include <map>
63	#include <memory>
64	#include <set>
65	#include <string>
66	#include <vector>
67
68	using namespace std;
69
70	namespace ue2 {
71
72	namespace {
73
74	//#define TEDDY_DEBUG
75
76	/* \brief Max number of Teddy masks we use. /
77	static constexpr size_t MAX_NUM_MASKS = `4`;
78
79	class TeddyCompiler : noncopyable {
80	const TeddyEngineDescription &eng;
81	const Grey &grey;
82	const vector<hwlmLiteral> &lits;
83	map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
84	bool make_small;
85
86	public:
87	TeddyCompiler(const vector<hwlmLiteral> &lits_in,
88	map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
89	const TeddyEngineDescription &eng_in, bool make_small_in,
90	const Grey &grey_in)
91	: eng(eng_in), grey(grey_in), lits(lits_in),
92	bucketToLits (move(bucketToLits_in)), make_small(make_small_in) {}
93
94	bytecode_ptr<FDR> build();
95	};
96
97	class TeddySet {
98	/**
99	* \brief Estimate of the max number of literals in a set, used to
100	* minimise allocations.
101	*/
102	static constexpr size_t LITS_PER_SET = `20`;
103
104	/* \brief Number of masks. /
105	u32 len;
106
107	/**
108	* \brief A series of bitfields over 16 predicates that represent the
109	* shufti nibble set.
110	*
111	* So for num_masks = 4 we will represent our strings by 8 u16s in the
112	* vector that indicate what a shufti bucket would have to look like.
113	*/
114	small_vector<u16, MAX_NUM_MASKS * `2`> nibbleSets;
115
116	/**
117	* \brief Sorted, unique set of literals. We maintain our own set in a
118	* sorted vector to minimise allocations.
119	*/
120	small_vector<u32, LITS_PER_SET> litIds;
121
122	public:
123	explicit TeddySet(u32 len_in) : len(len_in), nibbleSets (len_in * `2`, `0`) {}
124	size_t litCount() const { return litIds.size(); }
125	const small_vector<u32, LITS_PER_SET> &getLits() const { return litIds; }
126
127	bool operator<(const TeddySet &s) const {
128	return litIds < s.litIds;
129	}
130
131	#ifdef TEDDY_DEBUG
132	void dump() const {
133	printf("TS: ");
134	for (u32 i = `0`; i < nibbleSets.size(); i++) {
135	printf("%04x ", (u32)nibbleSets[i]);
136	}
137	printf("\nnlits: %zu\nLit ids: ", litCount());
138	printf("Prob: %llu\n", probability());
139	for (const auto &id : litIds) {
140	printf("%u ", id);
141	}
142	printf("\n");
143	printf("Flood prone : %s\n", isRunProne() ? "yes" : "no");
144	}
145	#endif
146
147	bool identicalTail(const TeddySet &ts) const {
148	return nibbleSets == ts.nibbleSets;
149	}
150
151	void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
152	const string &s = lit.s;
153	for (u32 i = `0`; i < len; i++) {
154	if (i < s.size()) {
155	u8 c = s [s.size() - i - `1`];
156	u8 c_hi = (c >> `4`) & `0xf`;
157	u8 c_lo = c & `0xf`;
158	nibbleSets [i * `2`] = `1` << c_lo;
159	if (lit.nocase && ourisalpha(c)) {
160	nibbleSets [i * `2` + `1`] =
161	(`1` << (c_hi & `0xd`)) \| (`1` << (c_hi \| `0x2`));
162	} else {
163	nibbleSets [i * `2` + `1`] = `1` << c_hi;
164	}
165	} else {
166	nibbleSets [i * `2`] = nibbleSets [i * `2` + `1`] = `0xffff`;
167	}
168	}
169	litIds.push_back(lit_id);
170	sort_and_unique(litIds);
171	}
172
173	// return a value p from 0 .. MAXINT64 that gives p/MAXINT64
174	// likelihood of this TeddySet firing a first-stage accept
175	// if it was given a bucket of its own and random data were
176	// to be passed in
177	u64a probability() const {
178	u64a val = `1`;
179	for (size_t i = `0`; i < nibbleSets.size(); i++) {
180	val *= popcount32((u32)nibbleSets [i]);
181	}
182	return val;
183	}
184
185	// return a score based around the chance of this hitting times
186	// a small fixed cost + the cost of traversing some sort of followup
187	// (assumption is that the followup is linear)
188	u64a heuristic() const {
189	return probability() * (`2` + litCount());
190	}
191
192	bool isRunProne() const {
193	u16 lo_and = `0xffff`;
194	u16 hi_and = `0xffff`;
195	for (u32 i = `0`; i < len; i++) {
196	lo_and &= nibbleSets [i * `2`];
197	hi_and &= nibbleSets [i * `2` + `1`];
198	}
199	// we're not flood-prone if there's no way to get
200	// through with a flood
201	if (!lo_and \|\| !hi_and) {
202	return false;
203	}
204	return true;
205	}
206
207	friend TeddySet merge(const TeddySet &a, const TeddySet &b) {
208	assert(a.nibbleSets.size() == b.nibbleSets.size());
209
210	TeddySet m(a);
211
212	for (size_t i = `0`; i < m.nibbleSets.size(); i++) {
213	m.nibbleSets [i] \|= b.nibbleSets [i];
214	}
215
216	m.litIds.insert(m.litIds.end(), b.litIds.begin(), b.litIds.end());
217	sort_and_unique(m.litIds);
218
219	return m;
220	}
221	};
222
223	static
224	bool pack(const vector<hwlmLiteral> &lits,
225	const TeddyEngineDescription &eng,
226	map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits) {
227	set<TeddySet> sts;
228
229	for (u32 i = `0`; i < lits.size(); i++) {
230	TeddySet ts(eng.numMasks);
231	ts.addLiteral(i, lits [i]);
232	sts.insert(ts);
233	}
234
235	while (`1`) {
236	#ifdef TEDDY_DEBUG
237	printf("Size %zu\n", sts.size());
238	for (const TeddySet &ts : sts) {
239	printf("\n");
240	ts.dump();
241	}
242	printf("\n===============================================\n");
243	#endif
244
245	auto m1 = sts.end(), m2 = sts.end();
246	u64a best = `0xffffffffffffffffULL`;
247
248	for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
249	const TeddySet &s1 = *i1;
250	for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
251	const TeddySet &s2 = *i2;
252
253	// be more conservative if we don't absolutely need to
254	// keep packing
255	if ((sts.size() <= eng.getNumBuckets()) &&
256	!s1.identicalTail(s2)) {
257	continue;
258	}
259
260	TeddySet tmpSet = merge(s1, s2);
261	u64a newScore = tmpSet.heuristic();
262	u64a oldScore = s1.heuristic() + s2.heuristic();
263	if (newScore < oldScore) {
264	m1 = i1;
265	m2 = i2;
266	break;
267	} else {
268	u64a score = newScore - oldScore;
269	bool oldRunProne = s1.isRunProne() && s2.isRunProne();
270	bool newRunProne = tmpSet.isRunProne();
271	if (newRunProne && !oldRunProne) {
272	continue;
273	}
274	if (score < best) {
275	best = score;
276	m1 = i1;
277	m2 = i2;
278	}
279	}
280	}
281	}
282	// if we didn't find a merge candidate, bail out
283	if ((m1 == sts.end()) \|\| (m2 == sts.end())) {
284	break;
285	}
286
287	// do the merge
288	TeddySet nts = merge(m1, m2);
289	#ifdef TEDDY_DEBUG
290	printf("Merging\n");
291	printf("m1 = \n");
292	m1->dump();
293	printf("m2 = \n");
294	m2->dump();
295	printf("nts = \n");
296	nts.dump();
297	printf("\n===============================================\n");
298	#endif
299	sts.erase(m1);
300	sts.erase(m2);
301	sts.insert(nts);
302	}
303
304	if (sts.size() > eng.getNumBuckets()) {
305	return false;
306	}
307
308	u32 bucket_id = `0`;
309	for (const TeddySet &ts : sts) {
310	const auto &ts_lits = ts.getLits();
311	auto &bucket_lits = bucketToLits [bucket_id];
312	bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
313	bucket_id++;
314	}
315	return true;
316	}
317
318	// this entry has all-zero mask to skip reinforcement
319	#define NO_REINFORCEMENT N_CHARS
320
321	// this means every entry in reinforcement table
322	#define ALL_CHAR_SET N_CHARS
323
324	// each item's reinforcement mask has REINFORCED_MSK_LEN bytes
325	#define REINFORCED_MSK_LEN 8
326
327	// reinforcement table size for each 8 buckets set
328	#define RTABLE_SIZE ((N_CHARS + 1) * REINFORCED_MSK_LEN)
329
330	static
331	void initReinforcedTable(u8 *rmsk) {
332	u64a mask = (u64a )rmsk;
333	fill_n(mask, N_CHARS, `0x00ffffffffffffffULL`);
334	}
335
336	static
337	void fillReinforcedMskZero(u8 *rmsk) {
338	u8 mc = rmsk + NO_REINFORCEMENT REINFORCED_MSK_LEN;
339	fill_n(mc, REINFORCED_MSK_LEN, `0x00`);
340	}
341
342	static
343	void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
344	assert(j > `0`);
345	if (c == ALL_CHAR_SET) {
346	for (size_t i = `0`; i < N_CHARS; i++) {
347	u8 mc = rmsk + i REINFORCED_MSK_LEN;
348	mc[j - `1`] &= ~bmsk;
349	}
350	} else {
351	u8 mc = rmsk + c REINFORCED_MSK_LEN;
352	mc[j - `1`] &= ~bmsk;
353	}
354	}
355
356	static
357	void fillNibbleMasks(const map<BucketIndex,
358	vector<LiteralIndex>> &bucketToLits,
359	const vector<hwlmLiteral> &lits,
360	u32 numMasks, u32 maskWidth, size_t maskLen,
361	u8 *baseMsk) {
362	memset(baseMsk, `0xff`, maskLen);
363
364	for (const auto &b2l : bucketToLits) {
365	const u32 &bucket_id = b2l.first;
366	const vector<LiteralIndex> &ids = b2l.second;
367	const u8 bmsk = `1U` << (bucket_id % `8`);
368
369	for (const LiteralIndex &lit_id : ids) {
370	const hwlmLiteral &l = lits [lit_id];
371	DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
372	const u32 sz = verify_u32(l.s.size());
373
374	// fill in masks
375	for (u32 j = `0`; j < numMasks; j++) {
376	const u32 msk_id_lo = j * `2` * maskWidth + (bucket_id / `8`);
377	const u32 msk_id_hi = (j * `2` + `1`) * maskWidth + (bucket_id / `8`);
378	const u32 lo_base = msk_id_lo * `16`;
379	const u32 hi_base = msk_id_hi * `16`;
380
381	// if we don't have a char at this position, fill in i
382	// locations in these masks with '1'
383	if (j >= sz) {
384	for (u32 n = `0`; n < `16`; n++) {
385	baseMsk[lo_base + n] &= ~bmsk;
386	baseMsk[hi_base + n] &= ~bmsk;
387	}
388	} else {
389	u8 c = l.s [sz - `1` - j];
390	// if we do have a char at this position
391	const u32 hiShift = `4`;
392	u32 n_hi = (c >> hiShift) & `0xf`;
393	u32 n_lo = c & `0xf`;
394
395	if (j < l.msk.size() && l.msk [l.msk.size() - `1` - j]) {
396	u8 m = l.msk [l.msk.size() - `1` - j];
397	u8 m_hi = (m >> hiShift) & `0xf`;
398	u8 m_lo = m & `0xf`;
399	u8 cmp = l.cmp [l.msk.size() - `1` - j];
400	u8 cmp_lo = cmp & `0xf`;
401	u8 cmp_hi = (cmp >> hiShift) & `0xf`;
402
403	for (u8 cm = `0`; cm < `0x10`; cm++) {
404	if ((cm & m_lo) == (cmp_lo & m_lo)) {
405	baseMsk[lo_base + cm] &= ~bmsk;
406	}
407	if ((cm & m_hi) == (cmp_hi & m_hi)) {
408	baseMsk[hi_base + cm] &= ~bmsk;
409	}
410	}
411	} else {
412	if (l.nocase && ourisalpha(c)) {
413	u32 cmHalfClear = (`0xdf` >> hiShift) & `0xf`;
414	u32 cmHalfSet = (`0x20` >> hiShift) & `0xf`;
415	baseMsk[hi_base + (n_hi & cmHalfClear)] &= ~bmsk;
416	baseMsk[hi_base + (n_hi \| cmHalfSet)] &= ~bmsk;
417	} else {
418	baseMsk[hi_base + n_hi] &= ~bmsk;
419	}
420	baseMsk[lo_base + n_lo] &= ~bmsk;
421	}
422	}
423	}
424	}
425	}
426	}
427
428	static
429	void fillReinforcedTable(const map<BucketIndex,
430	vector<LiteralIndex>> &bucketToLits,
431	const vector<hwlmLiteral> &lits,
432	u8 rtable_base, const* u32 num_tables) {
433	vector<u8 *> tables;
434	for (u32 i = `0`; i < num_tables; i++) {
435	tables.push_back(rtable_base + i * RTABLE_SIZE);
436	}
437
438	for (auto t : tables) {
439	initReinforcedTable(t);
440	}
441
442	for (const auto &b2l : bucketToLits) {
443	const u32 &bucket_id = b2l.first;
444	const vector<LiteralIndex> &ids = b2l.second;
445	u8 *rmsk = tables [bucket_id / `8`];
446	const u8 bmsk = `1U` << (bucket_id % `8`);
447
448	for (const LiteralIndex &lit_id : ids) {
449	const hwlmLiteral &l = lits [lit_id];
450	DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
451	const u32 sz = verify_u32(l.s.size());
452
453	// fill in reinforced masks
454	for (u32 j = `1`; j < REINFORCED_MSK_LEN; j++) {
455	if (sz - `1` < j) {
456	fillReinforcedMsk(rmsk, ALL_CHAR_SET, j, bmsk);
457	} else {
458	u8 c = l.s [sz - `1` - j];
459	if (l.nocase && ourisalpha(c)) {
460	u8 c_up = c & `0xdf`;
461	fillReinforcedMsk(rmsk, c_up, j, bmsk);
462	u8 c_lo = c \| `0x20`;
463	fillReinforcedMsk(rmsk, c_lo, j, bmsk);
464	} else {
465	fillReinforcedMsk(rmsk, c, j, bmsk);
466	}
467	}
468	}
469	}
470	}
471
472	for (auto t : tables) {
473	fillReinforcedMskZero(t);
474	}
475	}
476
477	bytecode_ptr<FDR> TeddyCompiler::build() {
478	u32 maskWidth = eng.getNumBuckets() / `8`;
479
480	size_t headerSize = sizeof(Teddy);
481	size_t maskLen = eng.numMasks * `16` * `2` * maskWidth;
482	size_t reinforcedMaskLen = RTABLE_SIZE * maskWidth;
483
484	auto floodTable = setupFDRFloodControl(lits, eng, grey);
485	auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
486
487	// Note: we place each major structure here on a cacheline boundary.
488	size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
489	ROUNDUP_CL(reinforcedMaskLen) +
490	ROUNDUP_CL(confirmTable.size()) + floodTable.size();
491
492	auto fdr = make_zeroed_bytecode_ptr<FDR>(size, `64`);
493	assert(fdr); // otherwise would have thrown std::bad_alloc
494	Teddy teddy = (Teddy )fdr.get(); // ugly
495	u8 teddy_base = (u8 )teddy;
496
497	// Write header.
498	teddy->size = size;
499	teddy->engineID = eng.getID();
500	teddy->maxStringLen = verify_u32(maxLen(lits));
501	teddy->numStrings = verify_u32(lits.size());
502
503	// Write confirm structures.
504	u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
505	ROUNDUP_CL(reinforcedMaskLen);
506	assert(ISALIGNED_CL(ptr));
507	teddy->confOffset = verify_u32(ptr - teddy_base);
508	memcpy(ptr, confirmTable.get(), confirmTable.size());
509	ptr += ROUNDUP_CL(confirmTable.size());
510
511	// Write flood control structures.
512	assert(ISALIGNED_CL(ptr));
513	teddy->floodOffset = verify_u32(ptr - teddy_base);
514	memcpy(ptr, floodTable.get(), floodTable.size());
515	ptr += floodTable.size();
516
517	// Write teddy masks.
518	u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
519	fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen,
520	baseMsk);
521
522	// Write reinforcement masks.
523	u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
524	fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth);
525
526	return fdr;
527	}
528
529
530	static
531	bool assignStringsToBuckets(
532	const vector<hwlmLiteral> &lits,
533	TeddyEngineDescription &eng,
534	map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
535	assert(eng.numMasks <= MAX_NUM_MASKS);
536	if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
537	DEBUG_PRINTF("too many literals: %zu\n", lits.size());
538	return false;
539	}
540
541	#ifdef TEDDY_DEBUG
542	for (size_t i = `0`; i < lits.size(); i++) {
543	printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
544	lits[i].nocase ? "caseless" : "caseful");
545	for (size_t j = `0`; j < lits[i].s.size(); j++) {
546	printf("%02x", ((u32)lits[i].s[j])&`0xff`);
547	}
548	printf("\n");
549	}
550	#endif
551
552	if (!pack(lits, eng, bucketToLits)) {
553	DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
554	lits.size(), eng.getNumBuckets());
555	return false;
556	}
557	return true;
558	}
559
560	} // namespace
561
562	bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
563	TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng),
564	proto.make_small, grey);
565	return tc.build();
566	}
567
568
569	unique_ptr<HWLMProto> teddyBuildProtoHinted(
570	u8 engType, const vector<hwlmLiteral> &lits,
571	bool make_small, u32 hint, const target_t &target) {
572	unique_ptr<TeddyEngineDescription> des;
573	if (hint == HINT_INVALID) {
574	des = chooseTeddyEngine(target, lits);
575	} else {
576	des = getTeddyDescription(hint);
577	}
578	if (!des) {
579	return nullptr;
580	}
581
582	map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
583	if (!assignStringsToBuckets(lits, *des, bucketToLits)) {
584	return nullptr;
585	}
586
587	return ue2::make_unique<HWLMProto>(engType, move(des), lits,
588	bucketToLits, make_small);
589	}
590
591	} // namespace ue2
592

Browse the source code of ClickHouse/contrib/hyperscan/src/fdr/teddy_compile.cpp