| 1 | /* |
| 2 | * Copyright (c) 2015-2019, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief FDR literal matcher: build API. |
| 31 | */ |
| 32 | |
| 33 | #include "fdr_compile.h" |
| 34 | |
| 35 | #include "fdr_internal.h" |
| 36 | #include "fdr_confirm.h" |
| 37 | #include "fdr_compile_internal.h" |
| 38 | #include "fdr_engine_description.h" |
| 39 | #include "teddy_compile.h" |
| 40 | #include "teddy_engine_description.h" |
| 41 | #include "grey.h" |
| 42 | #include "ue2common.h" |
| 43 | #include "hwlm/hwlm_build.h" |
| 44 | #include "util/compare.h" |
| 45 | #include "util/container.h" |
| 46 | #include "util/dump_mask.h" |
| 47 | #include "util/make_unique.h" |
| 48 | #include "util/math.h" |
| 49 | #include "util/noncopyable.h" |
| 50 | #include "util/target_info.h" |
| 51 | #include "util/ue2string.h" |
| 52 | #include "util/verify_types.h" |
| 53 | |
| 54 | #include <algorithm> |
| 55 | #include <array> |
| 56 | #include <cassert> |
| 57 | #include <cctype> |
| 58 | #include <cstdio> |
| 59 | #include <cstdlib> |
| 60 | #include <cstring> |
| 61 | #include <limits> |
| 62 | #include <map> |
| 63 | #include <memory> |
| 64 | #include <numeric> |
| 65 | #include <set> |
| 66 | #include <string> |
| 67 | #include <unordered_map> |
| 68 | #include <unordered_set> |
| 69 | #include <vector> |
| 70 | |
| 71 | #include <boost/multi_array.hpp> |
| 72 | |
| 73 | using namespace std; |
| 74 | |
| 75 | namespace ue2 { |
| 76 | |
| 77 | namespace { |
| 78 | |
| 79 | class FDRCompiler : noncopyable { |
| 80 | private: |
| 81 | const FDREngineDescription ŋ |
| 82 | const Grey &grey; |
| 83 | vector<u8> tab; |
| 84 | vector<hwlmLiteral> lits; |
| 85 | map<BucketIndex, std::vector<LiteralIndex> > bucketToLits; |
| 86 | bool make_small; |
| 87 | |
| 88 | u8 *tabIndexToMask(u32 indexInTable); |
| 89 | #ifdef DEBUG |
| 90 | void dumpMasks(const u8 *defaultMask); |
| 91 | #endif |
| 92 | void setupTab(); |
| 93 | bytecode_ptr<FDR> setupFDR(); |
| 94 | void createInitialState(FDR *fdr); |
| 95 | |
| 96 | public: |
| 97 | FDRCompiler(vector<hwlmLiteral> lits_in, |
| 98 | map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in, |
| 99 | const FDREngineDescription &eng_in, |
| 100 | bool make_small_in, const Grey &grey_in) |
| 101 | : eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()), |
| 102 | lits(move(lits_in)), bucketToLits(move(bucketToLits_in)), |
| 103 | make_small(make_small_in) {} |
| 104 | |
| 105 | bytecode_ptr<FDR> build(); |
| 106 | }; |
| 107 | |
// Returns a pointer into the reach table for the mask belonging to the given
// table entry; each entry is (scheme width / 8) bytes wide.
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
    assert(indexInTable < tab.size());
    return &tab[0] + (indexInTable * (eng.getSchemeWidth() / 8));
}
| 112 | |
| 113 | static |
| 114 | void setbit(u8 *msk, u32 bit) { |
| 115 | msk[bit / 8] |= 1U << (bit % 8); |
| 116 | } |
| 117 | |
| 118 | static |
| 119 | void clearbit(u8 *msk, u32 bit) { |
| 120 | msk[bit / 8] &= ~(1U << (bit % 8)); |
| 121 | } |
| 122 | |
| 123 | static |
| 124 | void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) { |
| 125 | for (u32 i = 0; i < num_bytes; i++) { |
| 126 | dest[i] = a[i] & b[i]; |
| 127 | } |
| 128 | } |
| 129 | |
// Initialise the engine's start state: for each bucket, set the state bits
// for positions before (bucket min literal length - 1) so that no match can
// be reported before enough bytes have been consumed.
void FDRCompiler::createInitialState(FDR *fdr) {
    u8 *start = (u8 *)&fdr->start;

    /* initial state should to be 1 in each slot in the bucket up to bucket
     * minlen - 1, and 0 thereafter */
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        // Find the minimum length for the literals in this bucket.
        const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
        u32 min_len = ~0U;
        for (const LiteralIndex &lit_idx : bucket_lits) {
            min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
        }

        DEBUG_PRINTF("bucket %u has min_len=%u\n" , b, min_len);
        assert(min_len);

        // Set bits only for positions 0 .. min_len - 2; later positions start
        // cleared so the engine can report once min_len bytes have matched.
        for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
            if (i < min_len - 1) {
                setbit(start, eng.getSchemeBit(b, i));
            }
        }
    }
}
| 153 | |
| 154 | /** |
| 155 | * \brief Lay out FDR structures in bytecode. |
| 156 | * |
| 157 | * Note that each major structure (header, table, confirm, flood control) is |
| 158 | * cacheline-aligned. |
| 159 | */ |
| 160 | bytecode_ptr<FDR> FDRCompiler::setupFDR() { |
| 161 | auto floodTable = setupFDRFloodControl(lits, eng, grey); |
| 162 | auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small); |
| 163 | |
| 164 | size_t = sizeof(FDR); |
| 165 | size_t tabSize = eng.getTabSizeBytes(); |
| 166 | |
| 167 | // Note: we place each major structure here on a cacheline boundary. |
| 168 | size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(tabSize) + |
| 169 | ROUNDUP_CL(confirmTable.size()) + floodTable.size(); |
| 170 | |
| 171 | DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu " |
| 172 | "total=%zu\n" , |
| 173 | headerSize, tabSize, confirmTable.size(), floodTable.size(), |
| 174 | size); |
| 175 | |
| 176 | auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64); |
| 177 | assert(fdr); // otherwise would have thrown std::bad_alloc |
| 178 | |
| 179 | u8 *fdr_base = (u8 *)fdr.get(); |
| 180 | |
| 181 | // Write header. |
| 182 | fdr->size = size; |
| 183 | fdr->engineID = eng.getID(); |
| 184 | fdr->maxStringLen = verify_u32(maxLen(lits)); |
| 185 | fdr->numStrings = verify_u32(lits.size()); |
| 186 | assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only |
| 187 | fdr->domain = eng.bits; |
| 188 | fdr->domainMask = (1 << eng.bits) - 1; |
| 189 | fdr->tabSize = tabSize; |
| 190 | fdr->stride = eng.stride; |
| 191 | createInitialState(fdr.get()); |
| 192 | |
| 193 | // Write table. |
| 194 | u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR)); |
| 195 | assert(ISALIGNED_CL(ptr)); |
| 196 | copy(tab.begin(), tab.end(), ptr); |
| 197 | ptr += ROUNDUP_CL(tabSize); |
| 198 | |
| 199 | // Write confirm structures. |
| 200 | assert(ISALIGNED_CL(ptr)); |
| 201 | fdr->confOffset = verify_u32(ptr - fdr_base); |
| 202 | memcpy(ptr, confirmTable.get(), confirmTable.size()); |
| 203 | ptr += ROUNDUP_CL(confirmTable.size()); |
| 204 | |
| 205 | // Write flood control structures. |
| 206 | assert(ISALIGNED_CL(ptr)); |
| 207 | fdr->floodOffset = verify_u32(ptr - fdr_base); |
| 208 | memcpy(ptr, floodTable.get(), floodTable.size()); |
| 209 | ptr += floodTable.size(); // last write, no need to round up |
| 210 | |
| 211 | return fdr; |
| 212 | } |
| 213 | |
| 214 | //#define DEBUG_ASSIGNMENT |
| 215 | |
| 216 | /** |
| 217 | * Utility class for computing: |
| 218 | * |
| 219 | * score(count, len) = pow(count, 1.05) * pow(len, -3) |
| 220 | * |
| 221 | * Calling pow() is expensive. This is mitigated by using pre-computed LUTs for |
| 222 | * small inputs and a cache for larger ones. |
| 223 | */ |
| 224 | class Scorer { |
| 225 | unordered_map<u32, double> count_factor_cache; |
| 226 | |
| 227 | // LUT: pow(count, 1.05) for small values of count. |
| 228 | static const array<double, 100> count_lut; |
| 229 | |
| 230 | double count_factor(u32 count) { |
| 231 | if (count < count_lut.size()) { |
| 232 | return count_lut[count]; |
| 233 | } |
| 234 | |
| 235 | auto it = count_factor_cache.find(count); |
| 236 | if (it != count_factor_cache.end()) { |
| 237 | return it->second; |
| 238 | } |
| 239 | double r = our_pow(count, 1.05); |
| 240 | count_factor_cache.emplace(count, r); |
| 241 | return r; |
| 242 | } |
| 243 | |
| 244 | // LUT: pow(len, -3) for len in range [0,8]. |
| 245 | static const array<double, 9> len_lut; |
| 246 | |
| 247 | double len_factor(u32 len) { |
| 248 | assert(len <= len_lut.size()); |
| 249 | return len_lut[len]; |
| 250 | } |
| 251 | |
| 252 | public: |
| 253 | double operator()(u32 len, u32 count) { |
| 254 | if (len == 0) { |
| 255 | return numeric_limits<double>::max(); |
| 256 | } |
| 257 | return count_factor(count) * len_factor(len); |
| 258 | } |
| 259 | }; |
| 260 | |
// Definition of Scorer::count_lut: pow(count, 1.05) for count in [0,99].
const array<double, 100> Scorer::count_lut{{
    pow(0, 1.05), pow(1, 1.05), pow(2, 1.05), pow(3, 1.05), pow(4, 1.05),
    pow(5, 1.05), pow(6, 1.05), pow(7, 1.05), pow(8, 1.05), pow(9, 1.05),
    pow(10, 1.05), pow(11, 1.05), pow(12, 1.05), pow(13, 1.05), pow(14, 1.05),
    pow(15, 1.05), pow(16, 1.05), pow(17, 1.05), pow(18, 1.05), pow(19, 1.05),
    pow(20, 1.05), pow(21, 1.05), pow(22, 1.05), pow(23, 1.05), pow(24, 1.05),
    pow(25, 1.05), pow(26, 1.05), pow(27, 1.05), pow(28, 1.05), pow(29, 1.05),
    pow(30, 1.05), pow(31, 1.05), pow(32, 1.05), pow(33, 1.05), pow(34, 1.05),
    pow(35, 1.05), pow(36, 1.05), pow(37, 1.05), pow(38, 1.05), pow(39, 1.05),
    pow(40, 1.05), pow(41, 1.05), pow(42, 1.05), pow(43, 1.05), pow(44, 1.05),
    pow(45, 1.05), pow(46, 1.05), pow(47, 1.05), pow(48, 1.05), pow(49, 1.05),
    pow(50, 1.05), pow(51, 1.05), pow(52, 1.05), pow(53, 1.05), pow(54, 1.05),
    pow(55, 1.05), pow(56, 1.05), pow(57, 1.05), pow(58, 1.05), pow(59, 1.05),
    pow(60, 1.05), pow(61, 1.05), pow(62, 1.05), pow(63, 1.05), pow(64, 1.05),
    pow(65, 1.05), pow(66, 1.05), pow(67, 1.05), pow(68, 1.05), pow(69, 1.05),
    pow(70, 1.05), pow(71, 1.05), pow(72, 1.05), pow(73, 1.05), pow(74, 1.05),
    pow(75, 1.05), pow(76, 1.05), pow(77, 1.05), pow(78, 1.05), pow(79, 1.05),
    pow(80, 1.05), pow(81, 1.05), pow(82, 1.05), pow(83, 1.05), pow(84, 1.05),
    pow(85, 1.05), pow(86, 1.05), pow(87, 1.05), pow(88, 1.05), pow(89, 1.05),
    pow(90, 1.05), pow(91, 1.05), pow(92, 1.05), pow(93, 1.05), pow(94, 1.05),
    pow(95, 1.05), pow(96, 1.05), pow(97, 1.05), pow(98, 1.05), pow(99, 1.05),
}};
| 283 | |
// Definition of Scorer::len_lut: pow(len, -3) for len in [0,8].
const array<double, 9> Scorer::len_lut{{
    pow(0, -3.0), pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
    pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}};
| 287 | |
| 288 | /** |
| 289 | * Returns true if the two given literals should be placed in the same chunk as |
| 290 | * they are identical except for a difference in caselessness. |
| 291 | */ |
| 292 | static |
| 293 | bool isEquivLit(const hwlmLiteral &a, const hwlmLiteral &b, |
| 294 | const hwlmLiteral *last_nocase_lit) { |
| 295 | const size_t a_len = a.s.size(); |
| 296 | const size_t b_len = b.s.size(); |
| 297 | |
| 298 | if (a_len != b_len) { |
| 299 | return false; |
| 300 | } |
| 301 | |
| 302 | bool nocase = last_nocase_lit && a_len == last_nocase_lit->s.size() && |
| 303 | !cmp(a.s.c_str(), last_nocase_lit->s.c_str(), a_len, true); |
| 304 | return !cmp(a.s.c_str(), b.s.c_str(), a.s.size(), nocase); |
| 305 | } |
| 306 | |
/** \brief A contiguous run of literals (from the sorted literal list) that is
 * treated as a unit during bucket assignment. */
struct Chunk {
    Chunk(u32 first_id_in, u32 count_in, u32 length_in)
        : first_id(first_id_in), count(count_in), length(length_in) {}
    u32 first_id; //!< first id in this chunk
    u32 count; //!< how many are in this chunk
    u32 length; //!< how long things in the chunk are
};
| 314 | |
/**
 * \brief Partition the (length-sorted) literal list into chunks of
 * equal-length literals; chunks are the unit distributed over buckets by the
 * assignment DP. A trailing empty chunk is appended as a sentinel.
 */
static
vector<Chunk> assignChunks(const vector<hwlmLiteral> &lits,
                           const map<u32, u32> &lenCounts) {
    const u32 CHUNK_MAX = 512;
    const u32 MAX_CONSIDERED_LENGTH = 16;

    // TODO: detailed early stage literal analysis for v. small cases (actually
    // look at lits) yes - after we factor this out and merge in the Teddy
    // style of building we can look at this, although the teddy merge
    // modelling is quite different. It's still probably adaptable to some
    // extent for this class of problem.

    vector<Chunk> chunks;
    chunks.reserve(CHUNK_MAX);

    // Cap on chunk size, chosen so that we stay under CHUNK_MAX chunks.
    const u32 maxPerChunk = lits.size() /
        (CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;

    u32 currentSize = 0;
    u32 chunkStartID = 0;
    const hwlmLiteral *last_nocase_lit = nullptr;

    for (u32 i = 0; i < lits.size() && chunks.size() < CHUNK_MAX - 1; i++) {
        const auto &lit = lits[i];

        DEBUG_PRINTF("i=%u, lit=%s%s\n" , i, escapeString(lit.s).c_str(),
                     lit.nocase ? " (nocase)" : "" );

        // If this literal is identical to the last one (aside from differences
        // in caselessness), keep going even if we will "overfill" a chunk; we
        // don't want to split identical literals into different buckets.
        if (i != 0 && isEquivLit(lit, lits[i - 1], last_nocase_lit)) {
            DEBUG_PRINTF("identical lit\n" );
            goto next_literal;
        }

        // Start a new chunk when the literal length changes (for lengths we
        // still distinguish) or the current chunk has reached maxPerChunk.
        if ((currentSize < MAX_CONSIDERED_LENGTH &&
             (lit.s.size() != currentSize)) ||
            (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
            currentSize = lit.s.size();
            if (!chunks.empty()) {
                chunks.back().count = i - chunkStartID;
            }
            chunkStartID = i;
            chunks.emplace_back(i, 0, currentSize);
        }
next_literal:
        if (lit.nocase) {
            last_nocase_lit = &lit;
        }
    }

    assert(!chunks.empty());
    chunks.back().count = lits.size() - chunkStartID;
    // close off chunks with an empty row
    chunks.emplace_back(lits.size(), 0, 0);

#ifdef DEBUG_ASSIGNMENT
    for (size_t j = 0; j < chunks.size(); j++) {
        const auto &chunk = chunks[j];
        printf("chunk %zu first_id=%u count=%u length=%u\n" , j, chunk.first_id,
               chunk.count, chunk.length);
    }
#endif

    DEBUG_PRINTF("built %zu chunks (%zu lits)\n" , chunks.size(), lits.size());
    assert(chunks.size() <= CHUNK_MAX);
    return chunks;
}
| 384 | |
/**
 * \brief Assign literals to buckets.
 *
 * Sorts the literals by length, partitions them into chunks and then uses a
 * dynamic program over the chunk list to find the bucket split with the best
 * total score. Within each bucket, literal ids are stored longest-first.
 */
static
map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
                                vector<hwlmLiteral> &lits,
                                const FDREngineDescription &eng) {
    const double MAX_SCORE = numeric_limits<double>::max();

    assert(!lits.empty()); // Shouldn't be called with no literals.

    // Count the number of literals for each length.
    map<u32, u32> lenCounts;
    for (const auto &lit : lits) {
        lenCounts[lit.s.size()]++;
    }

#ifdef DEBUG_ASSIGNMENT
    for (const auto &m : lenCounts) {
        printf("l<%u>:%u " , m.first, m.second);
    }
    printf("\n" );
#endif

    // Sort literals by literal length. If tied on length, use lexicographic
    // ordering (of the reversed literals).
    stable_sort(lits.begin(), lits.end(),
                [](const hwlmLiteral &a, const hwlmLiteral &b) {
                    if (a.s.size() != b.s.size()) {
                        return a.s.size() < b.s.size();
                    }
                    auto p = mismatch(a.s.rbegin(), a.s.rend(), b.s.rbegin());
                    if (p.first != a.s.rend()) {
                        return *p.first < *p.second;
                    }
                    // Sort caseless variants first.
                    return a.nocase > b.nocase;
                });

    vector<Chunk> chunks = assignChunks(lits, lenCounts);

    const u32 numChunks = chunks.size();
    const u32 numBuckets = eng.getNumBuckets();

    // 2D array of (score, chunk index) pairs, indexed by
    // [chunk_index][bucket_index].
    boost::multi_array<pair<double, u32>, 2> t(
        boost::extents[numChunks][numBuckets]);

    Scorer scorer;

    // Base case: cost of placing chunks [j, end) into a single bucket.
    for (u32 j = 0; j < numChunks; j++) {
        u32 cnt = 0;
        for (u32 k = j; k < numChunks; ++k) {
            cnt += chunks[k].count;
        }
        t[j][0] = {scorer(chunks[j].length, cnt), 0};
    }

    // DP step: t[j][i] holds the best (score, split chunk) for covering
    // chunks [j, end) with i+1 buckets; k is the start of the next bucket.
    for (u32 i = 1; i < numBuckets; i++) {
        for (u32 j = 0; j < numChunks - 1; j++) { // don't do last, empty row
            pair<double, u32> best = {MAX_SCORE, 0};
            u32 cnt = chunks[j].count;
            for (u32 k = j + 1; k < numChunks - 1; k++) {
                auto score = scorer(chunks[j].length, cnt);
                if (score > best.first) {
                    break; // now worse locally than our best score, give up
                }
                score += t[k][i-1].first;
                if (score < best.first) {
                    best = {score, k};
                }
                cnt += chunks[k].count;
            }
            t[j][i] = best;
        }
        t[numChunks - 1][i] = {0,0}; // fill in empty final row for next iter
    }

#ifdef DEBUG_ASSIGNMENT
    for (u32 j = 0; j < numChunks; j++) {
        printf("%03u: " , j);
        for (u32 i = 0; i < numBuckets; i++) {
            const auto &v = t[j][i];
            printf("<%0.3f,%3d> " , v.first, v.second);
        }
        printf("\n" );
    }
#endif

    // our best score is in t[0][N_BUCKETS-1] and we can follow the links
    // to find where our buckets should start and what goes into them
    vector<vector<LiteralIndex>> buckets;
    for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
        u32 j = t[i][n - 1].second;
        if (j == 0) {
            j = numChunks - 1;
        }

        // put chunks between i - j into bucket (numBuckets - n).
        u32 first_id = chunks[i].first_id;
        u32 last_id = chunks[j].first_id;
        assert(first_id < last_id);
        UNUSED const auto &first_lit = lits[first_id];
        UNUSED const auto &last_lit = lits[last_id - 1];
        DEBUG_PRINTF("placing [%u-%u) in one bucket (%u lits, len %zu-%zu, "
                     "score %0.4f)\n" ,
                     first_id, last_id, last_id - first_id,
                     first_lit.s.length(), last_lit.s.length(),
                     scorer(first_lit.s.length(), last_id - first_id));

        vector<LiteralIndex> litIds;
        u32 cnt = last_id - first_id;
        // long literals first for included literals checking
        for (u32 k = 0; k < cnt; k++) {
            litIds.push_back(last_id - k - 1);
        }

        i = j;
        buckets.push_back(litIds);
    }

    // reverse bucket id, longer literals come first
    map<BucketIndex, vector<LiteralIndex>> bucketToLits;
    size_t bucketCnt = buckets.size();
    for (size_t i = 0; i < bucketCnt; i++) {
        bucketToLits.emplace(bucketCnt - i - 1, move(buckets[i]));
    }

    return bucketToLits;
}
| 513 | |
#ifdef DEBUG
// Debug helper: print every table entry whose mask differs from the default.
void FDRCompiler::dumpMasks(const u8 *defaultMask) {
    const size_t width = eng.getSchemeWidth();
    printf("default mask: %s\n" , dumpMask(defaultMask, width).c_str());
    for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
        u8 *m = tabIndexToMask(i);
        if (memcmp(m, defaultMask, width / 8)) {
            printf("tab %04x: %s\n" , i, dumpMask(m, width).c_str());
        }
    }
}
#endif
| 526 | |
/**
 * \brief For each literal in \p vl, build the (mask, don't-care) pair for the
 * domain-sized window of bytes ending at suffix position \p pos, and collect
 * the masks in \p m2 keyed by their don't-care bits.
 *
 * Returns true if some literal's window is entirely don't-care (every table
 * entry would match), so the caller can treat this position as wildcarded.
 */
static
bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
                               const vector<LiteralIndex> &vl,
                               const vector<hwlmLiteral> &lits,
                               SuffixPositionInString pos,
                               map<u32, unordered_set<u32>> &m2) {
    assert(eng.bits < 32);

    // Number of bytes covered by the domain (eng.bits wide).
    u32 distance = 0;
    if (eng.bits <= 8) {
        distance = 1;
    } else if (eng.bits <= 16) {
        distance = 2;
    } else {
        distance = 4;
    }

    for (auto i = vl.begin(), e = vl.end(); i != e; ++i) {
        // Prefetch a literal a few iterations ahead to hide memory latency.
        if (e - i > 5) {
            __builtin_prefetch(&lits[*(i + 5)]);
        }
        const hwlmLiteral &lit = lits[*i];
        const size_t sz = lit.s.size();
        u32 mask = 0;
        u32 dontCares = 0;
        for (u32 cnt = 0; cnt < distance; cnt++) {
            int newPos = pos - cnt;
            u8 dontCareByte = 0x0;
            u8 maskByte = 0x0;
            // Positions outside the literal are entirely don't-care.
            if (newPos < 0 || ((u32)newPos >= sz)) {
                dontCareByte = 0xff;
            } else {
                u8 c = lit.s[sz - newPos - 1];
                maskByte = c;
                u32 remainder = eng.bits - cnt * 8;
                assert(remainder != 0);
                // Partial byte at the top of the domain: mask off the bits
                // beyond the domain width.
                if (remainder < 8) {
                    u8 cmask = (1U << remainder) - 1;
                    maskByte &= cmask;
                    dontCareByte |= ~cmask;
                }
                // Caseless alpha: the 0x20 case bit becomes don't-care.
                if (lit.nocase && ourisalpha(c)) {
                    maskByte &= 0xdf;
                    dontCareByte |= 0x20;
                }
            }
            u32 loc = cnt * 8;
            mask |= maskByte << loc;
            dontCares |= dontCareByte << loc;
        }

        // truncate m and dc down to nBits
        mask &= (1U << eng.bits) - 1;
        dontCares &= (1U << eng.bits) - 1;
        if (dontCares == ((1U << eng.bits) - 1)) {
            return true;
        }
        m2[dontCares].insert(mask);
    }
    return false;
}
| 588 | |
// Populate the reach table: for every table entry (domain value), clear the
// scheme bits corresponding to (bucket, position) pairs that the entry can
// match, starting from an all-ones default mask.
void FDRCompiler::setupTab() {
    const size_t mask_size = eng.getSchemeWidth() / 8;
    assert(mask_size);

    // Start with every entry set to the all-ones default mask.
    vector<u8> defaultMask(mask_size, 0xff);
    for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
        memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
    }

    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        const vector<LiteralIndex> &vl = bucketToLits[b];
        SuffixPositionInString pLimit = eng.getBucketWidth(b);
        for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
            u32 bit = eng.getSchemeBit(b, pos);
            map<u32, unordered_set<u32>> m2;
            bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
            if (done) {
                // Every domain value matches here: clear the bit in the
                // default mask instead of touching every table entry.
                clearbit(&defaultMask[0], bit);
                continue;
            }
            for (const auto &elem : m2) {
                u32 dc = elem.first;
                const unordered_set<u32> &mskSet = elem.second;
                // Enumerate all combinations of the don't-care bits: starting
                // from v = ~dc, "v = (v + (dc & -dc)) | ~dc" steps b2 = v & dc
                // through every subset of dc.
                u32 v = ~dc;
                do {
                    u32 b2 = v & dc;
                    for (const u32 &mskVal : mskSet) {
                        u32 val = (mskVal & ~dc) | b2;
                        clearbit(tabIndexToMask(val), bit);
                    }
                    v = (v + (dc & -dc)) | ~dc;
                } while (v != ~dc);
            }
        }
    }

    // Fold the default mask into every table entry.
    for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
        u8 *m = tabIndexToMask(i);
        andMask(m, m, &defaultMask[0], mask_size);
    }
#ifdef DEBUG
    dumpMasks(&defaultMask[0]);
#endif
}
| 633 | |
// Build the complete engine: populate the reach table, then lay out the
// bytecode structures.
bytecode_ptr<FDR> FDRCompiler::build() {
    setupTab();
    return setupFDR();
}
| 638 | |
| 639 | static |
| 640 | bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) { |
| 641 | const auto &s1 = lit1.s; |
| 642 | const auto &s2 = lit2.s; |
| 643 | size_t len1 = s1.length(); |
| 644 | size_t len2 = s2.length(); |
| 645 | assert(len1 >= len2); |
| 646 | |
| 647 | if (lit1.nocase || lit2.nocase) { |
| 648 | return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2, |
| 649 | [](char a, char b) { return mytoupper(a) == mytoupper(b); }); |
| 650 | } else { |
| 651 | return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2); |
| 652 | } |
| 653 | } |
| 654 | |
| 655 | /* |
| 656 | * if lit2 is a suffix of lit1 but the case sensitivity, groups or mask info |
| 657 | * of lit2 is a subset of lit1, then lit1 can't squash lit2 and lit2 can |
| 658 | * possibly match when lit1 matches. In this case, we can't do bucket |
| 659 | * squashing. e.g. AAA(no case) in bucket 0, AA(no case) and aa in bucket 1, |
| 660 | * we can't squash bucket 1 if we have input like "aaa" as aa can also match. |
| 661 | */ |
static
bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
    /* lit1 is caseless and lit2 is case sensitive */
    if ((lit1.nocase && !lit2.nocase)) {
        return true;
    }

    /* lit2's group is a subset of lit1 */
    if (lit1.groups != lit2.groups &&
        (lit2.groups == (lit1.groups & lit2.groups))) {
        return true;
    }

    /* TODO: narrow down cases for mask check */
    // Any difference in the supplementary mask/compare info means lit2 may
    // match where lit1 does not, so squashing is unsafe.
    if (lit1.cmp != lit2.cmp || lit1.msk != lit2.msk) {
        return true;
    }

    return false;
}
| 682 | |
| 683 | /* |
| 684 | * if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1 |
| 685 | * shouldn't match at the same offset, otherwise we give up squashing for lit1. |
| 686 | * e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate |
| 687 | * matches for input "aaa" if lit0 and lit1 both squash lit2. |
| 688 | */ |
static
bool checkParentLit(
    const vector<hwlmLiteral> &lits, u32 pos1,
    const unordered_set<u32> &parent_map,
    const unordered_map<u32, unordered_set<u32>> &exception_map) {
    assert(pos1 < lits.size());
    const auto &lit1 = lits[pos1];
    // Walk every literal that already squashes via this child.
    for (const auto pos2 : parent_map) {
        // If lit1 is recorded as an exception of lit2, both can match at the
        // same offset: squashing would drop a match.
        if (contains(exception_map, pos2)) {
            const auto &exception_pos = exception_map.at(pos2);
            if (contains(exception_pos, pos1)) {
                return false;
            }
        }

        /* if lit1 isn't an exception of lit2, then we have to do further
         * exclusive check.
         * TODO: More mask checks. Note if two literals are group exclusive,
         * it is possible that they match at the same offset. */
        assert(pos2 < lits.size());
        const auto &lit2 = lits[pos2];
        if (isSuffix(lit2, lit1)) {
            return false;
        }
    }

    return true;
}
| 717 | |
/**
 * \brief Try to build a squash mask for literal \p id1 (in bucket \p bucket1)
 * by scanning \p group (literals sharing the same last character, ordered by
 * bucket) from \p start for literals that are suffixes of it.
 *
 * On success, records the included literal in lit1.included_id and sets the
 * corresponding bucket bit in lit1.squash. parent_map/exception_map track the
 * inclusion relationships discovered so far.
 */
static
void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
                     size_t start, const vector<pair<u32, u32>> &group,
                     unordered_map<u32, unordered_set<u32>> &parent_map,
                     unordered_map<u32, unordered_set<u32>> &exception_map) {
    auto &lit1 = lits[id1];
    DEBUG_PRINTF("b:%u len:%zu\n" , bucket1, lit1.s.length());

    size_t cnt = group.size();
    bool included = false;
    bool exception = false;
    u32 child_id = ~0U;
    for (size_t i = start; i < cnt; i++) {
        u32 bucket2 = group[i].first;
        assert(bucket2 >= bucket1);

        u32 id2 = group[i].second;
        auto &lit2 = lits[id2];
        // check if lit2 is a suffix of lit1
        if (isSuffix(lit1, lit2)) {
            /* if we have a included literal in the same bucket,
             * quit and let the included literal to do possible squashing */
            if (bucket1 == bucket2) {
                DEBUG_PRINTF("same bucket\n" );
                return;
            }
            /* if lit2 is a suffix but doesn't pass included checks for
             * extra info, we give up sqaushing */
            if (includedCheck(lit1, lit2)) {
                DEBUG_PRINTF("find exceptional suffix %u\n" , lit2.id);
                exception_map[id1].insert(id2);
                exception = true;
            } else if (checkParentLit(lits, id1, parent_map[id2],
                       exception_map)) {
                if (lit1.included_id == INVALID_LIT_ID) {
                    DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n" ,
                                 lit1.id, lit2.id);
                    lit1.included_id = lit2.id;
                } else {
                    /* if we have multiple included literals in one bucket,
                     * give up squashing. */
                    DEBUG_PRINTF("multiple included literals\n" );
                    lit1.included_id = INVALID_LIT_ID;
                    return;
                }
                child_id = id2;
                included = true;
            }
        }

        // At a bucket boundary, decide whether the bucket just scanned can be
        // squashed for lit1.
        size_t next = i + 1;
        u32 nextBucket = next < cnt ? group[next].first : ~0U;
        if (bucket2 != nextBucket) {
            if (included) {
                if (exception) {
                    /* give up if we have exception literals
                     * in the same bucket as the included literal. */
                    lit1.included_id = INVALID_LIT_ID;
                } else {
                    parent_map[child_id].insert(id1);

                    lit1.squash |= 1U << bucket2;
                    DEBUG_PRINTF("build squash mask %2x for %u\n" ,
                                 lit1.squash, lit1.id);
                }
                return;
            }
            exception = false;
        }
    }
}
| 789 | |
// Skip inclusion analysis for groups with more literals than this; the scan
// below is quadratic in the group size.
static constexpr u32 INCLUDED_LIMIT = 1000;

// For every (bucket, literal) group sharing a last character, attempt to
// build squash masks via buildSquashMask.
static
void findIncludedLits(vector<hwlmLiteral> &lits,
                      const vector<vector<pair<u32, u32>>> &lastCharMap) {
    /* Map for finding the positions of literal which includes a literal
     * in FDR hwlm literal vector. */
    unordered_map<u32, unordered_set<u32>> parent_map;

    /* Map for finding the positions of exception literals which could
     * sometimes match if a literal matches in FDR hwlm literal vector. */
    unordered_map<u32, unordered_set<u32>> exception_map;
    for (const auto &group : lastCharMap) {
        size_t cnt = group.size();
        if (cnt > INCLUDED_LIMIT) {
            continue;
        }
        for (size_t i = 0; i < cnt; i++) {
            u32 bucket1 = group[i].first;
            u32 id1 = group[i].second;
            // "pure" literals are never squashed.
            if (lits[id1].pure) {
                continue;
            }
            buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
                            exception_map);
        }
    }
}
| 818 | |
| 819 | static |
| 820 | void addIncludedInfo( |
| 821 | vector<hwlmLiteral> &lits, u32 nBuckets, |
| 822 | map<BucketIndex, vector<LiteralIndex>> &bucketToLits) { |
| 823 | vector<vector<pair<u32, u32>>> lastCharMap(256); |
| 824 | |
| 825 | for (BucketIndex b = 0; b < nBuckets; b++) { |
| 826 | if (!bucketToLits[b].empty()) { |
| 827 | for (const LiteralIndex &lit_idx : bucketToLits[b]) { |
| 828 | const auto &lit = lits[lit_idx]; |
| 829 | u8 c = mytoupper(lit.s.back()); |
| 830 | lastCharMap[c].emplace_back(b, lit_idx); |
| 831 | } |
| 832 | } |
| 833 | } |
| 834 | |
| 835 | findIncludedLits(lits, lastCharMap); |
| 836 | } |
| 837 | |
| 838 | } // namespace |
| 839 | |
/**
 * \brief Build an HWLM prototype for the given literals: attempt Teddy first
 * (when allowed by the grey box), then fall back to FDR.
 *
 * \param hint engine hint used by unit tests; HINT_INVALID in production.
 * \return the prototype, or nullptr if no suitable engine can be selected.
 */
static
unique_ptr<HWLMProto> fdrBuildProtoInternal(u8 engType,
                                            vector<hwlmLiteral> &lits,
                                            bool make_small,
                                            const target_t &target,
                                            const Grey &grey, u32 hint) {
    DEBUG_PRINTF("cpu has %s\n" , target.has_avx2() ? "avx2" : "no-avx2" );

    if (grey.fdrAllowTeddy) {
        auto proto = teddyBuildProtoHinted(engType, lits, make_small, hint,
                                           target);
        if (proto) {
            DEBUG_PRINTF("build with teddy succeeded\n" );
            return proto;
        } else {
            DEBUG_PRINTF("build with teddy failed, will try with FDR\n" );
        }
    }

    auto des = (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
                                      : getFdrDescription(hint);
    if (!des) {
        return nullptr;
    }

    // temporary hack for unit testing
    if (hint != HINT_INVALID) {
        des->bits = 9;
        des->stride = 1;
    }

    auto bucketToLits = assignStringsToBuckets(lits, *des);
    addIncludedInfo(lits, des->getNumBuckets(), bucketToLits);
    auto proto =
        ue2::make_unique<HWLMProto>(engType, move(des), lits, bucketToLits,
                                    make_small);
    return proto;
}
| 878 | |
// Public entry point: build a prototype with no engine hint.
unique_ptr<HWLMProto> fdrBuildProto(u8 engType, vector<hwlmLiteral> lits,
                                    bool make_small, const target_t &target,
                                    const Grey &grey) {
    return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
                                 HINT_INVALID);
}
| 885 | |
// Dispatch bytecode generation to the Teddy builder or the FDR compiler,
// depending on which engine the prototype selected.
static
bytecode_ptr<FDR> fdrBuildTableInternal(const HWLMProto &proto,
                                        const Grey &grey) {

    if (proto.teddyEng) {
        return teddyBuildTable(proto, grey);
    }

    FDRCompiler fc(proto.lits, proto.bucketToLits, *(proto.fdrEng),
                   proto.make_small, grey);
    return fc.build();
}
| 898 | |
// Public entry point: generate FDR (or Teddy) bytecode from a prototype.
bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey) {
    return fdrBuildTableInternal(proto, grey);
}
| 902 | |
| 903 | #if !defined(RELEASE_BUILD) |
| 904 | |
// Test-only entry point: build a prototype for a specific hinted engine.
unique_ptr<HWLMProto> fdrBuildProtoHinted(u8 engType,
                                          vector<hwlmLiteral> lits,
                                          bool make_small, u32 hint,
                                          const target_t &target,
                                          const Grey &grey) {
    return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
                                 hint);
}
| 913 | |
| 914 | #endif |
| 915 | |
// Returns the total size in bytes of a built FDR engine, as recorded in its
// header.
size_t fdrSize(const FDR *fdr) {
    assert(fdr);
    return fdr->size;
}
| 920 | |
| 921 | } // namespace ue2 |
| 922 | |