/*
 * Copyright (c) 2015-2019, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "engine_description.h"
#include "teddy_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/verify_types.h"

#include <algorithm>
#include <cstring>
#include <set>

using namespace std;

namespace ue2 {

using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>;

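/**
 * Pack the trailing bytes of \p v into the most-significant bytes of a u64a.
 * Like the loop in fillLitInfo below, this assumes a little-endian host: as
 * an illustrative example, v = {0xab, 0xcd} gives m[6] = 0xab, m[7] = 0xcd,
 * i.e. a result of 0xcdab000000000000, so the last byte of v lands in the
 * top byte of the mask.
 */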
static
u64a make_u64a_mask(const vector<u8> &v) {
    assert(v.size() <= sizeof(u64a));
    if (v.size() > sizeof(u64a)) {
        throw std::exception();
    }

    u64a mask = 0;
    size_t vlen = v.size();
    size_t len = std::min(vlen, sizeof(mask));
    unsigned char *m = (unsigned char *)&mask;
    memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
    return mask;
}

/**
 * Build a temporary vector of LitInfo structures (without the corresponding
 * pointers to the actual strings; these cannot be laid out yet). These
 * stay in 1:1 correspondence with the lits[] vector as that's the only
 * place we have to obtain our full strings.
 */
static
void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
                 CONF_TYPE &andmsk) {
    const CONF_TYPE all_ones = ~(u64a)0;
    andmsk = all_ones; // fill in with 'and' of all literal masks

    for (LiteralIndex i = 0; i < lits.size(); i++) {
        const hwlmLiteral &lit = lits[i];
        LitInfo &info = tmpLitInfo[i];
        memset(&info, 0, sizeof(info));
        info.id = lit.id;
        u8 flags = 0;
        if (lit.noruns) {
            flags |= FDR_LIT_FLAG_NOREPEAT;
        }
        info.flags = flags;
        info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
        info.groups = lit.groups;
        info.pure = lit.pure;

        // these are built up assuming a LE machine
        CONF_TYPE msk = all_ones;
        CONF_TYPE val = 0;
        for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
            u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
            if (j >= lit.s.size()) {
                msk &= ~((CONF_TYPE)0xff << shiftLoc);
            } else {
                u8 c = lit.s[lit.s.size() - j - 1];
                if (lit.nocase && ourisalpha(c)) {
                    msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
                    val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
                } else {
                    val |= (CONF_TYPE)c << shiftLoc;
                }
            }
        }
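        // At this point the literal sits right-aligned at the top of
        // val/msk. For an illustrative two-byte nocase literal "ab"
        // (assuming CASE_BIT == 0x20 and CASE_CLEAR == 0xdf from
        // util/compare.h): val = 0x4241000000000000 and
        // msk = 0xdfdf000000000000, i.e. the confirm compare ignores the
        // case bit of alphabetic bytes and ignores bytes beyond the literal
        // entirely.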

        info.v = val;
        info.msk = msk;
        if (!lit.msk.empty()) {
            u64a l_msk = make_u64a_mask(lit.msk);
            u64a l_cmp = make_u64a_mask(lit.cmp);

            // test for consistency - if there's intersection, then v and msk
            // values must line up
            UNUSED u64a intersection = l_msk & info.msk;
            assert((info.v & intersection) == (l_cmp & intersection));

            // incorporate lit.msk, lit.cmp into v and msk
            info.msk |= l_msk;
            info.v |= l_cmp;
        }

        andmsk &= info.msk;
    }
}

//#define FDR_CONFIRM_DUMP 1

static
bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
                                       bool make_small) {
    // Every literal must fit within CONF_TYPE.
    assert(all_of_in(lits, [](const hwlmLiteral &lit) {
        return lit.s.size() <= sizeof(CONF_TYPE);
    }));

    vector<LitInfo> tmpLitInfo(lits.size());
    CONF_TYPE andmsk;
    fillLitInfo(lits, tmpLitInfo, andmsk);

#ifdef FDR_CONFIRM_DUMP
    printf("-------------------\n");
#endif

    // just magic numbers and crude measures for now
    u32 nBits;
    if (make_small) {
        nBits = min(10U, lg2(lits.size()) + 1);
    } else {
        nBits = lg2(lits.size()) + 4;
    }

    CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;

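    // CONF_HASH_CALL (defined in fdr_confirm.h) reduces (v & andmsk) to an
    // nBits-bit slot index -- a multiplicative hash using 'mult' above -- so
    // the bitsToLitIndex table built below has (1 << nBits) entries, each
    // heading a chain of LitInfo entries that share that hash.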
    // we can walk the vector and assign elements from the vectors to a
    // map by hash value
    map<u32, vector<LiteralIndex> > res2lits;
    hwlm_group_t gm = 0;
    for (LiteralIndex i = 0; i < lits.size(); i++) {
        LitInfo &li = tmpLitInfo[i];
        u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
        DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
        res2lits[hash].push_back(i);
        gm |= li.groups;
    }

#ifdef FDR_CONFIRM_DUMP
    // print out the literals reversed - makes it easier to line up analyses
    // that are end-offset based
    for (const auto &m : res2lits) {
        const u32 &hash = m.first;
        const vector<LiteralIndex> &vlidx = m.second;
        if (vlidx.size() <= 1) {
            continue;
        }
        printf("%x -> %zu literals\n", hash, vlidx.size());
        size_t min_len = lits[vlidx.front()].s.size();

        vector<set<u8>> vsl; // contains the set of chars at each location
                             // reversed from the end

        for (const auto &litIdx : vlidx) {
            const auto &lit = lits[litIdx];
            if (lit.s.size() > vsl.size()) {
                vsl.resize(lit.s.size());
            }
            for (size_t j = lit.s.size(); j != 0; j--) {
                vsl[lit.s.size() - j].insert(lit.s[j - 1]);
            }
            min_len = min(min_len, lit.s.size());
        }
        printf("common ");
        for (size_t j = 0; j < min_len; j++) {
            if (vsl[j].size() == 1) {
                printf("%02x", *vsl[j].begin());
            } else {
                printf("__");
            }
        }
        printf("\n");
        for (const auto &litIdx : vlidx) {
            const auto &lit = lits[litIdx];
            printf("%8x %c", lit.id, lit.nocase ? '!' : ' ');
            for (size_t j = lit.s.size(); j != 0; j--) {
                size_t dist_from_end = lit.s.size() - j;
                if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
                    printf("__");
                } else {
                    printf("%02x", lit.s[j - 1]);
                }
            }
            printf("\n");
        }
        size_t total_compares = 0;
        for (const auto &v : vsl) {
            total_compares += v.size();
        }
        size_t total_string_size = 0;
        for (const auto &litIdx : vlidx) {
            const auto &lit = lits[litIdx];
            total_string_size += lit.s.size();
        }
        printf("Total compare load: %zu Total string size: %zu\n\n",
               total_compares, total_string_size);
    }
#endif

    const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);

    // this size can now be a worst-case as we can always be a bit smaller
    size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
                  ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
                  sizeof(LitInfo) * lits.size();
    size = ROUNDUP_N(size, alignof(FDRConfirm));

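    // The confirm block built below is laid out as:
    //
    //     FDRConfirm header
    //     (padding to alignof(u32))
    //     u32 bitsToLitIndex[1 << nBits]  // byte offset of first LitInfo
    //                                     // for that hash, 0 if slot empty
    //     (padding to alignof(LitInfo))
    //     LitInfo chains, one contiguous run per occupied hash slot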
    auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size);
    assert(fdrc); // otherwise would have thrown std::bad_alloc

    fdrc->andmsk = andmsk;
    fdrc->mult = mult;
    fdrc->nBits = nBits;

    fdrc->groups = gm;

    // After the FDRConfirm, we have the lit index array.
    u8 *fdrc_base = (u8 *)fdrc.get();
    u8 *ptr = fdrc_base + sizeof(*fdrc);
    ptr = ROUNDUP_PTR(ptr, alignof(u32));
    u32 *bitsToLitIndex = (u32 *)ptr;
    ptr += bitsToLitIndexSize;

    // After the lit index array, we have the LitInfo structures themselves,
    // which vary in size (as each may have a variable-length string after it).
    ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));

    // Walk the map by hash value assigning indexes and laying out the
    // elements (and their associated string confirm material) in memory.
    for (const auto &m : res2lits) {
        const u32 hash = m.first;
        const vector<LiteralIndex> &vlidx = m.second;
        bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base);
        for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) {
            LiteralIndex litIdx = *i;

            // Write LitInfo header.
            LitInfo &finalLI = *(LitInfo *)ptr;
            finalLI = tmpLitInfo[litIdx];

            ptr += sizeof(LitInfo); // String starts directly after LitInfo.
            assert(lits[litIdx].s.size() <= sizeof(CONF_TYPE));
            if (next(i) == e) {
                finalLI.next = 0;
            } else {
                finalLI.next = 1;
            }
        }
        assert((size_t)(ptr - fdrc_base) <= size);
    }

    // Return actual used size, not worst-case size. Must be rounded up to
    // FDRConfirm alignment so that the caller can lay out a sequence of these.
    size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
                                   alignof(FDRConfirm));
    assert(actual_size <= size);
    fdrc.shrink(actual_size);

    return fdrc;
}

bytecode_ptr<u8>
setupFullConfs(const vector<hwlmLiteral> &lits,
               const EngineDescription &eng,
               const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
               bool make_small) {
    unique_ptr<TeddyEngineDescription> teddyDescr =
        getTeddyDescription(eng.getID());

    BC2CONF bc2Conf;
    u32 totalConfirmSize = 0;
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        if (contains(bucketToLits, b)) {
            vector<hwlmLiteral> vl;
            for (const LiteralIndex &lit_idx : bucketToLits.at(b)) {
                vl.push_back(lits[lit_idx]);
            }

            DEBUG_PRINTF("b %d sz %zu\n", b, vl.size());
            auto fc = getFDRConfirm(vl, make_small);
            totalConfirmSize += fc.size();
            bc2Conf.emplace(b, move(fc));
        }
    }

    u32 nBuckets = eng.getNumBuckets();
    u32 totalConfSwitchSize = ROUNDUP_CL(nBuckets * sizeof(u32));
    u32 totalSize = totalConfSwitchSize + totalConfirmSize;

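    // Output layout: a cache-line-rounded table of u32 offsets (confBase),
    // indexed by bucket, followed by the per-bucket FDRConfirm blocks built
    // by getFDRConfirm() above. Each offset is relative to the start of the
    // buffer; buckets with no literals keep a zero offset.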
    auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *confBase = (u32 *)buf.get();
    u8 *ptr = buf.get() + totalConfSwitchSize;
    assert(ISALIGNED_CL(ptr));

    for (const auto &m : bc2Conf) {
        const BucketIndex &idx = m.first;
        const bytecode_ptr<FDRConfirm> &p = m.second;
        // confirm offset is relative to the base of this structure, now
        u32 confirm_offset = verify_u32(ptr - buf.get());
        memcpy(ptr, p.get(), p.size());
        ptr += p.size();
        confBase[idx] = confirm_offset;
    }

    return buf;
}

} // namespace ue2