vermicelli.h source code [ClickHouse/contrib/hyperscan/src/nfa/vermicelli.h]

1	/*
2	* Copyright (c) 2015-2016, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/* \file*
30	* \brief Vermicelli: single-byte and double-byte acceleration.
31	*/
32
33	#ifndef VERMICELLI_H
34	#define VERMICELLI_H
35
36	#include "util/bitutils.h"
37	#include "util/simd_utils.h"
38	#include "util/unaligned.h"
39
40	#include "vermicelli_sse.h"
41
42	static really_inline
43	const u8 vermicelliExec(char* c, char nocase, const u8 *buf,
44	const u8 *buf_end) {
45	DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
46	nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
47	assert(buf < buf_end);
48
49	// Handle small scans.
50	if (buf_end - buf < VERM_BOUNDARY) {
51	for (; buf < buf_end; buf++) {
52	char cur = (char)*buf;
53	if (nocase) {
54	cur &= CASE_CLEAR;
55	}
56	if (cur == c) {
57	break;
58	}
59	}
60	return buf;
61	}
62
63	VERM_TYPE chars = VERM_SET_FN(c); / nocase already uppercase /
64	uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
65	if (min) {
66	// Input isn't aligned, so we need to run one iteration with an
67	// unaligned load, then skip buf forward to the next aligned address.
68	// There's some small overlap here, but we don't mind scanning it twice
69	// if we can do it quickly, do we?
70	const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, `0`)
71	: vermUnalign(chars, buf, `0`);
72	if (ptr) {
73	return ptr;
74	}
75
76	buf += VERM_BOUNDARY - min;
77	assert(buf < buf_end);
78	}
79
80	// Aligned loops from here on in
81	const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - `1`, `0`)
82	: vermSearchAligned(chars, buf, buf_end - `1`, `0`);
83	if (ptr) {
84	return ptr;
85	}
86
87	// Tidy up the mess at the end
88	ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, `0`)
89	: vermUnalign(chars, buf_end - VERM_BOUNDARY, `0`);
90	return ptr ? ptr : buf_end;
91	}
92
93	/ like vermicelliExec except returns the address of the first character which*
94	* is not c */
95	static really_inline
96	const u8 nvermicelliExec(char* c, char nocase, const u8 *buf,
97	const u8 *buf_end) {
98	DEBUG_PRINTF("nverm scan %s\\x%02hhx over %zu bytes\n",
99	nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
100	assert(buf < buf_end);
101
102	// Handle small scans.
103	if (buf_end - buf < VERM_BOUNDARY) {
104	for (; buf < buf_end; buf++) {
105	char cur = (char)*buf;
106	if (nocase) {
107	cur &= CASE_CLEAR;
108	}
109	if (cur != c) {
110	break;
111	}
112	}
113	return buf;
114	}
115
116	VERM_TYPE chars = VERM_SET_FN(c); / nocase already uppercase /
117	size_t min = (size_t)buf % VERM_BOUNDARY;
118	if (min) {
119	// Input isn't aligned, so we need to run one iteration with an
120	// unaligned load, then skip buf forward to the next aligned address.
121	// There's some small overlap here, but we don't mind scanning it twice
122	// if we can do it quickly, do we?
123	const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, `1`)
124	: vermUnalign(chars, buf, `1`);
125	if (ptr) {
126	return ptr;
127	}
128
129	buf += VERM_BOUNDARY - min;
130	assert(buf < buf_end);
131	}
132
133	// Aligned loops from here on in
134	const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - `1`, `1`)
135	: vermSearchAligned(chars, buf, buf_end - `1`, `1`);
136	if (ptr) {
137	return ptr;
138	}
139
140	// Tidy up the mess at the end
141	ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, `1`)
142	: vermUnalign(chars, buf_end - VERM_BOUNDARY, `1`);
143	return ptr ? ptr : buf_end;
144	}
145
146	static really_inline
147	const u8 vermicelliDoubleExec(char* c1, char c2, char nocase, const u8 *buf,
148	const u8 *buf_end) {
149	DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
150	nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
151	assert(buf < buf_end);
152	assert((buf_end - buf) >= VERM_BOUNDARY);
153
154	uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
155	VERM_TYPE chars1 = VERM_SET_FN(c1); / nocase already uppercase /
156	VERM_TYPE chars2 = VERM_SET_FN(c2); / nocase already uppercase /
157
158	if (min) {
159	// Input isn't aligned, so we need to run one iteration with an
160	// unaligned load, then skip buf forward to the next aligned address.
161	// There's some small overlap here, but we don't mind scanning it twice
162	// if we can do it quickly, do we?
163	const u8 *ptr = nocase
164	? dvermPreconditionNocase(chars1, chars2, buf)
165	: dvermPrecondition(chars1, chars2, buf);
166	if (ptr) {
167	return ptr;
168	}
169
170	buf += VERM_BOUNDARY - min;
171	assert(buf < buf_end);
172	}
173
174	// Aligned loops from here on in
175	const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2,
176	buf, buf_end)
177	: dvermSearchAligned(chars1, chars2, c1, c2, buf,
178	buf_end);
179	if (ptr) {
180	return ptr;
181	}
182
183	// Tidy up the mess at the end
184	ptr = nocase ? dvermPreconditionNocase(chars1, chars2,
185	buf_end - VERM_BOUNDARY)
186	: dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
187
188	if (ptr) {
189	return ptr;
190	}
191
192	/ check for partial match at end /
193	u8 mask = nocase ? CASE_CLEAR : `0xff`;
194	if ((buf_end[-`1`] & mask) == (u8)c1) {
195	DEBUG_PRINTF("partial!!!\n");
196	return buf_end - `1`;
197	}
198
199	return buf_end;
200	}
201
202	static really_inline
203	const u8 vermicelliDoubleMaskedExec(char* c1, char c2, char m1, char m2,
204	const u8 buf, const* u8 *buf_end) {
205	DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) "
206	"over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf));
207	assert(buf < buf_end);
208	assert((buf_end - buf) >= VERM_BOUNDARY);
209
210	uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
211	VERM_TYPE chars1 = VERM_SET_FN(c1);
212	VERM_TYPE chars2 = VERM_SET_FN(c2);
213	VERM_TYPE mask1 = VERM_SET_FN(m1);
214	VERM_TYPE mask2 = VERM_SET_FN(m2);
215
216	if (min) {
217	// Input isn't aligned, so we need to run one iteration with an
218	// unaligned load, then skip buf forward to the next aligned address.
219	// There's some small overlap here, but we don't mind scanning it twice
220	// if we can do it quickly, do we?
221	const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf);
222	if (p) {
223	return p;
224	}
225
226	buf += VERM_BOUNDARY - min;
227	assert(buf < buf_end);
228	}
229
230	// Aligned loops from here on in
231	const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1,
232	c2, m1, m2, buf, buf_end);
233	if (ptr) {
234	return ptr;
235	}
236
237	// Tidy up the mess at the end
238	ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2,
239	buf_end - VERM_BOUNDARY);
240
241	if (ptr) {
242	return ptr;
243	}
244
245	/ check for partial match at end /
246	if ((buf_end[-`1`] & m1) == (u8)c1) {
247	return buf_end - `1`;
248	}
249
250	return buf_end;
251	}
252
253	// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if
254	// character not found.
255	static really_inline
256	const u8 rvermicelliExec(char* c, char nocase, const u8 *buf,
257	const u8 *buf_end) {
258	DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
259	nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
260	assert(buf < buf_end);
261
262	// Handle small scans.
263	if (buf_end - buf < VERM_BOUNDARY) {
264	for (buf_end--; buf_end >= buf; buf_end--) {
265	char cur = (char)*buf_end;
266	if (nocase) {
267	cur &= CASE_CLEAR;
268	}
269	if (cur == c) {
270	break;
271	}
272	}
273	return buf_end;
274	}
275
276	VERM_TYPE chars = VERM_SET_FN(c); / nocase already uppercase /
277	size_t min = (size_t)buf_end % VERM_BOUNDARY;
278
279	if (min) {
280	// Input isn't aligned, so we need to run one iteration with an
281	// unaligned load, then skip buf backward to the next aligned address.
282	// There's some small overlap here, but we don't mind scanning it twice
283	// if we can do it quickly, do we?
284	if (nocase) {
285	const u8 *ptr =
286	rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, `0`);
287	if (ptr) {
288	return ptr;
289	}
290	} else {
291	const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, `0`);
292	if (ptr) {
293	return ptr;
294	}
295	}
296
297	buf_end -= min;
298	if (buf >= buf_end) {
299	return buf_end;
300	}
301	}
302
303	// Aligned loops from here on in.
304	const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, `0`)
305	: rvermSearchAligned(chars, buf, buf_end, `0`);
306	if (ptr) {
307	return ptr;
308	}
309
310	// Tidy up the mess at the end, return buf - 1 if not found.
311	ptr = nocase ? rvermUnalignNocase(chars, buf, `0`)
312	: rvermUnalign(chars, buf, `0`);
313	return ptr ? ptr : buf - `1`;
314	}
315
316	/ like rvermicelliExec except returns the address of the last character which*
317	* is not c */
318	static really_inline
319	const u8 rnvermicelliExec(char* c, char nocase, const u8 *buf,
320	const u8 *buf_end) {
321	DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
322	nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
323	assert(buf < buf_end);
324
325	// Handle small scans.
326	if (buf_end - buf < VERM_BOUNDARY) {
327	for (buf_end--; buf_end >= buf; buf_end--) {
328	char cur = (char)*buf_end;
329	if (nocase) {
330	cur &= CASE_CLEAR;
331	}
332	if (cur != c) {
333	break;
334	}
335	}
336	return buf_end;
337	}
338
339	VERM_TYPE chars = VERM_SET_FN(c); / nocase already uppercase /
340	size_t min = (size_t)buf_end % VERM_BOUNDARY;
341
342	if (min) {
343	// Input isn't aligned, so we need to run one iteration with an
344	// unaligned load, then skip buf backward to the next aligned address.
345	// There's some small overlap here, but we don't mind scanning it twice
346	// if we can do it quickly, do we?
347	if (nocase) {
348	const u8 *ptr =
349	rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, `1`);
350	if (ptr) {
351	return ptr;
352	}
353	} else {
354	const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, `1`);
355	if (ptr) {
356	return ptr;
357	}
358	}
359
360	buf_end -= min;
361	if (buf >= buf_end) {
362	return buf_end;
363	}
364	}
365
366	// Aligned loops from here on in.
367	const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, `1`)
368	: rvermSearchAligned(chars, buf, buf_end, `1`);
369	if (ptr) {
370	return ptr;
371	}
372
373	// Tidy up the mess at the end, return buf - 1 if not found.
374	ptr = nocase ? rvermUnalignNocase(chars, buf, `1`)
375	: rvermUnalign(chars, buf, `1`);
376	return ptr ? ptr : buf - `1`;
377	}
378
379	/ returns highest offset of c2 (NOTE: not c1) /
380	static really_inline
381	const u8 rvermicelliDoubleExec(char* c1, char c2, char nocase, const u8 *buf,
382	const u8 *buf_end) {
383	DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
384	nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
385	assert(buf < buf_end);
386	assert((buf_end - buf) >= VERM_BOUNDARY);
387
388	size_t min = (size_t)buf_end % VERM_BOUNDARY;
389	VERM_TYPE chars1 = VERM_SET_FN(c1); / nocase already uppercase /
390	VERM_TYPE chars2 = VERM_SET_FN(c2); / nocase already uppercase /
391
392	if (min) {
393	// input not aligned, so we need to run one iteration with an unaligned
394	// load, then skip buf forward to the next aligned address. There's
395	// some small overlap here, but we don't mind scanning it twice if we
396	// can do it quickly, do we?
397	const u8 *ptr;
398	if (nocase) {
399	ptr = rdvermPreconditionNocase(chars1, chars2,
400	buf_end - VERM_BOUNDARY);
401	} else {
402	ptr = rdvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
403	}
404
405	if (ptr) {
406	return ptr;
407	}
408
409	buf_end -= min;
410	if (buf >= buf_end) {
411	return buf_end;
412	}
413	}
414
415	// Aligned loops from here on in
416	if (nocase) {
417	return rdvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end);
418	} else {
419	return rdvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end);
420	}
421	}
422
423	#endif /* VERMICELLI_H */
424

Browse the source code of ClickHouse/contrib/hyperscan/src/nfa/vermicelli.h