1/*
2 * Copyright (c) 2015-2016, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Vermicelli: single-byte and double-byte acceleration.
31 */
32
33#ifndef VERMICELLI_H
34#define VERMICELLI_H
35
36#include "util/bitutils.h"
37#include "util/simd_utils.h"
38#include "util/unaligned.h"
39
40#include "vermicelli_sse.h"
41
42static really_inline
43const u8 *vermicelliExec(char c, char nocase, const u8 *buf,
44 const u8 *buf_end) {
45 DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n",
46 nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
47 assert(buf < buf_end);
48
49 // Handle small scans.
50 if (buf_end - buf < VERM_BOUNDARY) {
51 for (; buf < buf_end; buf++) {
52 char cur = (char)*buf;
53 if (nocase) {
54 cur &= CASE_CLEAR;
55 }
56 if (cur == c) {
57 break;
58 }
59 }
60 return buf;
61 }
62
63 VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
64 uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
65 if (min) {
66 // Input isn't aligned, so we need to run one iteration with an
67 // unaligned load, then skip buf forward to the next aligned address.
68 // There's some small overlap here, but we don't mind scanning it twice
69 // if we can do it quickly, do we?
70 const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 0)
71 : vermUnalign(chars, buf, 0);
72 if (ptr) {
73 return ptr;
74 }
75
76 buf += VERM_BOUNDARY - min;
77 assert(buf < buf_end);
78 }
79
80 // Aligned loops from here on in
81 const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 0)
82 : vermSearchAligned(chars, buf, buf_end - 1, 0);
83 if (ptr) {
84 return ptr;
85 }
86
87 // Tidy up the mess at the end
88 ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0)
89 : vermUnalign(chars, buf_end - VERM_BOUNDARY, 0);
90 return ptr ? ptr : buf_end;
91}
92
93/* like vermicelliExec except returns the address of the first character which
94 * is not c */
95static really_inline
96const u8 *nvermicelliExec(char c, char nocase, const u8 *buf,
97 const u8 *buf_end) {
98 DEBUG_PRINTF("nverm scan %s\\x%02hhx over %zu bytes\n",
99 nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
100 assert(buf < buf_end);
101
102 // Handle small scans.
103 if (buf_end - buf < VERM_BOUNDARY) {
104 for (; buf < buf_end; buf++) {
105 char cur = (char)*buf;
106 if (nocase) {
107 cur &= CASE_CLEAR;
108 }
109 if (cur != c) {
110 break;
111 }
112 }
113 return buf;
114 }
115
116 VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
117 size_t min = (size_t)buf % VERM_BOUNDARY;
118 if (min) {
119 // Input isn't aligned, so we need to run one iteration with an
120 // unaligned load, then skip buf forward to the next aligned address.
121 // There's some small overlap here, but we don't mind scanning it twice
122 // if we can do it quickly, do we?
123 const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 1)
124 : vermUnalign(chars, buf, 1);
125 if (ptr) {
126 return ptr;
127 }
128
129 buf += VERM_BOUNDARY - min;
130 assert(buf < buf_end);
131 }
132
133 // Aligned loops from here on in
134 const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 1)
135 : vermSearchAligned(chars, buf, buf_end - 1, 1);
136 if (ptr) {
137 return ptr;
138 }
139
140 // Tidy up the mess at the end
141 ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1)
142 : vermUnalign(chars, buf_end - VERM_BOUNDARY, 1);
143 return ptr ? ptr : buf_end;
144}
145
146static really_inline
147const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
148 const u8 *buf_end) {
149 DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
150 nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
151 assert(buf < buf_end);
152 assert((buf_end - buf) >= VERM_BOUNDARY);
153
154 uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
155 VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
156 VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
157
158 if (min) {
159 // Input isn't aligned, so we need to run one iteration with an
160 // unaligned load, then skip buf forward to the next aligned address.
161 // There's some small overlap here, but we don't mind scanning it twice
162 // if we can do it quickly, do we?
163 const u8 *ptr = nocase
164 ? dvermPreconditionNocase(chars1, chars2, buf)
165 : dvermPrecondition(chars1, chars2, buf);
166 if (ptr) {
167 return ptr;
168 }
169
170 buf += VERM_BOUNDARY - min;
171 assert(buf < buf_end);
172 }
173
174 // Aligned loops from here on in
175 const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2,
176 buf, buf_end)
177 : dvermSearchAligned(chars1, chars2, c1, c2, buf,
178 buf_end);
179 if (ptr) {
180 return ptr;
181 }
182
183 // Tidy up the mess at the end
184 ptr = nocase ? dvermPreconditionNocase(chars1, chars2,
185 buf_end - VERM_BOUNDARY)
186 : dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
187
188 if (ptr) {
189 return ptr;
190 }
191
192 /* check for partial match at end */
193 u8 mask = nocase ? CASE_CLEAR : 0xff;
194 if ((buf_end[-1] & mask) == (u8)c1) {
195 DEBUG_PRINTF("partial!!!\n");
196 return buf_end - 1;
197 }
198
199 return buf_end;
200}
201
202static really_inline
203const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
204 const u8 *buf, const u8 *buf_end) {
205 DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) "
206 "over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf));
207 assert(buf < buf_end);
208 assert((buf_end - buf) >= VERM_BOUNDARY);
209
210 uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
211 VERM_TYPE chars1 = VERM_SET_FN(c1);
212 VERM_TYPE chars2 = VERM_SET_FN(c2);
213 VERM_TYPE mask1 = VERM_SET_FN(m1);
214 VERM_TYPE mask2 = VERM_SET_FN(m2);
215
216 if (min) {
217 // Input isn't aligned, so we need to run one iteration with an
218 // unaligned load, then skip buf forward to the next aligned address.
219 // There's some small overlap here, but we don't mind scanning it twice
220 // if we can do it quickly, do we?
221 const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf);
222 if (p) {
223 return p;
224 }
225
226 buf += VERM_BOUNDARY - min;
227 assert(buf < buf_end);
228 }
229
230 // Aligned loops from here on in
231 const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1,
232 c2, m1, m2, buf, buf_end);
233 if (ptr) {
234 return ptr;
235 }
236
237 // Tidy up the mess at the end
238 ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2,
239 buf_end - VERM_BOUNDARY);
240
241 if (ptr) {
242 return ptr;
243 }
244
245 /* check for partial match at end */
246 if ((buf_end[-1] & m1) == (u8)c1) {
247 return buf_end - 1;
248 }
249
250 return buf_end;
251}
252
253// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if
254// character not found.
255static really_inline
256const u8 *rvermicelliExec(char c, char nocase, const u8 *buf,
257 const u8 *buf_end) {
258 DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
259 nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
260 assert(buf < buf_end);
261
262 // Handle small scans.
263 if (buf_end - buf < VERM_BOUNDARY) {
264 for (buf_end--; buf_end >= buf; buf_end--) {
265 char cur = (char)*buf_end;
266 if (nocase) {
267 cur &= CASE_CLEAR;
268 }
269 if (cur == c) {
270 break;
271 }
272 }
273 return buf_end;
274 }
275
276 VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
277 size_t min = (size_t)buf_end % VERM_BOUNDARY;
278
279 if (min) {
280 // Input isn't aligned, so we need to run one iteration with an
281 // unaligned load, then skip buf backward to the next aligned address.
282 // There's some small overlap here, but we don't mind scanning it twice
283 // if we can do it quickly, do we?
284 if (nocase) {
285 const u8 *ptr =
286 rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0);
287 if (ptr) {
288 return ptr;
289 }
290 } else {
291 const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 0);
292 if (ptr) {
293 return ptr;
294 }
295 }
296
297 buf_end -= min;
298 if (buf >= buf_end) {
299 return buf_end;
300 }
301 }
302
303 // Aligned loops from here on in.
304 const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 0)
305 : rvermSearchAligned(chars, buf, buf_end, 0);
306 if (ptr) {
307 return ptr;
308 }
309
310 // Tidy up the mess at the end, return buf - 1 if not found.
311 ptr = nocase ? rvermUnalignNocase(chars, buf, 0)
312 : rvermUnalign(chars, buf, 0);
313 return ptr ? ptr : buf - 1;
314}
315
316/* like rvermicelliExec except returns the address of the last character which
317 * is not c */
318static really_inline
319const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf,
320 const u8 *buf_end) {
321 DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n",
322 nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
323 assert(buf < buf_end);
324
325 // Handle small scans.
326 if (buf_end - buf < VERM_BOUNDARY) {
327 for (buf_end--; buf_end >= buf; buf_end--) {
328 char cur = (char)*buf_end;
329 if (nocase) {
330 cur &= CASE_CLEAR;
331 }
332 if (cur != c) {
333 break;
334 }
335 }
336 return buf_end;
337 }
338
339 VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */
340 size_t min = (size_t)buf_end % VERM_BOUNDARY;
341
342 if (min) {
343 // Input isn't aligned, so we need to run one iteration with an
344 // unaligned load, then skip buf backward to the next aligned address.
345 // There's some small overlap here, but we don't mind scanning it twice
346 // if we can do it quickly, do we?
347 if (nocase) {
348 const u8 *ptr =
349 rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1);
350 if (ptr) {
351 return ptr;
352 }
353 } else {
354 const u8 *ptr = rvermUnalign(chars, buf_end - VERM_BOUNDARY, 1);
355 if (ptr) {
356 return ptr;
357 }
358 }
359
360 buf_end -= min;
361 if (buf >= buf_end) {
362 return buf_end;
363 }
364 }
365
366 // Aligned loops from here on in.
367 const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 1)
368 : rvermSearchAligned(chars, buf, buf_end, 1);
369 if (ptr) {
370 return ptr;
371 }
372
373 // Tidy up the mess at the end, return buf - 1 if not found.
374 ptr = nocase ? rvermUnalignNocase(chars, buf, 1)
375 : rvermUnalign(chars, buf, 1);
376 return ptr ? ptr : buf - 1;
377}
378
379/* returns highest offset of c2 (NOTE: not c1) */
380static really_inline
381const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
382 const u8 *buf_end) {
383 DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
384 nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
385 assert(buf < buf_end);
386 assert((buf_end - buf) >= VERM_BOUNDARY);
387
388 size_t min = (size_t)buf_end % VERM_BOUNDARY;
389 VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
390 VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
391
392 if (min) {
393 // input not aligned, so we need to run one iteration with an unaligned
394 // load, then skip buf forward to the next aligned address. There's
395 // some small overlap here, but we don't mind scanning it twice if we
396 // can do it quickly, do we?
397 const u8 *ptr;
398 if (nocase) {
399 ptr = rdvermPreconditionNocase(chars1, chars2,
400 buf_end - VERM_BOUNDARY);
401 } else {
402 ptr = rdvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
403 }
404
405 if (ptr) {
406 return ptr;
407 }
408
409 buf_end -= min;
410 if (buf >= buf_end) {
411 return buf_end;
412 }
413 }
414
415 // Aligned loops from here on in
416 if (nocase) {
417 return rdvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end);
418 } else {
419 return rdvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end);
420 }
421}
422
423#endif /* VERMICELLI_H */
424