1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Noodle literal matcher: runtime. |
31 | */ |
32 | #include "hwlm.h" |
33 | #include "noodle_engine.h" |
34 | #include "noodle_internal.h" |
35 | #include "scratch.h" |
36 | #include "ue2common.h" |
37 | #include "util/arch.h" |
38 | #include "util/bitutils.h" |
39 | #include "util/compare.h" |
40 | #include "util/intrinsics.h" |
41 | #include "util/join.h" |
42 | #include "util/masked_move.h" |
43 | #include "util/partial_store.h" |
44 | #include "util/simd_utils.h" |
45 | |
46 | #include <ctype.h> |
47 | #include <stdbool.h> |
48 | #include <string.h> |
49 | |
50 | /** \brief Noodle runtime context. */ |
51 | struct cb_info { |
52 | HWLMCallback cb; //!< callback function called on match |
53 | u32 id; //!< ID to pass to callback on match |
54 | struct hs_scratch *scratch; //!< scratch to pass to callback |
55 | size_t offsetAdj; //!< used in streaming mode |
56 | }; |
57 | |
58 | #if defined(HAVE_AVX512) |
59 | #define CHUNKSIZE 64 |
60 | #define MASK_TYPE m512 |
61 | #define Z_BITS 64 |
62 | #define Z_TYPE u64a |
63 | #elif defined(HAVE_AVX2) |
64 | #define CHUNKSIZE 32 |
65 | #define MASK_TYPE m256 |
66 | #define Z_BITS 32 |
67 | #define Z_TYPE u32 |
68 | #else |
69 | #define CHUNKSIZE 16 |
70 | #define MASK_TYPE m128 |
71 | #define Z_BITS 32 |
72 | #define Z_TYPE u32 |
73 | #endif |
74 | |
75 | |
76 | #define RETURN_IF_TERMINATED(x) \ |
77 | { \ |
78 | if ((x) == HWLM_TERMINATED) { \ |
79 | return HWLM_TERMINATED; \ |
80 | } \ |
81 | } |
82 | |
83 | #define SINGLE_ZSCAN() \ |
84 | do { \ |
85 | while (unlikely(z)) { \ |
86 | Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ |
87 | size_t matchPos = d - buf + pos; \ |
88 | DEBUG_PRINTF("match pos %zu\n", matchPos); \ |
89 | hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \ |
90 | RETURN_IF_TERMINATED(rv); \ |
91 | } \ |
92 | } while (0) |
93 | |
94 | #define DOUBLE_ZSCAN() \ |
95 | do { \ |
96 | while (unlikely(z)) { \ |
97 | Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ |
98 | size_t matchPos = d - buf + pos - 1; \ |
99 | DEBUG_PRINTF("match pos %zu\n", matchPos); \ |
100 | hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \ |
101 | RETURN_IF_TERMINATED(rv); \ |
102 | } \ |
103 | } while (0) |
104 | |
105 | static really_inline |
106 | u8 caseClear8(u8 x, bool noCase) { |
107 | return (u8)(noCase ? (x & (u8)0xdf) : x); |
108 | } |
109 | |
110 | // Make sure the rest of the string is there. The single character scanner |
111 | // is used only for single chars with case insensitivity used correctly, |
112 | // so it can go straight to the callback if we get this far. |
113 | static really_inline |
114 | hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, |
115 | char single, const struct cb_info *cbi, size_t pos) { |
116 | if (single) { |
117 | if (n->msk_len == 1) { |
118 | goto match; |
119 | } |
120 | } |
121 | assert(len >= n->msk_len); |
122 | u64a v = |
123 | partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len); |
124 | DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n" , v, n->msk, n->cmp); |
125 | if ((v & n->msk) != n->cmp) { |
126 | /* mask didn't match */ |
127 | return HWLM_SUCCESS; |
128 | } |
129 | |
130 | match: |
131 | pos -= cbi->offsetAdj; |
132 | DEBUG_PRINTF("match @ %zu\n" , pos + n->key_offset); |
133 | hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch); |
134 | if (rv == HWLM_TERMINATE_MATCHING) { |
135 | return HWLM_TERMINATED; |
136 | } |
137 | return HWLM_SUCCESS; |
138 | } |
139 | |
140 | #if defined(HAVE_AVX512) |
141 | #define CHUNKSIZE 64 |
142 | #define MASK_TYPE m512 |
143 | #include "noodle_engine_avx512.c" |
144 | #elif defined(HAVE_AVX2) |
145 | #define CHUNKSIZE 32 |
146 | #define MASK_TYPE m256 |
147 | #include "noodle_engine_avx2.c" |
148 | #else |
149 | #define CHUNKSIZE 16 |
150 | #define MASK_TYPE m128 |
151 | #include "noodle_engine_sse.c" |
152 | #endif |
153 | |
154 | static really_inline |
155 | hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, |
156 | size_t len, size_t start, bool noCase, |
157 | const struct cb_info *cbi) { |
158 | |
159 | const MASK_TYPE mask1 = getMask(n->key0, noCase); |
160 | const MASK_TYPE caseMask = getCaseMask(); |
161 | |
162 | size_t offset = start + n->msk_len - 1; |
163 | size_t end = len; |
164 | assert(offset < end); |
165 | |
166 | #if !defined(HAVE_AVX512) |
167 | hwlm_error_t rv; |
168 | |
169 | if (end - offset < CHUNKSIZE) { |
170 | rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset, |
171 | end); |
172 | return rv; |
173 | } |
174 | |
175 | if (end - offset == CHUNKSIZE) { |
176 | rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, |
177 | cbi, offset, end); |
178 | return rv; |
179 | } |
180 | |
181 | uintptr_t data = (uintptr_t)buf; |
182 | uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; |
183 | uintptr_t last = data + end; |
184 | uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; |
185 | uintptr_t s3Start = end - CHUNKSIZE; |
186 | |
187 | if (offset != s2Start) { |
188 | // first scan out to the fast scan starting point |
189 | DEBUG_PRINTF("stage 1: -> %zu\n" , s2Start); |
190 | rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, |
191 | cbi, offset, s2Start); |
192 | RETURN_IF_TERMINATED(rv); |
193 | } |
194 | |
195 | if (likely(s2Start != s2End)) { |
196 | // scan as far as we can, bounded by the last point this key can |
197 | // possibly match |
198 | DEBUG_PRINTF("fast: ~ %zu -> %zu\n" , s2Start, s2End); |
199 | rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start, |
200 | s2End); |
201 | RETURN_IF_TERMINATED(rv); |
202 | } |
203 | |
204 | // if we are done bail out |
205 | if (s2End == len) { |
206 | return HWLM_SUCCESS; |
207 | } |
208 | |
209 | DEBUG_PRINTF("stage 3: %zu -> %zu\n" , s2End, len); |
210 | rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi, |
211 | s2End, len); |
212 | |
213 | return rv; |
214 | #else // HAVE_AVX512 |
215 | return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset, |
216 | end); |
217 | #endif |
218 | } |
219 | |
220 | static really_inline |
221 | hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, |
222 | size_t len, size_t start, bool noCase, |
223 | const struct cb_info *cbi) { |
224 | // we stop scanning for the key-fragment when the rest of the key can't |
225 | // possibly fit in the remaining buffer |
226 | size_t end = len - n->key_offset + 2; |
227 | |
228 | // the first place the key can match |
229 | size_t offset = start + n->msk_len - n->key_offset; |
230 | |
231 | const MASK_TYPE caseMask = getCaseMask(); |
232 | const MASK_TYPE mask1 = getMask(n->key0, noCase); |
233 | const MASK_TYPE mask2 = getMask(n->key1, noCase); |
234 | |
235 | #if !defined(HAVE_AVX512) |
236 | hwlm_error_t rv; |
237 | |
238 | if (end - offset < CHUNKSIZE) { |
239 | rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, |
240 | offset, end); |
241 | return rv; |
242 | } |
243 | if (end - offset == CHUNKSIZE) { |
244 | rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, |
245 | mask2, cbi, offset, end); |
246 | return rv; |
247 | } |
248 | |
249 | uintptr_t data = (uintptr_t)buf; |
250 | uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; |
251 | uintptr_t s1End = s2Start + 1; |
252 | uintptr_t last = data + end; |
253 | uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; |
254 | uintptr_t s3Start = end - CHUNKSIZE; |
255 | uintptr_t off = offset; |
256 | |
257 | if (s2Start != off) { |
258 | // first scan out to the fast scan starting point plus one char past to |
259 | // catch the key on the overlap |
260 | DEBUG_PRINTF("stage 1: %zu -> %zu\n" , off, s2Start); |
261 | rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, |
262 | mask2, cbi, off, s1End); |
263 | RETURN_IF_TERMINATED(rv); |
264 | } |
265 | off = s1End; |
266 | |
267 | if (s2Start >= end) { |
268 | DEBUG_PRINTF("s2 == mL %zu\n" , end); |
269 | return HWLM_SUCCESS; |
270 | } |
271 | |
272 | if (likely(s2Start != s2End)) { |
273 | // scan as far as we can, bounded by the last point this key can |
274 | // possibly match |
275 | DEBUG_PRINTF("fast: ~ %zu -> %zu\n" , s2Start, s3Start); |
276 | rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi, |
277 | s2Start, s2End); |
278 | RETURN_IF_TERMINATED(rv); |
279 | off = s2End; |
280 | } |
281 | |
282 | // if there isn't enough data left to match the key, bail out |
283 | if (s2End == end) { |
284 | return HWLM_SUCCESS; |
285 | } |
286 | |
287 | DEBUG_PRINTF("stage 3: %zu -> %zu\n" , s3Start, end); |
288 | rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, |
289 | mask2, cbi, off, end); |
290 | |
291 | return rv; |
292 | #else // AVX512 |
293 | return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi, |
294 | offset, end); |
295 | #endif // AVX512 |
296 | } |
297 | |
298 | |
299 | static really_inline |
300 | hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf, |
301 | size_t len, size_t start, |
302 | const struct cb_info *cbi) { |
303 | return scanSingleMain(n, buf, len, start, 1, cbi); |
304 | } |
305 | |
306 | static really_inline |
307 | hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf, |
308 | size_t len, size_t start, |
309 | const struct cb_info *cbi) { |
310 | return scanSingleMain(n, buf, len, start, 0, cbi); |
311 | } |
312 | |
313 | // Single-character specialisation, used when keyLen = 1 |
314 | static really_inline |
315 | hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len, |
316 | size_t start, bool noCase, const struct cb_info *cbi) { |
317 | if (!ourisalpha(n->key0)) { |
318 | noCase = 0; // force noCase off if we don't have an alphabetic char |
319 | } |
320 | |
321 | // kinda ugly, but this forces constant propagation |
322 | if (noCase) { |
323 | return scanSingleNoCase(n, buf, len, start, cbi); |
324 | } else { |
325 | return scanSingleCase(n, buf, len, start, cbi); |
326 | } |
327 | } |
328 | |
329 | |
330 | static really_inline |
331 | hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf, |
332 | size_t len, size_t start, |
333 | const struct cb_info *cbi) { |
334 | return scanDoubleMain(n, buf, len, start, 1, cbi); |
335 | } |
336 | |
337 | static really_inline |
338 | hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf, |
339 | size_t len, size_t start, |
340 | const struct cb_info *cbi) { |
341 | return scanDoubleMain(n, buf, len, start, 0, cbi); |
342 | } |
343 | |
344 | |
345 | static really_inline |
346 | hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len, |
347 | size_t start, bool noCase, const struct cb_info *cbi) { |
348 | // kinda ugly, but this forces constant propagation |
349 | if (noCase) { |
350 | return scanDoubleNoCase(n, buf, len, start, cbi); |
351 | } else { |
352 | return scanDoubleCase(n, buf, len, start, cbi); |
353 | } |
354 | } |
355 | |
356 | // main entry point for the scan code |
357 | static really_inline |
358 | hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, |
359 | size_t start, char single, bool noCase, |
360 | const struct cb_info *cbi) { |
361 | if (len - start < n->msk_len) { |
362 | // can't find string of length keyLen in a shorter buffer |
363 | return HWLM_SUCCESS; |
364 | } |
365 | |
366 | if (single) { |
367 | return scanSingle(n, buf, len, start, noCase, cbi); |
368 | } else { |
369 | return scanDouble(n, buf, len, start, noCase, cbi); |
370 | } |
371 | } |
372 | |
373 | /** \brief Block-mode scanner. */ |
374 | hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, |
375 | size_t start, HWLMCallback cb, |
376 | struct hs_scratch *scratch) { |
377 | assert(n && buf); |
378 | |
379 | struct cb_info cbi = {cb, n->id, scratch, 0}; |
380 | DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n" , len, n->msk_len, |
381 | (const char *)&n->cmp, buf); |
382 | |
383 | return scan(n, buf, len, start, n->single, n->nocase, &cbi); |
384 | } |
385 | |
386 | /** \brief Streaming-mode scanner. */ |
387 | hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, |
388 | size_t hlen, const u8 *buf, size_t len, |
389 | HWLMCallback cb, struct hs_scratch *scratch) { |
390 | assert(n); |
391 | |
392 | if (len + hlen < n->msk_len) { |
393 | DEBUG_PRINTF("not enough bytes for a match\n" ); |
394 | return HWLM_SUCCESS; |
395 | } |
396 | |
397 | struct cb_info cbi = {cb, n->id, scratch, 0}; |
398 | DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n" , len, hlen, |
399 | n->msk_len, (const char *)&n->cmp, buf); |
400 | |
401 | if (hlen && n->msk_len > 1) { |
402 | /* |
403 | * we have history, so build up a buffer from enough of the history |
404 | * buffer plus what we've been given to scan. Since this is relatively |
405 | * short, just check against msk+cmp per byte offset for matches. |
406 | */ |
407 | assert(hbuf); |
408 | u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2]; |
409 | memset(temp_buf, 0, sizeof(temp_buf)); |
410 | |
411 | assert(n->msk_len); |
412 | size_t tl1 = MIN((size_t)n->msk_len - 1, hlen); |
413 | size_t tl2 = MIN((size_t)n->msk_len - 1, len); |
414 | |
415 | assert(tl1 + tl2 <= sizeof(temp_buf)); |
416 | assert(tl1 + tl2 >= n->msk_len); |
417 | assert(tl1 <= sizeof(u64a)); |
418 | assert(tl2 <= sizeof(u64a)); |
419 | DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n" , tl1, tl2); |
420 | |
421 | unaligned_store_u64a(temp_buf, |
422 | partial_load_u64a(hbuf + hlen - tl1, tl1)); |
423 | unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2)); |
424 | |
425 | for (size_t i = 0; i <= tl1 + tl2 - n->msk_len; i++) { |
426 | u64a v = unaligned_load_u64a(temp_buf + i); |
427 | if ((v & n->msk) == n->cmp) { |
428 | size_t m_end = -tl1 + i + n->msk_len - 1; |
429 | DEBUG_PRINTF("match @ %zu (i %zu)\n" , m_end, i); |
430 | hwlmcb_rv_t rv = cb(m_end, n->id, scratch); |
431 | if (rv == HWLM_TERMINATE_MATCHING) { |
432 | return HWLM_TERMINATED; |
433 | } |
434 | } |
435 | } |
436 | } |
437 | |
438 | assert(buf); |
439 | |
440 | cbi.offsetAdj = 0; |
441 | return scan(n, buf, len, 0, n->single, n->nocase, &cbi); |
442 | } |
443 | |