1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Noodle literal matcher: runtime.
31 */
32#include "hwlm.h"
33#include "noodle_engine.h"
34#include "noodle_internal.h"
35#include "scratch.h"
36#include "ue2common.h"
37#include "util/arch.h"
38#include "util/bitutils.h"
39#include "util/compare.h"
40#include "util/intrinsics.h"
41#include "util/join.h"
42#include "util/masked_move.h"
43#include "util/partial_store.h"
44#include "util/simd_utils.h"
45
46#include <ctype.h>
47#include <stdbool.h>
48#include <string.h>
49
50/** \brief Noodle runtime context. */
51struct cb_info {
52 HWLMCallback cb; //!< callback function called on match
53 u32 id; //!< ID to pass to callback on match
54 struct hs_scratch *scratch; //!< scratch to pass to callback
55 size_t offsetAdj; //!< used in streaming mode
56};
57
58#if defined(HAVE_AVX512)
59#define CHUNKSIZE 64
60#define MASK_TYPE m512
61#define Z_BITS 64
62#define Z_TYPE u64a
63#elif defined(HAVE_AVX2)
64#define CHUNKSIZE 32
65#define MASK_TYPE m256
66#define Z_BITS 32
67#define Z_TYPE u32
68#else
69#define CHUNKSIZE 16
70#define MASK_TYPE m128
71#define Z_BITS 32
72#define Z_TYPE u32
73#endif
74
75
76#define RETURN_IF_TERMINATED(x) \
77 { \
78 if ((x) == HWLM_TERMINATED) { \
79 return HWLM_TERMINATED; \
80 } \
81 }
82
83#define SINGLE_ZSCAN() \
84 do { \
85 while (unlikely(z)) { \
86 Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
87 size_t matchPos = d - buf + pos; \
88 DEBUG_PRINTF("match pos %zu\n", matchPos); \
89 hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \
90 RETURN_IF_TERMINATED(rv); \
91 } \
92 } while (0)
93
94#define DOUBLE_ZSCAN() \
95 do { \
96 while (unlikely(z)) { \
97 Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \
98 size_t matchPos = d - buf + pos - 1; \
99 DEBUG_PRINTF("match pos %zu\n", matchPos); \
100 hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \
101 RETURN_IF_TERMINATED(rv); \
102 } \
103 } while (0)
104
105static really_inline
106u8 caseClear8(u8 x, bool noCase) {
107 return (u8)(noCase ? (x & (u8)0xdf) : x);
108}
109
110// Make sure the rest of the string is there. The single character scanner
111// is used only for single chars with case insensitivity used correctly,
112// so it can go straight to the callback if we get this far.
113static really_inline
114hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len,
115 char single, const struct cb_info *cbi, size_t pos) {
116 if (single) {
117 if (n->msk_len == 1) {
118 goto match;
119 }
120 }
121 assert(len >= n->msk_len);
122 u64a v =
123 partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len);
124 DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n", v, n->msk, n->cmp);
125 if ((v & n->msk) != n->cmp) {
126 /* mask didn't match */
127 return HWLM_SUCCESS;
128 }
129
130match:
131 pos -= cbi->offsetAdj;
132 DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset);
133 hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch);
134 if (rv == HWLM_TERMINATE_MATCHING) {
135 return HWLM_TERMINATED;
136 }
137 return HWLM_SUCCESS;
138}
139
140#if defined(HAVE_AVX512)
141#define CHUNKSIZE 64
142#define MASK_TYPE m512
143#include "noodle_engine_avx512.c"
144#elif defined(HAVE_AVX2)
145#define CHUNKSIZE 32
146#define MASK_TYPE m256
147#include "noodle_engine_avx2.c"
148#else
149#define CHUNKSIZE 16
150#define MASK_TYPE m128
151#include "noodle_engine_sse.c"
152#endif
153
154static really_inline
155hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf,
156 size_t len, size_t start, bool noCase,
157 const struct cb_info *cbi) {
158
159 const MASK_TYPE mask1 = getMask(n->key0, noCase);
160 const MASK_TYPE caseMask = getCaseMask();
161
162 size_t offset = start + n->msk_len - 1;
163 size_t end = len;
164 assert(offset < end);
165
166#if !defined(HAVE_AVX512)
167 hwlm_error_t rv;
168
169 if (end - offset < CHUNKSIZE) {
170 rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset,
171 end);
172 return rv;
173 }
174
175 if (end - offset == CHUNKSIZE) {
176 rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
177 cbi, offset, end);
178 return rv;
179 }
180
181 uintptr_t data = (uintptr_t)buf;
182 uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
183 uintptr_t last = data + end;
184 uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
185 uintptr_t s3Start = end - CHUNKSIZE;
186
187 if (offset != s2Start) {
188 // first scan out to the fast scan starting point
189 DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
190 rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
191 cbi, offset, s2Start);
192 RETURN_IF_TERMINATED(rv);
193 }
194
195 if (likely(s2Start != s2End)) {
196 // scan as far as we can, bounded by the last point this key can
197 // possibly match
198 DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
199 rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start,
200 s2End);
201 RETURN_IF_TERMINATED(rv);
202 }
203
204 // if we are done bail out
205 if (s2End == len) {
206 return HWLM_SUCCESS;
207 }
208
209 DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len);
210 rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi,
211 s2End, len);
212
213 return rv;
214#else // HAVE_AVX512
215 return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset,
216 end);
217#endif
218}
219
220static really_inline
221hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf,
222 size_t len, size_t start, bool noCase,
223 const struct cb_info *cbi) {
224 // we stop scanning for the key-fragment when the rest of the key can't
225 // possibly fit in the remaining buffer
226 size_t end = len - n->key_offset + 2;
227
228 // the first place the key can match
229 size_t offset = start + n->msk_len - n->key_offset;
230
231 const MASK_TYPE caseMask = getCaseMask();
232 const MASK_TYPE mask1 = getMask(n->key0, noCase);
233 const MASK_TYPE mask2 = getMask(n->key1, noCase);
234
235#if !defined(HAVE_AVX512)
236 hwlm_error_t rv;
237
238 if (end - offset < CHUNKSIZE) {
239 rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
240 offset, end);
241 return rv;
242 }
243 if (end - offset == CHUNKSIZE) {
244 rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
245 mask2, cbi, offset, end);
246 return rv;
247 }
248
249 uintptr_t data = (uintptr_t)buf;
250 uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data;
251 uintptr_t s1End = s2Start + 1;
252 uintptr_t last = data + end;
253 uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
254 uintptr_t s3Start = end - CHUNKSIZE;
255 uintptr_t off = offset;
256
257 if (s2Start != off) {
258 // first scan out to the fast scan starting point plus one char past to
259 // catch the key on the overlap
260 DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start);
261 rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1,
262 mask2, cbi, off, s1End);
263 RETURN_IF_TERMINATED(rv);
264 }
265 off = s1End;
266
267 if (s2Start >= end) {
268 DEBUG_PRINTF("s2 == mL %zu\n", end);
269 return HWLM_SUCCESS;
270 }
271
272 if (likely(s2Start != s2End)) {
273 // scan as far as we can, bounded by the last point this key can
274 // possibly match
275 DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
276 rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
277 s2Start, s2End);
278 RETURN_IF_TERMINATED(rv);
279 off = s2End;
280 }
281
282 // if there isn't enough data left to match the key, bail out
283 if (s2End == end) {
284 return HWLM_SUCCESS;
285 }
286
287 DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
288 rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1,
289 mask2, cbi, off, end);
290
291 return rv;
292#else // AVX512
293 return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi,
294 offset, end);
295#endif // AVX512
296}
297
298
299static really_inline
300hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf,
301 size_t len, size_t start,
302 const struct cb_info *cbi) {
303 return scanSingleMain(n, buf, len, start, 1, cbi);
304}
305
306static really_inline
307hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf,
308 size_t len, size_t start,
309 const struct cb_info *cbi) {
310 return scanSingleMain(n, buf, len, start, 0, cbi);
311}
312
313// Single-character specialisation, used when keyLen = 1
314static really_inline
315hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len,
316 size_t start, bool noCase, const struct cb_info *cbi) {
317 if (!ourisalpha(n->key0)) {
318 noCase = 0; // force noCase off if we don't have an alphabetic char
319 }
320
321 // kinda ugly, but this forces constant propagation
322 if (noCase) {
323 return scanSingleNoCase(n, buf, len, start, cbi);
324 } else {
325 return scanSingleCase(n, buf, len, start, cbi);
326 }
327}
328
329
330static really_inline
331hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf,
332 size_t len, size_t start,
333 const struct cb_info *cbi) {
334 return scanDoubleMain(n, buf, len, start, 1, cbi);
335}
336
337static really_inline
338hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf,
339 size_t len, size_t start,
340 const struct cb_info *cbi) {
341 return scanDoubleMain(n, buf, len, start, 0, cbi);
342}
343
344
345static really_inline
346hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
347 size_t start, bool noCase, const struct cb_info *cbi) {
348 // kinda ugly, but this forces constant propagation
349 if (noCase) {
350 return scanDoubleNoCase(n, buf, len, start, cbi);
351 } else {
352 return scanDoubleCase(n, buf, len, start, cbi);
353 }
354}
355
356// main entry point for the scan code
357static really_inline
358hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len,
359 size_t start, char single, bool noCase,
360 const struct cb_info *cbi) {
361 if (len - start < n->msk_len) {
362 // can't find string of length keyLen in a shorter buffer
363 return HWLM_SUCCESS;
364 }
365
366 if (single) {
367 return scanSingle(n, buf, len, start, noCase, cbi);
368 } else {
369 return scanDouble(n, buf, len, start, noCase, cbi);
370 }
371}
372
373/** \brief Block-mode scanner. */
374hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
375 size_t start, HWLMCallback cb,
376 struct hs_scratch *scratch) {
377 assert(n && buf);
378
379 struct cb_info cbi = {cb, n->id, scratch, 0};
380 DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len,
381 (const char *)&n->cmp, buf);
382
383 return scan(n, buf, len, start, n->single, n->nocase, &cbi);
384}
385
386/** \brief Streaming-mode scanner. */
387hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
388 size_t hlen, const u8 *buf, size_t len,
389 HWLMCallback cb, struct hs_scratch *scratch) {
390 assert(n);
391
392 if (len + hlen < n->msk_len) {
393 DEBUG_PRINTF("not enough bytes for a match\n");
394 return HWLM_SUCCESS;
395 }
396
397 struct cb_info cbi = {cb, n->id, scratch, 0};
398 DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen,
399 n->msk_len, (const char *)&n->cmp, buf);
400
401 if (hlen && n->msk_len > 1) {
402 /*
403 * we have history, so build up a buffer from enough of the history
404 * buffer plus what we've been given to scan. Since this is relatively
405 * short, just check against msk+cmp per byte offset for matches.
406 */
407 assert(hbuf);
408 u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2];
409 memset(temp_buf, 0, sizeof(temp_buf));
410
411 assert(n->msk_len);
412 size_t tl1 = MIN((size_t)n->msk_len - 1, hlen);
413 size_t tl2 = MIN((size_t)n->msk_len - 1, len);
414
415 assert(tl1 + tl2 <= sizeof(temp_buf));
416 assert(tl1 + tl2 >= n->msk_len);
417 assert(tl1 <= sizeof(u64a));
418 assert(tl2 <= sizeof(u64a));
419 DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2);
420
421 unaligned_store_u64a(temp_buf,
422 partial_load_u64a(hbuf + hlen - tl1, tl1));
423 unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2));
424
425 for (size_t i = 0; i <= tl1 + tl2 - n->msk_len; i++) {
426 u64a v = unaligned_load_u64a(temp_buf + i);
427 if ((v & n->msk) == n->cmp) {
428 size_t m_end = -tl1 + i + n->msk_len - 1;
429 DEBUG_PRINTF("match @ %zu (i %zu)\n", m_end, i);
430 hwlmcb_rv_t rv = cb(m_end, n->id, scratch);
431 if (rv == HWLM_TERMINATE_MATCHING) {
432 return HWLM_TERMINATED;
433 }
434 }
435 }
436 }
437
438 assert(buf);
439
440 cbi.offsetAdj = 0;
441 return scan(n, buf, len, 0, n->single, n->nocase, &cbi);
442}
443