1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Large Bounded Repeat (LBR) engine: runtime code. |
31 | */ |
32 | #include "lbr.h" |
33 | |
34 | #include "lbr_internal.h" |
35 | #include "nfa_api.h" |
36 | #include "nfa_api_queue.h" |
37 | #include "nfa_internal.h" |
38 | #include "repeat.h" |
39 | #include "repeat_internal.h" |
40 | #include "shufti.h" |
41 | #include "truffle.h" |
42 | #include "vermicelli.h" |
43 | #include "util/partial_store.h" |
44 | #include "util/unaligned.h" |
45 | |
46 | /** \brief Sentinel value used to indicate that a repeat is dead/empty/unused. |
47 | * * */ |
48 | #define REPEAT_DEAD 0xffffffffffffffffull |
49 | |
50 | enum MatchMode { |
51 | CALLBACK_OUTPUT, |
52 | STOP_AT_MATCH, |
53 | }; |
54 | |
55 | static really_inline |
56 | const struct RepeatInfo *getRepeatInfo(const struct lbr_common *l) { |
57 | const struct RepeatInfo *repeatInfo = |
58 | (const struct RepeatInfo *)((const char *)l + l->repeatInfoOffset); |
59 | return repeatInfo; |
60 | } |
61 | |
62 | static really_inline |
63 | void lbrCompressState(const struct lbr_common *l, u64a offset, |
64 | const struct lbr_state *lstate, char *stream_state) { |
65 | assert(l && lstate && stream_state); |
66 | assert(ISALIGNED(lstate)); |
67 | |
68 | const struct RepeatInfo *info = getRepeatInfo(l); |
69 | repeatPack(stream_state, info, &lstate->ctrl, offset); |
70 | } |
71 | |
72 | static really_inline |
73 | void lbrExpandState(const struct lbr_common *l, u64a offset, |
74 | const char *stream_state, struct lbr_state *lstate) { |
75 | assert(l && stream_state && lstate); |
76 | assert(ISALIGNED(lstate)); |
77 | |
78 | const struct RepeatInfo *info = getRepeatInfo(l); |
79 | repeatUnpack(stream_state, info, offset, &lstate->ctrl); |
80 | lstate->lastEscape = 0; |
81 | } |
82 | |
83 | static really_inline |
84 | void clearRepeat(const struct RepeatInfo *info, struct lbr_state *lstate) { |
85 | assert(info && lstate); |
86 | |
87 | DEBUG_PRINTF("clear repeat at %p\n" , lstate); |
88 | |
89 | switch ((enum RepeatType)info->type) { |
90 | case REPEAT_RING: |
91 | lstate->ctrl.ring.offset = REPEAT_DEAD; |
92 | break; |
93 | case REPEAT_RANGE: |
94 | lstate->ctrl.range.offset = REPEAT_DEAD; |
95 | break; |
96 | case REPEAT_FIRST: |
97 | case REPEAT_LAST: |
98 | lstate->ctrl.offset.offset = REPEAT_DEAD; |
99 | break; |
100 | case REPEAT_BITMAP: |
101 | lstate->ctrl.bitmap.offset = REPEAT_DEAD; |
102 | break; |
103 | case REPEAT_SPARSE_OPTIMAL_P: |
104 | lstate->ctrl.ring.offset = REPEAT_DEAD; |
105 | break; |
106 | case REPEAT_TRAILER: |
107 | lstate->ctrl.trailer.offset = REPEAT_DEAD; |
108 | break; |
109 | default: |
110 | assert(0); |
111 | break; |
112 | } |
113 | } |
114 | |
115 | static really_inline |
116 | char repeatIsDead(const struct RepeatInfo *info, |
117 | const struct lbr_state *lstate) { |
118 | assert(info && lstate); |
119 | |
120 | switch ((enum RepeatType)info->type) { |
121 | case REPEAT_RING: |
122 | return lstate->ctrl.ring.offset == REPEAT_DEAD; |
123 | case REPEAT_RANGE: |
124 | return lstate->ctrl.range.offset == REPEAT_DEAD; |
125 | case REPEAT_FIRST: |
126 | case REPEAT_LAST: |
127 | return lstate->ctrl.offset.offset == REPEAT_DEAD; |
128 | case REPEAT_BITMAP: |
129 | return lstate->ctrl.bitmap.offset == REPEAT_DEAD; |
130 | case REPEAT_SPARSE_OPTIMAL_P: |
131 | return lstate->ctrl.ring.offset == REPEAT_DEAD; |
132 | case REPEAT_TRAILER: |
133 | return lstate->ctrl.trailer.offset == REPEAT_DEAD; |
134 | case REPEAT_ALWAYS: |
135 | assert(!"REPEAT_ALWAYS should only be used by Castle" ); |
136 | return 0; |
137 | } |
138 | |
139 | assert(0); |
140 | return 1; |
141 | } |
142 | |
143 | /** Returns true if the LBR can produce matches at offsets greater than the |
144 | * given one. TODO: can this be combined with lbrIsActive? */ |
145 | static really_inline |
146 | char lbrIsAlive(const struct lbr_common *l, const struct lbr_state *lstate, |
147 | const char *state, u64a offset) { |
148 | assert(l && lstate && state); |
149 | |
150 | const struct RepeatInfo *info = getRepeatInfo(l); |
151 | if (repeatIsDead(info, lstate)) { |
152 | DEBUG_PRINTF("repeat is dead\n" ); |
153 | return 0; |
154 | } |
155 | |
156 | if (info->repeatMax == REPEAT_INF) { |
157 | DEBUG_PRINTF("active repeat with inf max bound, alive\n" ); |
158 | return 1; |
159 | } |
160 | |
161 | assert(info->repeatMax < REPEAT_INF); |
162 | const char *repeatState = state + info->packedCtrlSize; |
163 | u64a lastTop = repeatLastTop(info, &lstate->ctrl, repeatState); |
164 | if (offset < lastTop + info->repeatMax) { |
165 | DEBUG_PRINTF("alive, as we can still produce matches after %llu\n" , |
166 | offset); |
167 | return 1; |
168 | } |
169 | |
170 | DEBUG_PRINTF("dead\n" ); |
171 | return 0; |
172 | } |
173 | |
174 | /** Returns true if the LBR is matching at the given offset or it could produce |
175 | * a match in the future. */ |
176 | static really_inline |
177 | char lbrIsActive(const struct lbr_common *l, const struct lbr_state *lstate, |
178 | const char *state, u64a offset) { |
179 | assert(l && lstate && state); |
180 | const struct RepeatInfo *info = getRepeatInfo(l); |
181 | assert(!repeatIsDead(info, lstate)); // Guaranteed by caller. |
182 | |
183 | const char *repeatState = state + info->packedCtrlSize; |
184 | if (repeatHasMatch(info, &lstate->ctrl, repeatState, offset) == |
185 | REPEAT_MATCH) { |
186 | DEBUG_PRINTF("currently matching\n" ); |
187 | return 1; |
188 | } |
189 | |
190 | u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, offset); |
191 | if (i != 0) { |
192 | DEBUG_PRINTF("active, next match is at %llu\n" , i); |
193 | return 1; |
194 | } |
195 | |
196 | DEBUG_PRINTF("no more matches\n" ); |
197 | return 0; |
198 | } |
199 | |
200 | static really_inline |
201 | void lbrTop(const struct lbr_common *l, struct lbr_state *lstate, char *state, |
202 | u64a offset) { |
203 | assert(l && lstate && state); |
204 | DEBUG_PRINTF("top at %llu\n" , offset); |
205 | |
206 | const struct RepeatInfo *info = getRepeatInfo(l); |
207 | char *repeatState = state + info->packedCtrlSize; |
208 | |
209 | char is_alive = !repeatIsDead(info, lstate); |
210 | if (is_alive) { |
211 | // Ignore duplicate TOPs. |
212 | u64a last = repeatLastTop(info, &lstate->ctrl, repeatState); |
213 | assert(last <= offset); |
214 | if (last == offset) { |
215 | return; |
216 | } |
217 | } |
218 | |
219 | repeatStore(info, &lstate->ctrl, repeatState, offset, is_alive); |
220 | } |
221 | |
222 | static really_inline |
223 | char lbrInAccept(const struct lbr_common *l, const struct lbr_state *lstate, |
224 | const char *state, u64a offset, ReportID report) { |
225 | assert(l && lstate && state); |
226 | DEBUG_PRINTF("offset=%llu, report=%u\n" , offset, report); |
227 | |
228 | if (report != l->report) { |
229 | DEBUG_PRINTF("report=%u is not LBR report %u\n" , report, l->report); |
230 | return 0; |
231 | } |
232 | |
233 | const struct RepeatInfo *info = getRepeatInfo(l); |
234 | assert(!repeatIsDead(info, lstate)); // Guaranteed by caller. |
235 | |
236 | const char *repeatState = state + info->packedCtrlSize; |
237 | return repeatHasMatch(info, &lstate->ctrl, repeatState, offset) == |
238 | REPEAT_MATCH; |
239 | } |
240 | |
241 | static really_inline |
242 | char lbrFindMatch(const struct lbr_common *l, const u64a begin, const u64a end, |
243 | const struct lbr_state *lstate, const char *state, |
244 | size_t *mloc) { |
245 | DEBUG_PRINTF("begin=%llu, end=%llu\n" , begin, end); |
246 | assert(begin <= end); |
247 | |
248 | if (begin == end) { |
249 | return 0; |
250 | } |
251 | |
252 | const struct RepeatInfo *info = getRepeatInfo(l); |
253 | const char *repeatState = state + info->packedCtrlSize; |
254 | u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, begin); |
255 | if (i == 0) { |
256 | DEBUG_PRINTF("no more matches\n" ); |
257 | return 0; |
258 | } |
259 | if (i > end) { |
260 | DEBUG_PRINTF("next match at %llu is beyond the horizon\n" , i); |
261 | return 0; |
262 | } |
263 | |
264 | DEBUG_PRINTF("stop at match at %llu\n" , i); |
265 | assert(mloc); |
266 | *mloc = i - begin; |
267 | return 1; |
268 | } |
269 | |
270 | static really_inline |
271 | char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end, |
272 | const struct lbr_state *lstate, const char *state, |
273 | NfaCallback cb, void *ctx) { |
274 | DEBUG_PRINTF("begin=%llu, end=%llu\n" , begin, end); |
275 | assert(begin <= end); |
276 | |
277 | if (begin == end) { |
278 | return MO_CONTINUE_MATCHING; |
279 | } |
280 | |
281 | const struct RepeatInfo *info = getRepeatInfo(l); |
282 | const char *repeatState = state + info->packedCtrlSize; |
283 | |
284 | u64a i = begin; |
285 | for (;;) { |
286 | i = repeatNextMatch(info, &lstate->ctrl, repeatState, i); |
287 | if (i == 0) { |
288 | DEBUG_PRINTF("no more matches\n" ); |
289 | return MO_CONTINUE_MATCHING; |
290 | } |
291 | if (i > end) { |
292 | DEBUG_PRINTF("next match at %llu is beyond the horizon\n" , i); |
293 | return MO_CONTINUE_MATCHING; |
294 | } |
295 | |
296 | DEBUG_PRINTF("firing match at %llu\n" , i); |
297 | if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) { |
298 | return MO_HALT_MATCHING; |
299 | } |
300 | } |
301 | |
302 | assert(0); |
303 | return MO_CONTINUE_MATCHING; |
304 | } |
305 | |
306 | static really_inline |
307 | char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, |
308 | UNUSED size_t begin, UNUSED size_t end, |
309 | UNUSED size_t *loc) { |
310 | assert(begin <= end); |
311 | assert(nfa->type == LBR_NFA_DOT); |
312 | // Nothing can kill a dot! |
313 | return 0; |
314 | } |
315 | |
316 | static really_inline |
317 | char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf, |
318 | size_t begin, size_t end, size_t *loc) { |
319 | assert(begin <= end); |
320 | assert(nfa->type == LBR_NFA_VERM); |
321 | const struct lbr_verm *l = getImplNfa(nfa); |
322 | |
323 | if (begin == end) { |
324 | return 0; |
325 | } |
326 | |
327 | const u8 *ptr = rvermicelliExec(l->c, 0, buf + begin, buf + end); |
328 | if (ptr == buf + begin - 1) { |
329 | DEBUG_PRINTF("no escape found\n" ); |
330 | return 0; |
331 | } |
332 | |
333 | assert(loc); |
334 | *loc = (size_t)(ptr - buf); |
335 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
336 | assert((char)*ptr == l->c); |
337 | return 1; |
338 | } |
339 | |
340 | static really_inline |
341 | char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, |
342 | size_t begin, size_t end, size_t *loc) { |
343 | assert(begin <= end); |
344 | assert(nfa->type == LBR_NFA_NVERM); |
345 | const struct lbr_verm *l = getImplNfa(nfa); |
346 | |
347 | if (begin == end) { |
348 | return 0; |
349 | } |
350 | |
351 | const u8 *ptr = rnvermicelliExec(l->c, 0, buf + begin, buf + end); |
352 | if (ptr == buf + begin - 1) { |
353 | DEBUG_PRINTF("no escape found\n" ); |
354 | return 0; |
355 | } |
356 | |
357 | assert(loc); |
358 | *loc = (size_t)(ptr - buf); |
359 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
360 | assert((char)*ptr != l->c); |
361 | return 1; |
362 | } |
363 | |
364 | static really_inline |
365 | char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, |
366 | size_t begin, size_t end, |
367 | size_t *loc) { |
368 | assert(begin <= end); |
369 | assert(nfa->type == LBR_NFA_SHUF); |
370 | const struct lbr_shuf *l = getImplNfa(nfa); |
371 | |
372 | if (begin == end) { |
373 | return 0; |
374 | } |
375 | |
376 | const u8 *ptr = rshuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end); |
377 | if (ptr == buf + begin - 1) { |
378 | DEBUG_PRINTF("no escape found\n" ); |
379 | return 0; |
380 | } |
381 | |
382 | assert(loc); |
383 | *loc = (size_t)(ptr - buf); |
384 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
385 | return 1; |
386 | } |
387 | |
388 | static really_inline |
389 | char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf, |
390 | size_t begin, size_t end, |
391 | size_t *loc) { |
392 | assert(begin <= end); |
393 | assert(nfa->type == LBR_NFA_TRUF); |
394 | const struct lbr_truf *l = getImplNfa(nfa); |
395 | |
396 | if (begin == end) { |
397 | return 0; |
398 | } |
399 | |
400 | const u8 *ptr = rtruffleExec(l->mask1, l->mask2, buf + begin, buf + end); |
401 | if (ptr == buf + begin - 1) { |
402 | DEBUG_PRINTF("no escape found\n" ); |
403 | return 0; |
404 | } |
405 | |
406 | assert(loc); |
407 | *loc = (size_t)(ptr - buf); |
408 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
409 | return 1; |
410 | } |
411 | |
412 | static really_inline |
413 | char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, |
414 | UNUSED size_t begin, UNUSED size_t end, |
415 | UNUSED size_t *loc) { |
416 | assert(begin <= end); |
417 | assert(nfa->type == LBR_NFA_DOT); |
418 | // Nothing can kill a dot! |
419 | return 0; |
420 | } |
421 | |
422 | static really_inline |
423 | char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf, |
424 | size_t begin, size_t end, size_t *loc) { |
425 | assert(begin <= end); |
426 | assert(nfa->type == LBR_NFA_VERM); |
427 | const struct lbr_verm *l = getImplNfa(nfa); |
428 | |
429 | if (begin == end) { |
430 | return 0; |
431 | } |
432 | |
433 | const u8 *ptr = vermicelliExec(l->c, 0, buf + begin, buf + end); |
434 | if (ptr == buf + end) { |
435 | DEBUG_PRINTF("no escape found\n" ); |
436 | return 0; |
437 | } |
438 | |
439 | assert(loc); |
440 | *loc = (size_t)(ptr - buf); |
441 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
442 | assert((char)*ptr == l->c); |
443 | return 1; |
444 | } |
445 | |
446 | static really_inline |
447 | char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, |
448 | size_t begin, size_t end, size_t *loc) { |
449 | assert(begin <= end); |
450 | assert(nfa->type == LBR_NFA_NVERM); |
451 | const struct lbr_verm *l = getImplNfa(nfa); |
452 | |
453 | if (begin == end) { |
454 | return 0; |
455 | } |
456 | |
457 | const u8 *ptr = nvermicelliExec(l->c, 0, buf + begin, buf + end); |
458 | if (ptr == buf + end) { |
459 | DEBUG_PRINTF("no escape found\n" ); |
460 | return 0; |
461 | } |
462 | |
463 | assert(loc); |
464 | *loc = (size_t)(ptr - buf); |
465 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
466 | assert((char)*ptr != l->c); |
467 | return 1; |
468 | } |
469 | |
470 | static really_inline |
471 | char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, |
472 | size_t begin, size_t end, |
473 | size_t *loc) { |
474 | assert(begin <= end); |
475 | assert(nfa->type == LBR_NFA_SHUF); |
476 | const struct lbr_shuf *l = getImplNfa(nfa); |
477 | |
478 | if (begin == end) { |
479 | return 0; |
480 | } |
481 | |
482 | const u8 *ptr = shuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end); |
483 | if (ptr == buf + end) { |
484 | DEBUG_PRINTF("no escape found\n" ); |
485 | return 0; |
486 | } |
487 | |
488 | assert(loc); |
489 | *loc = (size_t)(ptr - buf); |
490 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
491 | return 1; |
492 | } |
493 | |
494 | static really_inline |
495 | char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, |
496 | size_t begin, size_t end, |
497 | size_t *loc) { |
498 | assert(begin <= end); |
499 | assert(nfa->type == LBR_NFA_TRUF); |
500 | const struct lbr_truf *l = getImplNfa(nfa); |
501 | |
502 | if (begin == end) { |
503 | return 0; |
504 | } |
505 | |
506 | const u8 *ptr = truffleExec(l->mask1, l->mask2, buf + begin, buf + end); |
507 | if (ptr == buf + end) { |
508 | DEBUG_PRINTF("no escape found\n" ); |
509 | return 0; |
510 | } |
511 | |
512 | assert(loc); |
513 | *loc = (size_t)(ptr - buf); |
514 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
515 | return 1; |
516 | } |
517 | |
518 | #define ENGINE_ROOT_NAME Dot |
519 | #include "lbr_common_impl.h" |
520 | |
521 | #define ENGINE_ROOT_NAME Verm |
522 | #include "lbr_common_impl.h" |
523 | |
524 | #define ENGINE_ROOT_NAME NVerm |
525 | #include "lbr_common_impl.h" |
526 | |
527 | #define ENGINE_ROOT_NAME Shuf |
528 | #include "lbr_common_impl.h" |
529 | |
530 | #define ENGINE_ROOT_NAME Truf |
531 | #include "lbr_common_impl.h" |
532 | |