1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Large Bounded Repeat (LBR) engine: runtime code.
31 */
32#include "lbr.h"
33
34#include "lbr_internal.h"
35#include "nfa_api.h"
36#include "nfa_api_queue.h"
37#include "nfa_internal.h"
38#include "repeat.h"
39#include "repeat_internal.h"
40#include "shufti.h"
41#include "truffle.h"
42#include "vermicelli.h"
43#include "util/partial_store.h"
44#include "util/unaligned.h"
45
46/** \brief Sentinel value used to indicate that a repeat is dead/empty/unused.
47 * * */
48#define REPEAT_DEAD 0xffffffffffffffffull
49
50enum MatchMode {
51 CALLBACK_OUTPUT,
52 STOP_AT_MATCH,
53};
54
55static really_inline
56const struct RepeatInfo *getRepeatInfo(const struct lbr_common *l) {
57 const struct RepeatInfo *repeatInfo =
58 (const struct RepeatInfo *)((const char *)l + l->repeatInfoOffset);
59 return repeatInfo;
60}
61
62static really_inline
63void lbrCompressState(const struct lbr_common *l, u64a offset,
64 const struct lbr_state *lstate, char *stream_state) {
65 assert(l && lstate && stream_state);
66 assert(ISALIGNED(lstate));
67
68 const struct RepeatInfo *info = getRepeatInfo(l);
69 repeatPack(stream_state, info, &lstate->ctrl, offset);
70}
71
72static really_inline
73void lbrExpandState(const struct lbr_common *l, u64a offset,
74 const char *stream_state, struct lbr_state *lstate) {
75 assert(l && stream_state && lstate);
76 assert(ISALIGNED(lstate));
77
78 const struct RepeatInfo *info = getRepeatInfo(l);
79 repeatUnpack(stream_state, info, offset, &lstate->ctrl);
80 lstate->lastEscape = 0;
81}
82
83static really_inline
84void clearRepeat(const struct RepeatInfo *info, struct lbr_state *lstate) {
85 assert(info && lstate);
86
87 DEBUG_PRINTF("clear repeat at %p\n", lstate);
88
89 switch ((enum RepeatType)info->type) {
90 case REPEAT_RING:
91 lstate->ctrl.ring.offset = REPEAT_DEAD;
92 break;
93 case REPEAT_RANGE:
94 lstate->ctrl.range.offset = REPEAT_DEAD;
95 break;
96 case REPEAT_FIRST:
97 case REPEAT_LAST:
98 lstate->ctrl.offset.offset = REPEAT_DEAD;
99 break;
100 case REPEAT_BITMAP:
101 lstate->ctrl.bitmap.offset = REPEAT_DEAD;
102 break;
103 case REPEAT_SPARSE_OPTIMAL_P:
104 lstate->ctrl.ring.offset = REPEAT_DEAD;
105 break;
106 case REPEAT_TRAILER:
107 lstate->ctrl.trailer.offset = REPEAT_DEAD;
108 break;
109 default:
110 assert(0);
111 break;
112 }
113}
114
115static really_inline
116char repeatIsDead(const struct RepeatInfo *info,
117 const struct lbr_state *lstate) {
118 assert(info && lstate);
119
120 switch ((enum RepeatType)info->type) {
121 case REPEAT_RING:
122 return lstate->ctrl.ring.offset == REPEAT_DEAD;
123 case REPEAT_RANGE:
124 return lstate->ctrl.range.offset == REPEAT_DEAD;
125 case REPEAT_FIRST:
126 case REPEAT_LAST:
127 return lstate->ctrl.offset.offset == REPEAT_DEAD;
128 case REPEAT_BITMAP:
129 return lstate->ctrl.bitmap.offset == REPEAT_DEAD;
130 case REPEAT_SPARSE_OPTIMAL_P:
131 return lstate->ctrl.ring.offset == REPEAT_DEAD;
132 case REPEAT_TRAILER:
133 return lstate->ctrl.trailer.offset == REPEAT_DEAD;
134 case REPEAT_ALWAYS:
135 assert(!"REPEAT_ALWAYS should only be used by Castle");
136 return 0;
137 }
138
139 assert(0);
140 return 1;
141}
142
143/** Returns true if the LBR can produce matches at offsets greater than the
144 * given one. TODO: can this be combined with lbrIsActive? */
145static really_inline
146char lbrIsAlive(const struct lbr_common *l, const struct lbr_state *lstate,
147 const char *state, u64a offset) {
148 assert(l && lstate && state);
149
150 const struct RepeatInfo *info = getRepeatInfo(l);
151 if (repeatIsDead(info, lstate)) {
152 DEBUG_PRINTF("repeat is dead\n");
153 return 0;
154 }
155
156 if (info->repeatMax == REPEAT_INF) {
157 DEBUG_PRINTF("active repeat with inf max bound, alive\n");
158 return 1;
159 }
160
161 assert(info->repeatMax < REPEAT_INF);
162 const char *repeatState = state + info->packedCtrlSize;
163 u64a lastTop = repeatLastTop(info, &lstate->ctrl, repeatState);
164 if (offset < lastTop + info->repeatMax) {
165 DEBUG_PRINTF("alive, as we can still produce matches after %llu\n",
166 offset);
167 return 1;
168 }
169
170 DEBUG_PRINTF("dead\n");
171 return 0;
172}
173
174/** Returns true if the LBR is matching at the given offset or it could produce
175 * a match in the future. */
176static really_inline
177char lbrIsActive(const struct lbr_common *l, const struct lbr_state *lstate,
178 const char *state, u64a offset) {
179 assert(l && lstate && state);
180 const struct RepeatInfo *info = getRepeatInfo(l);
181 assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
182
183 const char *repeatState = state + info->packedCtrlSize;
184 if (repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
185 REPEAT_MATCH) {
186 DEBUG_PRINTF("currently matching\n");
187 return 1;
188 }
189
190 u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, offset);
191 if (i != 0) {
192 DEBUG_PRINTF("active, next match is at %llu\n", i);
193 return 1;
194 }
195
196 DEBUG_PRINTF("no more matches\n");
197 return 0;
198}
199
200static really_inline
201void lbrTop(const struct lbr_common *l, struct lbr_state *lstate, char *state,
202 u64a offset) {
203 assert(l && lstate && state);
204 DEBUG_PRINTF("top at %llu\n", offset);
205
206 const struct RepeatInfo *info = getRepeatInfo(l);
207 char *repeatState = state + info->packedCtrlSize;
208
209 char is_alive = !repeatIsDead(info, lstate);
210 if (is_alive) {
211 // Ignore duplicate TOPs.
212 u64a last = repeatLastTop(info, &lstate->ctrl, repeatState);
213 assert(last <= offset);
214 if (last == offset) {
215 return;
216 }
217 }
218
219 repeatStore(info, &lstate->ctrl, repeatState, offset, is_alive);
220}
221
222static really_inline
223char lbrInAccept(const struct lbr_common *l, const struct lbr_state *lstate,
224 const char *state, u64a offset, ReportID report) {
225 assert(l && lstate && state);
226 DEBUG_PRINTF("offset=%llu, report=%u\n", offset, report);
227
228 if (report != l->report) {
229 DEBUG_PRINTF("report=%u is not LBR report %u\n", report, l->report);
230 return 0;
231 }
232
233 const struct RepeatInfo *info = getRepeatInfo(l);
234 assert(!repeatIsDead(info, lstate)); // Guaranteed by caller.
235
236 const char *repeatState = state + info->packedCtrlSize;
237 return repeatHasMatch(info, &lstate->ctrl, repeatState, offset) ==
238 REPEAT_MATCH;
239}
240
241static really_inline
242char lbrFindMatch(const struct lbr_common *l, const u64a begin, const u64a end,
243 const struct lbr_state *lstate, const char *state,
244 size_t *mloc) {
245 DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
246 assert(begin <= end);
247
248 if (begin == end) {
249 return 0;
250 }
251
252 const struct RepeatInfo *info = getRepeatInfo(l);
253 const char *repeatState = state + info->packedCtrlSize;
254 u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, begin);
255 if (i == 0) {
256 DEBUG_PRINTF("no more matches\n");
257 return 0;
258 }
259 if (i > end) {
260 DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
261 return 0;
262 }
263
264 DEBUG_PRINTF("stop at match at %llu\n", i);
265 assert(mloc);
266 *mloc = i - begin;
267 return 1;
268}
269
270static really_inline
271char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end,
272 const struct lbr_state *lstate, const char *state,
273 NfaCallback cb, void *ctx) {
274 DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
275 assert(begin <= end);
276
277 if (begin == end) {
278 return MO_CONTINUE_MATCHING;
279 }
280
281 const struct RepeatInfo *info = getRepeatInfo(l);
282 const char *repeatState = state + info->packedCtrlSize;
283
284 u64a i = begin;
285 for (;;) {
286 i = repeatNextMatch(info, &lstate->ctrl, repeatState, i);
287 if (i == 0) {
288 DEBUG_PRINTF("no more matches\n");
289 return MO_CONTINUE_MATCHING;
290 }
291 if (i > end) {
292 DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i);
293 return MO_CONTINUE_MATCHING;
294 }
295
296 DEBUG_PRINTF("firing match at %llu\n", i);
297 if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) {
298 return MO_HALT_MATCHING;
299 }
300 }
301
302 assert(0);
303 return MO_CONTINUE_MATCHING;
304}
305
306static really_inline
307char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
308 UNUSED size_t begin, UNUSED size_t end,
309 UNUSED size_t *loc) {
310 assert(begin <= end);
311 assert(nfa->type == LBR_NFA_DOT);
312 // Nothing can kill a dot!
313 return 0;
314}
315
316static really_inline
317char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf,
318 size_t begin, size_t end, size_t *loc) {
319 assert(begin <= end);
320 assert(nfa->type == LBR_NFA_VERM);
321 const struct lbr_verm *l = getImplNfa(nfa);
322
323 if (begin == end) {
324 return 0;
325 }
326
327 const u8 *ptr = rvermicelliExec(l->c, 0, buf + begin, buf + end);
328 if (ptr == buf + begin - 1) {
329 DEBUG_PRINTF("no escape found\n");
330 return 0;
331 }
332
333 assert(loc);
334 *loc = (size_t)(ptr - buf);
335 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
336 assert((char)*ptr == l->c);
337 return 1;
338}
339
340static really_inline
341char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf,
342 size_t begin, size_t end, size_t *loc) {
343 assert(begin <= end);
344 assert(nfa->type == LBR_NFA_NVERM);
345 const struct lbr_verm *l = getImplNfa(nfa);
346
347 if (begin == end) {
348 return 0;
349 }
350
351 const u8 *ptr = rnvermicelliExec(l->c, 0, buf + begin, buf + end);
352 if (ptr == buf + begin - 1) {
353 DEBUG_PRINTF("no escape found\n");
354 return 0;
355 }
356
357 assert(loc);
358 *loc = (size_t)(ptr - buf);
359 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
360 assert((char)*ptr != l->c);
361 return 1;
362}
363
364static really_inline
365char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf,
366 size_t begin, size_t end,
367 size_t *loc) {
368 assert(begin <= end);
369 assert(nfa->type == LBR_NFA_SHUF);
370 const struct lbr_shuf *l = getImplNfa(nfa);
371
372 if (begin == end) {
373 return 0;
374 }
375
376 const u8 *ptr = rshuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
377 if (ptr == buf + begin - 1) {
378 DEBUG_PRINTF("no escape found\n");
379 return 0;
380 }
381
382 assert(loc);
383 *loc = (size_t)(ptr - buf);
384 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
385 return 1;
386}
387
388static really_inline
389char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf,
390 size_t begin, size_t end,
391 size_t *loc) {
392 assert(begin <= end);
393 assert(nfa->type == LBR_NFA_TRUF);
394 const struct lbr_truf *l = getImplNfa(nfa);
395
396 if (begin == end) {
397 return 0;
398 }
399
400 const u8 *ptr = rtruffleExec(l->mask1, l->mask2, buf + begin, buf + end);
401 if (ptr == buf + begin - 1) {
402 DEBUG_PRINTF("no escape found\n");
403 return 0;
404 }
405
406 assert(loc);
407 *loc = (size_t)(ptr - buf);
408 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
409 return 1;
410}
411
412static really_inline
413char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf,
414 UNUSED size_t begin, UNUSED size_t end,
415 UNUSED size_t *loc) {
416 assert(begin <= end);
417 assert(nfa->type == LBR_NFA_DOT);
418 // Nothing can kill a dot!
419 return 0;
420}
421
422static really_inline
423char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf,
424 size_t begin, size_t end, size_t *loc) {
425 assert(begin <= end);
426 assert(nfa->type == LBR_NFA_VERM);
427 const struct lbr_verm *l = getImplNfa(nfa);
428
429 if (begin == end) {
430 return 0;
431 }
432
433 const u8 *ptr = vermicelliExec(l->c, 0, buf + begin, buf + end);
434 if (ptr == buf + end) {
435 DEBUG_PRINTF("no escape found\n");
436 return 0;
437 }
438
439 assert(loc);
440 *loc = (size_t)(ptr - buf);
441 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
442 assert((char)*ptr == l->c);
443 return 1;
444}
445
446static really_inline
447char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf,
448 size_t begin, size_t end, size_t *loc) {
449 assert(begin <= end);
450 assert(nfa->type == LBR_NFA_NVERM);
451 const struct lbr_verm *l = getImplNfa(nfa);
452
453 if (begin == end) {
454 return 0;
455 }
456
457 const u8 *ptr = nvermicelliExec(l->c, 0, buf + begin, buf + end);
458 if (ptr == buf + end) {
459 DEBUG_PRINTF("no escape found\n");
460 return 0;
461 }
462
463 assert(loc);
464 *loc = (size_t)(ptr - buf);
465 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
466 assert((char)*ptr != l->c);
467 return 1;
468}
469
470static really_inline
471char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf,
472 size_t begin, size_t end,
473 size_t *loc) {
474 assert(begin <= end);
475 assert(nfa->type == LBR_NFA_SHUF);
476 const struct lbr_shuf *l = getImplNfa(nfa);
477
478 if (begin == end) {
479 return 0;
480 }
481
482 const u8 *ptr = shuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end);
483 if (ptr == buf + end) {
484 DEBUG_PRINTF("no escape found\n");
485 return 0;
486 }
487
488 assert(loc);
489 *loc = (size_t)(ptr - buf);
490 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
491 return 1;
492}
493
494static really_inline
495char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf,
496 size_t begin, size_t end,
497 size_t *loc) {
498 assert(begin <= end);
499 assert(nfa->type == LBR_NFA_TRUF);
500 const struct lbr_truf *l = getImplNfa(nfa);
501
502 if (begin == end) {
503 return 0;
504 }
505
506 const u8 *ptr = truffleExec(l->mask1, l->mask2, buf + begin, buf + end);
507 if (ptr == buf + end) {
508 DEBUG_PRINTF("no escape found\n");
509 return 0;
510 }
511
512 assert(loc);
513 *loc = (size_t)(ptr - buf);
514 DEBUG_PRINTF("escape found at offset %zu\n", *loc);
515 return 1;
516}
517
518#define ENGINE_ROOT_NAME Dot
519#include "lbr_common_impl.h"
520
521#define ENGINE_ROOT_NAME Verm
522#include "lbr_common_impl.h"
523
524#define ENGINE_ROOT_NAME NVerm
525#include "lbr_common_impl.h"
526
527#define ENGINE_ROOT_NAME Shuf
528#include "lbr_common_impl.h"
529
530#define ENGINE_ROOT_NAME Truf
531#include "lbr_common_impl.h"
532