/*
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Castle: multi-tenant repeat engine, runtime code.
 */

#include "castle.h"

#include "castle_internal.h"
#include "nfa_api.h"
#include "nfa_api_queue.h"
#include "nfa_internal.h"
#include "repeat.h"
#include "shufti.h"
#include "truffle.h"
#include "vermicelli.h"
#include "util/bitutils.h"
#include "util/multibit.h"
#include "util/partial_store.h"
#include "ue2common.h"
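
/*
 * A Castle is laid out as a header immediately followed by an array of
 * SubCastle structures, one per repeat:
 *
 *     [ struct Castle | SubCastle[0] | SubCastle[1] | ... ]
 *
 * Each SubCastle carries offsets rather than pointers: repeatInfoOffset
 * locates its read-only RepeatInfo, fullStateOffset its unpacked
 * RepeatControl in full (scratch) state, and streamStateOffset its packed
 * control block in stream state. The accessors below resolve these offsets.
 */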
static really_inline
const struct SubCastle *getSubCastle(const struct Castle *c, u32 num) {
    assert(num < c->numRepeats);
    const struct SubCastle *sub =
        (const struct SubCastle *)((const char *)c + sizeof(struct Castle));
    assert(ISALIGNED(sub));
    return &sub[num];
}

static really_inline
const struct RepeatInfo *getRepeatInfo(const struct SubCastle *sub) {
    const struct RepeatInfo *repeatInfo =
        (const struct RepeatInfo *)((const char *)sub + sub->repeatInfoOffset);
    return repeatInfo;
}

static really_inline
union RepeatControl *getControl(char *full_state, const struct SubCastle *sub) {
    union RepeatControl *rctrl =
        (union RepeatControl *)(full_state + sub->fullStateOffset);
    assert(ISALIGNED(rctrl));
    return rctrl;
}

static really_inline
const union RepeatControl *getControlConst(const char *full_state,
                                           const struct SubCastle *sub) {
    const union RepeatControl *rctrl =
        (const union RepeatControl *)(full_state + sub->fullStateOffset);
    assert(ISALIGNED(rctrl));
    return rctrl;
}

enum MatchMode {
    CALLBACK_OUTPUT,
    STOP_AT_MATCH,
};
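
/*
 * Each repeat keeps its state in two places: a packed control block at
 * sub->streamStateOffset in stream state, followed at +packedCtrlSize by
 * the repeat's own stream state (the "rstate" pointer computed throughout
 * this file), and an unpacked RepeatControl union in full state (scratch),
 * which nfaExecCastle_expandState() rebuilds from the packed form.
 */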
static really_inline
char subCastleReportCurrent(const struct Castle *c, struct mq *q,
                            const u64a offset, const u32 subIdx) {
    const struct SubCastle *sub = getSubCastle(c, subIdx);
    const struct RepeatInfo *info = getRepeatInfo(sub);

    union RepeatControl *rctrl = getControl(q->state, sub);
    char *rstate = (char *)q->streamState + sub->streamStateOffset +
                   info->packedCtrlSize;
    enum RepeatMatch match =
        repeatHasMatch(info, rctrl, rstate, offset);
    DEBUG_PRINTF("repeatHasMatch returned %d\n", match);
    if (match == REPEAT_MATCH) {
        DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
                     subIdx, sub->report);
        if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING;
        }
    }

    return MO_CONTINUE_MATCHING;
}

static really_inline
int castleReportCurrent(const struct Castle *c, struct mq *q) {
    const u64a offset = q_cur_offset(q);
    DEBUG_PRINTF("offset=%llu\n", offset);

    if (c->exclusive) {
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            if (subCastleReportCurrent(c, q,
                    offset, activeIdx) == MO_HALT_MATCHING) {
                return MO_HALT_MATCHING;
            }
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("subcastle %u\n", i);
            if (subCastleReportCurrent(c, q, offset, i) == MO_HALT_MATCHING) {
                return MO_HALT_MATCHING;
            }
        }
    }

    return MO_CONTINUE_MATCHING;
}

static really_inline
char subCastleInAccept(const struct Castle *c, struct mq *q,
                       const ReportID report, const u64a offset,
                       const u32 subIdx) {
    const struct SubCastle *sub = getSubCastle(c, subIdx);

    if (sub->report != report) {
        return 0;
    }
    const struct RepeatInfo *info = getRepeatInfo(sub);

    union RepeatControl *rctrl = getControl(q->state, sub);
    char *rstate = (char *)q->streamState + sub->streamStateOffset +
                   info->packedCtrlSize;
    enum RepeatMatch match =
        repeatHasMatch(info, rctrl, rstate, offset);
    if (match == REPEAT_MATCH) {
        DEBUG_PRINTF("in an accept\n");
        return 1;
    }

    return 0;
}

static really_inline
char castleInAccept(const struct Castle *c, struct mq *q,
                    const ReportID report, const u64a offset) {
    DEBUG_PRINTF("offset=%llu\n", offset);
    /* ignore when just catching up due to full queue */
    if (report == MO_INVALID_IDX) {
        return 0;
    }

    if (c->exclusive) {
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            if (subCastleInAccept(c, q, report, offset, activeIdx)) {
                return 1;
            }
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("subcastle %u\n", i);
            if (subCastleInAccept(c, q, report, offset, i)) {
                return 1;
            }
        }
    }

    return 0;
}
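
/*
 * A repeat is "stale" when its last top is too old for it ever to match
 * again; repeatHasMatch() reports this as REPEAT_STALE. Stale subcastles
 * are switched off here (either their exclusive group slot or their bit in
 * the active multibit) so that later passes need not consider them.
 */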
static really_inline
void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
                                  void *full_state, void *stream_state,
                                  const u32 subIdx) {
    const struct SubCastle *sub = getSubCastle(c, subIdx);
    const struct RepeatInfo *info = getRepeatInfo(sub);

    union RepeatControl *rctrl = getControl(full_state, sub);
    char *rstate = (char *)stream_state + sub->streamStateOffset +
                   info->packedCtrlSize;

    if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) {
        DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset);
        if (sub->exclusiveId < c->numRepeats) {
            u8 *active = (u8 *)stream_state;
            u8 *groups = active + c->groupIterOffset;
            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
        } else {
            u8 *active = (u8 *)stream_state + c->activeOffset;
            mmbit_unset(active, c->numRepeats, subIdx);
        }
    }
}

static really_inline
void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset,
                               void *full_state, void *stream_state) {
    DEBUG_PRINTF("offset=%llu\n", offset);

    if (!c->staleIterOffset) {
        DEBUG_PRINTF("{no repeats can go stale}\n");
        return; /* no subcastle can ever go stale */
    }

    if (c->exclusive) {
        u8 *active = (u8 *)stream_state;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            subCastleDeactivateStaleSubs(c, offset, full_state,
                                         stream_state, activeIdx);
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)stream_state + c->activeOffset;
        const struct mmbit_sparse_iter *it
            = (const void *)((const char *)c + c->staleIterOffset);

        struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
        u32 numRepeats = c->numRepeats;
        u32 idx = 0;

        u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state);
        while (i != MMB_INVALID) {
            DEBUG_PRINTF("subcastle %u\n", i);
            subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i);
            i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it,
                                       si_state);
        }
    }
}
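
/*
 * Process a top event for a single repeat. A repeat that belongs to an
 * exclusive group (sub->exclusiveId < c->numRepeats) claims that group's
 * state slot, displacing whichever repeat previously owned it; any other
 * repeat just sets its bit in the active multibit. A duplicate top at the
 * same offset for an already-live repeat is ignored.
 */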
static really_inline
void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset,
                      void *full_state, void *stream_state,
                      UNUSED char stale_checked) {
    assert(top < c->numRepeats);

    const struct SubCastle *sub = getSubCastle(c, top);
    const struct RepeatInfo *info = getRepeatInfo(sub);
    union RepeatControl *rctrl = getControl(full_state, sub);
    char *rstate = (char *)stream_state + sub->streamStateOffset +
                   info->packedCtrlSize;

    char is_alive = 0;
    u8 *active = (u8 *)stream_state;
    if (sub->exclusiveId < c->numRepeats) {
        u8 *groups = active + c->groupIterOffset;
        active += sub->exclusiveId * c->activeIdxSize;
        if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) {
            const u32 activeIdx = partial_load_u32(active, c->activeIdxSize);
            is_alive = (activeIdx == top);
        }

        if (!is_alive) {
            partial_store_u32(active, top, c->activeIdxSize);
        }
    } else {
        active += c->activeOffset;
        is_alive = mmbit_set(active, c->numRepeats, top);
    }

    if (!is_alive) {
        DEBUG_PRINTF("first top for inactive repeat %u\n", top);
    } else {
        DEBUG_PRINTF("repeat %u is already alive\n", top);
        // Caller should ensure we're not stale.
        assert(!stale_checked
               || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE);

        // Ignore duplicate top events.
        u64a last = repeatLastTop(info, rctrl, rstate);

        assert(last <= offset);
        if (last == offset) {
            DEBUG_PRINTF("dupe top at %llu\n", offset);
            return;
        }
    }

    repeatStore(info, rctrl, rstate, offset, is_alive);
}
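
/*
 * Locate the earliest next match in the window (begin, end] for a single
 * subcastle, folding it into *mloc (offset from begin) and *found; a
 * subcastle with no further matches is deactivated on the spot. Used by
 * castleFindMatch() below in STOP_AT_MATCH mode, where the caller needs a
 * location to stop at rather than a callback.
 */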
static really_inline
void subCastleFindMatch(const struct Castle *c, const u64a begin,
                        const u64a end, void *full_state, void *stream_state,
                        size_t *mloc, char *found, const u32 subIdx) {
    const struct SubCastle *sub = getSubCastle(c, subIdx);
    const struct RepeatInfo *info = getRepeatInfo(sub);
    union RepeatControl *rctrl = getControl(full_state, sub);
    char *rstate = (char *)stream_state + sub->streamStateOffset +
                   info->packedCtrlSize;

    u64a match = repeatNextMatch(info, rctrl, rstate, begin);
    if (match == 0) {
        DEBUG_PRINTF("no more matches for sub %u\n", subIdx);
        if (sub->exclusiveId < c->numRepeats) {
            u8 *groups = (u8 *)stream_state + c->groupIterOffset;
            mmbit_unset(groups, c->numGroups, sub->exclusiveId);
        } else {
            u8 *active = (u8 *)stream_state + c->activeOffset;
            mmbit_unset(active, c->numRepeats, subIdx);
        }
        return;
    } else if (match > end) {
        DEBUG_PRINTF("next match for sub %u at %llu is > horizon\n", subIdx,
                     match);
        return;
    }
    DEBUG_PRINTF("sub %u earliest match at %llu\n", subIdx, match);
    size_t diff = match - begin;
    if (!(*found) || diff < *mloc) {
        *mloc = diff;
        DEBUG_PRINTF("mloc=%zu\n", *mloc);
    }
    *found = 1;
}

static really_inline
char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end,
                     void *full_state, void *stream_state, size_t *mloc) {
    DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
    assert(begin <= end);

    if (begin == end) {
        DEBUG_PRINTF("no work to do\n");
        return 0;
    }

    char found = 0;
    *mloc = 0;

    if (c->exclusive) {
        u8 *active = (u8 *)stream_state;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
                               &found, activeIdx);
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        u8 *active = (u8 *)stream_state + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID;
             i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("subcastle %u\n", i);
            subCastleFindMatch(c, begin, end, full_state, stream_state, mloc,
                               &found, i);
        }
    }

    return found;
}

static really_inline
u64a subCastleNextMatch(const struct Castle *c, void *full_state,
                        void *stream_state, const u64a loc,
                        const u32 subIdx) {
    DEBUG_PRINTF("subcastle %u\n", subIdx);
    const struct SubCastle *sub = getSubCastle(c, subIdx);
    const struct RepeatInfo *info = getRepeatInfo(sub);
    const union RepeatControl *rctrl = getControlConst(full_state, sub);
    const char *rstate = (const char *)stream_state +
                         sub->streamStateOffset +
                         info->packedCtrlSize;

    return repeatNextMatch(info, rctrl, rstate, loc);
}
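
/*
 * Record a subcastle's next match in the temporary "matching" multibit,
 * maintaining the minimum match offset seen so far: a strictly earlier
 * match resets the multibit to just this repeat, an equal one adds to it,
 * and a match beyond the horizon (end) is left for a later iteration. A
 * repeat with no further matches is deactivated.
 */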
static really_inline
void set_matching(const struct Castle *c, const u64a match, u8 *active,
                  u8 *matching, const u32 active_size, const u32 active_id,
                  const u32 matching_id, u64a *offset, const u64a end) {
    if (match == 0) {
        DEBUG_PRINTF("no more matches\n");
        mmbit_unset(active, active_size, active_id);
    } else if (match > end) {
        // If we had a local copy of the active mmbit, we could skip
        // looking at this repeat again. But we don't, so we just move
        // on.
    } else if (match == *offset) {
        mmbit_set(matching, c->numRepeats, matching_id);
    } else if (match < *offset) {
        // New minimum offset.
        *offset = match;
        mmbit_clear(matching, c->numRepeats);
        mmbit_set(matching, c->numRepeats, matching_id);
    }
}

static really_inline
void subCastleMatchLoop(const struct Castle *c, void *full_state,
                        void *stream_state, const u64a end,
                        const u64a loc, u64a *offset) {
    u8 *active = (u8 *)stream_state + c->activeOffset;
    u8 *matching = full_state;
    for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
         i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
        u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i);
        set_matching(c, match, active, matching, c->numRepeats, i,
                     i, offset, end);
    }
}

static really_inline
char subCastleFireMatch(const struct Castle *c, const void *full_state,
                        UNUSED const void *stream_state, NfaCallback cb,
                        void *ctx, const u64a offset) {
    const u8 *matching = full_state;

    // Fire all matching sub-castles at this offset.
    for (u32 i = mmbit_iterate(matching, c->numRepeats, MMB_INVALID);
         i != MMB_INVALID;
         i = mmbit_iterate(matching, c->numRepeats, i)) {
        const struct SubCastle *sub = getSubCastle(c, i);
        DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
        if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) {
            DEBUG_PRINTF("caller told us to halt\n");
            return MO_HALT_MATCHING;
        }
    }

    return MO_CONTINUE_MATCHING;
}
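
/*
 * Main match loop for CALLBACK_OUTPUT mode: repeatedly compute the minimum
 * next-match offset across all active subcastles (both exclusive groups
 * and the ordinary active multibit), fire callbacks for every subcastle
 * matching at that offset, then advance to it. The "matching" multibit is
 * scratch space borrowed from full_state.
 */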
static really_inline
char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end,
                     void *full_state, void *stream_state, NfaCallback cb,
                     void *ctx) {
    DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
    assert(begin <= end);

    u8 *matching = full_state; // temp multibit

    u64a loc = begin;
    while (loc < end) {
        // Find minimum next offset for the next match(es) from amongst our
        // active sub-castles, and store the indices of the sub-castles that
        // match at that offset in the 'matching' mmbit, which is in the
        // full_state (scratch).

        u64a offset = end; // min offset of next match
        u32 activeIdx = 0;
        mmbit_clear(matching, c->numRepeats);
        if (c->exclusive) {
            u8 *active = (u8 *)stream_state;
            u8 *groups = active + c->groupIterOffset;
            for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
                 i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
                u8 *cur = active + i * c->activeIdxSize;
                activeIdx = partial_load_u32(cur, c->activeIdxSize);
                u64a match = subCastleNextMatch(c, full_state, stream_state,
                                                loc, activeIdx);
                set_matching(c, match, groups, matching, c->numGroups, i,
                             activeIdx, &offset, end);
            }
        }

        if (c->exclusive != PURE_EXCLUSIVE) {
            subCastleMatchLoop(c, full_state, stream_state, end, loc, &offset);
        }
        DEBUG_PRINTF("offset=%llu\n", offset);
        if (!mmbit_any(matching, c->numRepeats)) {
            DEBUG_PRINTF("no more matches\n");
            break;
        }

        if (subCastleFireMatch(c, full_state, stream_state,
                               cb, ctx, offset) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING;
        }
        loc = offset;
    }

    return MO_CONTINUE_MATCHING;
}
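
/*
 * Escape scanning: look for the first character in buf[begin, end) that
 * can kill the castle's repeats, using the acceleration scheme recorded in
 * c->type (vermicelli for single-byte classes, shufti/truffle for wider
 * ones). Scanners return 1 and write the escape's offset to *loc, or 0 if
 * no escape is present.
 */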
static really_inline
char castleScanVerm(const struct Castle *c, const u8 *buf, const size_t begin,
                    const size_t end, size_t *loc) {
    const u8 *ptr = vermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
    if (ptr == buf + end) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin,
                     const size_t end, size_t *loc) {
    const u8 *ptr = nvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
    if (ptr == buf + end) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin,
                      const size_t end, size_t *loc) {
    const m128 mask_lo = c->u.shuf.mask_lo;
    const m128 mask_hi = c->u.shuf.mask_hi;
    const u8 *ptr = shuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
    if (ptr == buf + end) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleScanTruffle(const struct Castle *c, const u8 *buf,
                       const size_t begin, const size_t end, size_t *loc) {
    const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
                                buf + begin, buf + end);
    if (ptr == buf + end) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleScan(const struct Castle *c, const u8 *buf, const size_t begin,
                const size_t end, size_t *loc) {
    assert(begin <= end);

    if (begin == end) {
        return 0;
    }

    switch (c->type) {
    case CASTLE_DOT:
        // Nothing can stop a dot scan!
        return 0;
    case CASTLE_VERM:
        return castleScanVerm(c, buf, begin, end, loc);
    case CASTLE_NVERM:
        return castleScanNVerm(c, buf, begin, end, loc);
    case CASTLE_SHUFTI:
        return castleScanShufti(c, buf, begin, end, loc);
    case CASTLE_TRUFFLE:
        return castleScanTruffle(c, buf, begin, end, loc);
    default:
        DEBUG_PRINTF("unknown scan type!\n");
        assert(0);
        return 0;
    }
}
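
/*
 * Reverse counterparts of the scanners above, used by nfaExecCastle_QR()
 * via castleLastKillLoc() to find the last escape in a range. Note the
 * different not-found sentinel: the reverse engines return buf + begin - 1
 * rather than buf + end.
 */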
static really_inline
char castleRevScanVerm(const struct Castle *c, const u8 *buf,
                       const size_t begin, const size_t end, size_t *loc) {
    const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
    if (ptr == buf + begin - 1) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleRevScanNVerm(const struct Castle *c, const u8 *buf,
                        const size_t begin, const size_t end, size_t *loc) {
    const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end);
    if (ptr == buf + begin - 1) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleRevScanShufti(const struct Castle *c, const u8 *buf,
                         const size_t begin, const size_t end, size_t *loc) {
    const m128 mask_lo = c->u.shuf.mask_lo;
    const m128 mask_hi = c->u.shuf.mask_hi;
    const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end);
    if (ptr == buf + begin - 1) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleRevScanTruffle(const struct Castle *c, const u8 *buf,
                          const size_t begin, const size_t end, size_t *loc) {
    const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2,
                                 buf + begin, buf + end);
    if (ptr == buf + begin - 1) {
        DEBUG_PRINTF("no escape found\n");
        return 0;
    }

    assert(loc);
    assert(ptr >= buf && ptr < buf + end);
    *loc = (size_t)(ptr - buf);
    DEBUG_PRINTF("escape found at offset %zu\n", *loc);
    return 1;
}

static really_inline
char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin,
                   const size_t end, size_t *loc) {
    assert(begin <= end);
    DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end);
    if (begin == end) {
        return 0;
    }

    switch (c->type) {
    case CASTLE_DOT:
        // Nothing can stop a dot scan!
        return 0;
    case CASTLE_VERM:
        return castleRevScanVerm(c, buf, begin, end, loc);
    case CASTLE_NVERM:
        return castleRevScanNVerm(c, buf, begin, end, loc);
    case CASTLE_SHUFTI:
        return castleRevScanShufti(c, buf, begin, end, loc);
    case CASTLE_TRUFFLE:
        return castleRevScanTruffle(c, buf, begin, end, loc);
    default:
        DEBUG_PRINTF("unknown scan type!\n");
        assert(0);
        return 0;
    }
}

static really_inline
void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp,
                       char stale_checked) {
    const u32 event = q->items[q->cur].type;
    switch (event) {
    case MQE_TOP:
        assert(0); // should be a numbered top
        break;
    case MQE_START:
    case MQE_END:
        break;
    default:
        assert(event >= MQE_TOP_FIRST);
        assert(event < MQE_INVALID);
        u32 top = event - MQE_TOP_FIRST;
        DEBUG_PRINTF("top %u at offset %llu\n", top, sp);
        castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked);
        break;
    }
}

static really_inline
void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) {
    DEBUG_PRINTF("clearing active repeats due to escape\n");
    if (c->exclusive) {
        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
        mmbit_clear(groups, c->numGroups);
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        mmbit_clear(active, c->numRepeats);
    }
}
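
/*
 * Core queue execution loop, shared by nfaExecCastle_Q (CALLBACK_OUTPUT)
 * and nfaExecCastle_Q2 (STOP_AT_MATCH). Between queue events it scans the
 * buffer for escapes, reports or records matches in the scanned range,
 * clears all repeats if an escape was found, then handles the event itself
 * (typically a numbered top). The return value is zero only when the
 * castle has no live repeats left.
 */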
static really_inline
char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end,
                       enum MatchMode mode) {
    assert(n && q);
    assert(n->type == CASTLE_NFA);

    DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState);

    const struct Castle *c = getImplNfa(n);

    if (q->report_current) {
        int rv = castleReportCurrent(c, q);
        q->report_current = 0;
        if (rv == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING;
        }
    }

    if (q->cur == q->end) {
        return 1;
    }

    u8 *active = (u8 *)q->streamState + c->activeOffset; // active multibit

    assert(q->cur + 1 < q->end); // require at least two items
    assert(q_cur_type(q) == MQE_START);
    u64a sp = q_cur_offset(q);
    q->cur++;
    DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);

    while (q->cur < q->end) {
        DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                     q_cur_offset(q));

        char found = 0;
        if (c->exclusive) {
            u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
            found = mmbit_any(groups, c->numGroups);
        }

        if (!found && !mmbit_any(active, c->numRepeats)) {
            DEBUG_PRINTF("no repeats active, skipping scan\n");
            goto scan_done;
        }

        u64a ep = q_cur_offset(q);
        ep = MIN(ep, q->offset + end);
        if (sp < ep) {
            size_t eloc = 0;
            char escape_found = 0;
            DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
            assert(sp >= q->offset && ep >= q->offset);
            if (castleScan(c, q->buffer, sp - q->offset, ep - q->offset,
                           &eloc)) {
                escape_found = 1;
                ep = q->offset + eloc;
                DEBUG_PRINTF("escape found at %llu\n", ep);
                assert(ep >= sp);
            }

            assert(sp <= ep);

            if (mode == STOP_AT_MATCH) {
                size_t mloc;
                if (castleFindMatch(c, sp, ep, q->state, q->streamState,
                                    &mloc)) {
                    DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
                    q->cur--;
                    assert(q->cur < MAX_MQE_LEN);
                    q->items[q->cur].type = MQE_START;
                    q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
                    return MO_MATCHES_PENDING;
                }
            } else {
                assert(mode == CALLBACK_OUTPUT);
                char rv = castleMatchLoop(c, sp, ep, q->state, q->streamState,
                                          q->cb, q->context);
                if (rv == MO_HALT_MATCHING) {
                    return MO_HALT_MATCHING;
                }
                assert(rv == MO_CONTINUE_MATCHING);
            }

            if (escape_found) {
                clear_repeats(c, q, active);
            }
        }

    scan_done:
        if (q_cur_loc(q) > end) {
            q->cur--;
            assert(q->cur < MAX_MQE_LEN);
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = end;
            return MO_ALIVE;
        }

        sp = q_cur_offset(q);
        castleHandleEvent(c, q, sp, 1);
        q->cur++;
    }

    if (c->exclusive) {
        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
        if (mmbit_any_precise(groups, c->numGroups)) {
            return 1;
        }
    }

    return mmbit_any_precise(active, c->numRepeats);
}

char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("entry\n");
    return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT);
}

char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("entry\n");
    return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH);
}
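
/*
 * Find the location of the last escape character before the end of the
 * queue, scanning backwards through the current buffer and, if necessary,
 * the history buffer. Returns sp - 1 when no escape exists, i.e. the
 * repeats are never killed within the exec range.
 */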
static
s64a castleLastKillLoc(const struct Castle *c, struct mq *q) {
    assert(q_cur_type(q) == MQE_START);
    assert(q_last_type(q) == MQE_END);
    s64a sp = q_cur_loc(q);
    s64a ep = q_last_loc(q);

    DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep);

    size_t loc;

    if (ep > 0) {
        if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) {
            return (s64a)loc;
        }
        ep = 0;
    }

    if (sp < 0) {
        s64a hlen = q->hlength;

        if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) {
            return (s64a)loc - hlen;
        }
    }

    return sp - 1; /* the repeats are never killed */
}

char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) {
    assert(n && q);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry\n");

    if (q->cur == q->end) {
        return 1;
    }

    assert(q->cur + 1 < q->end); /* require at least two items */
    assert(q_cur_type(q) == MQE_START);

    const struct Castle *c = getImplNfa(n);
    u8 *active = (u8 *)q->streamState + c->activeOffset;

    u64a end_offset = q_last_loc(q) + q->offset;
    s64a last_kill_loc = castleLastKillLoc(c, q);
    DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n",
                 last_kill_loc, q_cur_loc(q), q_last_loc(q));
    assert(last_kill_loc < q_last_loc(q));

    if (last_kill_loc != q_cur_loc(q) - 1) {
        clear_repeats(c, q, active);
    }

    q->cur++; /* skip start event */

    /* skip events prior to the repeats being squashed */
    while (q_cur_loc(q) <= last_kill_loc) {
        DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q));
        q->cur++;
        assert(q->cur < q->end);
    }

    while (q->cur < q->end) {
        DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                     q_cur_offset(q));
        u64a sp = q_cur_offset(q);
        castleHandleEvent(c, q, sp, 0);
        q->cur++;
    }

    castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState);

    char found = 0;
    if (c->exclusive) {
        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
        found = mmbit_any_precise(groups, c->numGroups);
    }

    if (!found && !mmbit_any_precise(active, c->numRepeats)) {
        DEBUG_PRINTF("castle is dead\n");
        return 0;
    }

    if (castleInAccept(c, q, report, end_offset)) {
        return MO_MATCHES_PENDING;
    }

    return 1;
}

char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) {
    assert(n && q);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry\n");

    const struct Castle *c = getImplNfa(n);
    castleReportCurrent(c, q);
    return 0;
}

char nfaExecCastle_inAccept(const struct NFA *n, ReportID report,
                            struct mq *q) {
    assert(n && q);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry\n");

    const struct Castle *c = getImplNfa(n);
    return castleInAccept(c, q, report, q_cur_offset(q));
}

char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) {
    assert(n && q);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry\n");

    const struct Castle *c = getImplNfa(n);
    const u64a offset = q_cur_offset(q);
    DEBUG_PRINTF("offset=%llu\n", offset);

    if (c->exclusive) {
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("subcastle %u\n", activeIdx);
            const struct SubCastle *sub = getSubCastle(c, activeIdx);
            if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
                return 1;
            }
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("subcastle %u\n", i);
            const struct SubCastle *sub = getSubCastle(c, i);
            if (subCastleInAccept(c, q, sub->report, offset, i)) {
                return 1;
            }
        }
    }

    return 0;
}

char nfaExecCastle_queueInitState(const struct NFA *n, struct mq *q) {
    assert(n && q);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry\n");

    const struct Castle *c = getImplNfa(n);
    assert(q->streamState);
    if (c->exclusive) {
        u8 *groups = (u8 *)q->streamState + c->groupIterOffset;
        mmbit_clear(groups, c->numGroups);
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        u8 *active = (u8 *)q->streamState + c->activeOffset;
        mmbit_clear(active, c->numRepeats);
    }
    return 0;
}

char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset,
                                       void *state, UNUSED u8 key) {
    assert(n && state);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry\n");

    const struct Castle *c = getImplNfa(n);
    if (c->exclusive) {
        u8 *groups = (u8 *)state + c->groupIterOffset;
        mmbit_clear(groups, c->numGroups);
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        u8 *active = (u8 *)state + c->activeOffset;
        mmbit_clear(active, c->numRepeats);
    }
    return 0;
}
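
/*
 * Stream state compression: at a stream boundary the unpacked RepeatControl
 * held in full state is packed down into stream state via repeatPack();
 * subCastleExpandState() below performs the inverse with repeatUnpack().
 * Only repeats that are currently active are packed or unpacked.
 */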
static really_inline
void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx,
                                 const struct mq *q, const u64a offset) {
    const struct SubCastle *sub = getSubCastle(c, subIdx);
    const struct RepeatInfo *info = getRepeatInfo(sub);
    union RepeatControl *rctrl = getControl(q->state, sub);
    char *packed = (char *)q->streamState + sub->streamStateOffset;
    DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
                 repeatNextMatch(info, rctrl,
                                 packed + info->packedCtrlSize, offset));
    repeatPack(packed, info, rctrl, offset);
}

char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q,
                                      s64a loc) {
    assert(n && q);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry, loc=%lld\n", loc);

    const struct Castle *c = getImplNfa(n);

    // Pack state for all active repeats.
    const u64a offset = q->offset + loc;
    DEBUG_PRINTF("offset=%llu\n", offset);
    if (c->exclusive) {
        u8 *active = (u8 *)q->streamState;
        u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            DEBUG_PRINTF("packing state for sub %u\n", activeIdx);
            subCastleQueueCompressState(c, activeIdx, q, offset);
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            DEBUG_PRINTF("packing state for sub %u\n", i);
            subCastleQueueCompressState(c, i, q, offset);
        }
    }
    return 0;
}

static really_inline
void subCastleExpandState(const struct Castle *c, const u32 subIdx,
                          void *dest, const void *src, const u64a offset) {
    const struct SubCastle *sub = getSubCastle(c, subIdx);
    const struct RepeatInfo *info = getRepeatInfo(sub);
    DEBUG_PRINTF("unpacking state for sub %u\n", subIdx);
    union RepeatControl *rctrl = getControl(dest, sub);
    const char *packed = (const char *)src + sub->streamStateOffset;
    repeatUnpack(packed, info, offset, rctrl);
    DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
                 repeatNextMatch(info, rctrl,
                                 packed + info->packedCtrlSize, offset));
}

char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src,
                               u64a offset, UNUSED u8 key) {
    assert(n && dest && src);
    assert(n->type == CASTLE_NFA);
    DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset);

    const struct Castle *c = getImplNfa(n);

    if (c->exclusive) {
        const u8 *active = (const u8 *)src;
        const u8 *groups = active + c->groupIterOffset;
        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
            const u8 *cur = active + i * c->activeIdxSize;
            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
            subCastleExpandState(c, activeIdx, dest, src, offset);
        }
    }

    if (c->exclusive != PURE_EXCLUSIVE) {
        // Unpack state for all active repeats.
        const u8 *active = (const u8 *)src + c->activeOffset;
        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
            subCastleExpandState(c, i, dest, src, offset);
        }
    }
    return 0;
}