1/*
2 * Copyright (c) 2015-2019, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * \file
31 * \brief Rose runtime: program interpreter.
32 */
33
34#include "program_runtime.h"
35
36#include "catchup.h"
37#include "counting_miracle.h"
38#include "infix.h"
39#include "match.h"
40#include "miracle.h"
41#include "report.h"
42#include "rose_common.h"
43#include "rose_internal.h"
44#include "rose_program.h"
45#include "rose_types.h"
46#include "validate_mask.h"
47#include "validate_shufti.h"
48#include "runtime.h"
49#include "util/compare.h"
50#include "util/copybytes.h"
51#include "util/fatbit.h"
52#include "util/multibit.h"
53
54/* Inline implementation follows. */
55
56static rose_inline
57void rosePushDelayedMatch(const struct RoseEngine *t,
58 struct hs_scratch *scratch, u32 delay,
59 u32 delay_index, u64a offset) {
60 assert(delay);
61
62 const u32 src_slot_index = delay;
63 u32 slot_index = (src_slot_index + offset) & DELAY_MASK;
64
65 struct RoseContext *tctxt = &scratch->tctxt;
66 if (offset + src_slot_index <= tctxt->delayLastEndOffset) {
67 DEBUG_PRINTF("skip too late\n");
68 return;
69 }
70
71 const u32 delay_count = t->delay_count;
72 struct fatbit **delaySlots = getDelaySlots(scratch);
73 struct fatbit *slot = delaySlots[slot_index];
74
75 DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index);
76 if (!(tctxt->filledDelayedSlots & (1U << slot_index))) {
77 tctxt->filledDelayedSlots |= 1U << slot_index;
78 fatbit_clear(slot);
79 }
80
81 fatbit_set(slot, delay_count, delay_index);
82}
83
84static rose_inline
85void recordAnchoredLiteralMatch(const struct RoseEngine *t,
86 struct hs_scratch *scratch, u32 anch_id,
87 u64a end) {
88 assert(end);
89
90 if (end <= t->floatingMinLiteralMatchOffset) {
91 return;
92 }
93
94 struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
95
96 DEBUG_PRINTF("record %u (of %u) @ %llu\n", anch_id, t->anchored_count, end);
97
98 if (!bf64_set(&scratch->al_log_sum, end - 1)) {
99 // first time, clear row
100 DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count);
101 fatbit_clear(anchoredLiteralRows[end - 1]);
102 }
103
104 assert(anch_id < t->anchored_count);
105 fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, anch_id);
106}
107
108static rose_inline
109char roseLeftfixCheckMiracles(const struct RoseEngine *t,
110 const struct LeftNfaInfo *left,
111 struct core_info *ci, struct mq *q, u64a end,
112 const char is_infix) {
113 if (!is_infix && left->transient) {
114 // Miracles won't help us with transient leftfix engines; they only
115 // scan for a limited time anyway.
116 return 1;
117 }
118
119 if (!left->stopTable) {
120 return 1;
121 }
122
123 DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex);
124
125 const s64a begin_loc = q_cur_loc(q);
126 const s64a end_loc = end - ci->buf_offset;
127
128 s64a miracle_loc;
129 if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
130 goto found_miracle;
131 }
132
133 if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
134 &miracle_loc)) {
135 goto found_miracle;
136 }
137
138 return 1;
139
140found_miracle:
141 DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
142 assert(miracle_loc >= begin_loc);
143
144 // If we're a prefix, then a miracle effectively results in us needing to
145 // re-init our state and start fresh.
146 if (!is_infix) {
147 if (miracle_loc != begin_loc) {
148 DEBUG_PRINTF("re-init prefix state\n");
149 q->cur = q->end = 0;
150 pushQueueAt(q, 0, MQE_START, miracle_loc);
151 pushQueueAt(q, 1, MQE_TOP, miracle_loc);
152 nfaQueueInitState(q->nfa, q);
153 }
154 return 1;
155 }
156
157 // Otherwise, we're an infix. Remove tops before the miracle from the queue
158 // and re-init at that location.
159
160 q_skip_forward_to(q, miracle_loc);
161
162 if (q_last_type(q) == MQE_START) {
163 DEBUG_PRINTF("miracle caused infix to die\n");
164 return 0;
165 }
166
167 DEBUG_PRINTF("re-init infix state\n");
168 assert(q->items[q->cur].type == MQE_START);
169 q->items[q->cur].location = miracle_loc;
170 nfaQueueInitState(q->nfa, q);
171
172 return 1;
173}
174
175static rose_inline
176hwlmcb_rv_t roseTriggerSuffix(const struct RoseEngine *t,
177 struct hs_scratch *scratch, u32 qi, u32 top,
178 u64a som, u64a end) {
179 DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top);
180
181 struct core_info *ci = &scratch->core_info;
182 u8 *aa = getActiveLeafArray(t, ci->state);
183 const u32 aaCount = t->activeArrayCount;
184 const u32 qCount = t->queueCount;
185 struct mq *q = &scratch->queues[qi];
186 const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
187 const struct NFA *nfa = getNfaByInfo(t, info);
188
189 s64a loc = (s64a)end - ci->buf_offset;
190 assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);
191
192 if (!mmbit_set(aa, aaCount, qi)) {
193 initQueue(q, qi, t, scratch);
194 nfaQueueInitState(nfa, q);
195 pushQueueAt(q, 0, MQE_START, loc);
196 fatbit_set(scratch->aqa, qCount, qi);
197 } else if (info->no_retrigger) {
198 DEBUG_PRINTF("yawn\n");
199 /* nfa only needs one top; we can go home now */
200 return HWLM_CONTINUE_MATCHING;
201 } else if (!fatbit_set(scratch->aqa, qCount, qi)) {
202 initQueue(q, qi, t, scratch);
203 loadStreamState(nfa, q, 0);
204 pushQueueAt(q, 0, MQE_START, 0);
205 } else if (isQueueFull(q)) {
206 DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
207 if (info->eod) {
208 /* can catch up suffix independently no pq */
209 q->context = NULL;
210 pushQueueNoMerge(q, MQE_END, loc);
211 nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
212 q->cur = q->end = 0;
213 pushQueueAt(q, 0, MQE_START, loc);
214 } else if (ensureQueueFlushed(t, scratch, qi, loc)
215 == HWLM_TERMINATE_MATCHING) {
216 return HWLM_TERMINATE_MATCHING;
217 }
218 }
219
220 assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID));
221 pushQueueSom(q, top, loc, som);
222
223 if (q_cur_loc(q) == (s64a)ci->len && !info->eod) {
224 /* we may not run the nfa; need to ensure state is fine */
225 DEBUG_PRINTF("empty run\n");
226 pushQueueNoMerge(q, MQE_END, loc);
227 char alive = nfaQueueExec(nfa, q, loc);
228 if (alive) {
229 q->cur = q->end = 0;
230 pushQueueAt(q, 0, MQE_START, loc);
231 } else {
232 mmbit_unset(aa, aaCount, qi);
233 fatbit_unset(scratch->aqa, qCount, qi);
234 }
235 }
236
237 return HWLM_CONTINUE_MATCHING;
238}
239
240static really_inline
241char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch,
242 u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end,
243 const char is_infix) {
244 struct core_info *ci = &scratch->core_info;
245
246 u32 ri = queueToLeftIndex(t, qi);
247 const struct LeftNfaInfo *left = getLeftTable(t) + ri;
248
249 DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n",
250 (left->transient ? "transient" : "active"),
251 (is_infix ? "infix" : "prefix"),
252 ri, qi, leftfixLag, left->maxLag);
253
254 assert(leftfixLag <= left->maxLag);
255 assert(left->infix == is_infix);
256 assert(!is_infix || !left->transient); // Only prefixes can be transient.
257
258 struct mq *q = scratch->queues + qi;
259 char *state = scratch->core_info.state;
260 u8 *activeLeftArray = getActiveLeftArray(t, state);
261 u32 qCount = t->queueCount;
262 u32 arCount = t->activeLeftCount;
263
264 if (!mmbit_isset(activeLeftArray, arCount, ri)) {
265 DEBUG_PRINTF("engine is dead nothing to see here\n");
266 return 0;
267 }
268
269 if (unlikely(end < leftfixLag)) {
270 assert(0); /* lag is the literal length */
271 return 0;
272 }
273
274 if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset
275 && !fatbit_isset(scratch->aqa, qCount, qi)
276 && isZombie(t, state, left)) {
277 DEBUG_PRINTF("zombie\n");
278 return 1;
279 }
280
281 if (!fatbit_set(scratch->aqa, qCount, qi)) {
282 DEBUG_PRINTF("initing q %u\n", qi);
283 initRoseQueue(t, qi, left, scratch);
284 if (ci->buf_offset) { // there have been writes before us!
285 s32 sp;
286 if (!is_infix && left->transient) {
287 sp = -(s32)ci->hlen;
288 } else {
289 sp = -(s32)loadRoseDelay(t, state, left);
290 }
291
292 /* transient nfas are always started fresh -> state not maintained
293 * at stream boundary */
294
295 pushQueueAt(q, 0, MQE_START, sp);
296 if (is_infix || (ci->buf_offset + sp > 0 && !left->transient)) {
297 loadStreamState(q->nfa, q, sp);
298 } else {
299 pushQueueAt(q, 1, MQE_TOP, sp);
300 nfaQueueInitState(q->nfa, q);
301 }
302 } else { // first write ever
303 pushQueueAt(q, 0, MQE_START, 0);
304 pushQueueAt(q, 1, MQE_TOP, 0);
305 nfaQueueInitState(q->nfa, q);
306 }
307 }
308
309 s64a loc = (s64a)end - ci->buf_offset - leftfixLag;
310 assert(loc >= q_cur_loc(q) || left->eager);
311 assert(leftfixReport != MO_INVALID_IDX);
312
313 if (!is_infix && left->transient) {
314 s64a start_loc = loc - left->transient;
315 if (q_cur_loc(q) < start_loc) {
316 q->cur = q->end = 0;
317 pushQueueAt(q, 0, MQE_START, start_loc);
318 pushQueueAt(q, 1, MQE_TOP, start_loc);
319 nfaQueueInitState(q->nfa, q);
320 }
321 }
322
323 if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) {
324 if (is_infix) {
325 if (infixTooOld(q, loc)) {
326 DEBUG_PRINTF("infix %u died of old age\n", ri);
327 goto nfa_dead;
328 }
329
330 reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth);
331 }
332
333 if (!roseLeftfixCheckMiracles(t, left, ci, q, end, is_infix)) {
334 DEBUG_PRINTF("leftfix %u died due to miracle\n", ri);
335 goto nfa_dead;
336 }
337
338#ifdef DEBUG
339 debugQueue(q);
340#endif
341
342 pushQueueNoMerge(q, MQE_END, loc);
343
344 char rv = nfaQueueExecRose(q->nfa, q, leftfixReport);
345 if (!rv) { /* nfa is dead */
346 DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri);
347 goto nfa_dead;
348 }
349
350 // Queue must have next start loc before we call nfaInAcceptState.
351 q->cur = q->end = 0;
352 pushQueueAt(q, 0, MQE_START, loc);
353
354 DEBUG_PRINTF("checking for report %u\n", leftfixReport);
355 DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
356 return rv == MO_MATCHES_PENDING;
357 } else if (q_cur_loc(q) > loc) {
358 /* an eager leftfix may have already progressed past loc if there is no
359 * match at loc. */
360 assert(left->eager);
361 return 0;
362 } else {
363 assert(q_cur_loc(q) == loc);
364 DEBUG_PRINTF("checking for report %u\n", leftfixReport);
365 char rv = nfaInAcceptState(q->nfa, leftfixReport, q);
366 DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
367 return rv;
368 }
369
370nfa_dead:
371 mmbit_unset(activeLeftArray, arCount, ri);
372 scratch->tctxt.groups &= left->squash_mask;
373 return 0;
374}
375
376static rose_inline
377char roseTestPrefix(const struct RoseEngine *t, struct hs_scratch *scratch,
378 u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) {
379 return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 0);
380}
381
382static rose_inline
383char roseTestInfix(const struct RoseEngine *t, struct hs_scratch *scratch,
384 u32 qi, u32 leftfixLag, ReportID leftfixReport, u64a end) {
385 return roseTestLeftfix(t, scratch, qi, leftfixLag, leftfixReport, end, 1);
386}
387
388static rose_inline
389void roseTriggerInfix(const struct RoseEngine *t, struct hs_scratch *scratch,
390 u64a start, u64a end, u32 qi, u32 topEvent, u8 cancel) {
391 struct core_info *ci = &scratch->core_info;
392 s64a loc = (s64a)end - ci->buf_offset;
393
394 u32 ri = queueToLeftIndex(t, qi);
395 assert(topEvent < MQE_INVALID);
396
397 const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
398 assert(!left->transient);
399
400 DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent);
401
402 struct mq *q = scratch->queues + qi;
403 const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
404
405 char *state = ci->state;
406 u8 *activeLeftArray = getActiveLeftArray(t, state);
407 const u32 arCount = t->activeLeftCount;
408 char alive = mmbit_set(activeLeftArray, arCount, ri);
409
410 if (alive && info->no_retrigger) {
411 DEBUG_PRINTF("yawn\n");
412 return;
413 }
414
415 struct fatbit *aqa = scratch->aqa;
416 const u32 qCount = t->queueCount;
417
418 if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset &&
419 !fatbit_isset(aqa, qCount, qi) && isZombie(t, state, left)) {
420 DEBUG_PRINTF("yawn - zombie\n");
421 return;
422 }
423
424 if (cancel) {
425 DEBUG_PRINTF("dominating top: (re)init\n");
426 fatbit_set(aqa, qCount, qi);
427 initRoseQueue(t, qi, left, scratch);
428 pushQueueAt(q, 0, MQE_START, loc);
429 nfaQueueInitState(q->nfa, q);
430 } else if (!fatbit_set(aqa, qCount, qi)) {
431 DEBUG_PRINTF("initing %u\n", qi);
432 initRoseQueue(t, qi, left, scratch);
433 if (alive) {
434 s32 sp = -(s32)loadRoseDelay(t, state, left);
435 pushQueueAt(q, 0, MQE_START, sp);
436 loadStreamState(q->nfa, q, sp);
437 } else {
438 pushQueueAt(q, 0, MQE_START, loc);
439 nfaQueueInitState(q->nfa, q);
440 }
441 } else if (!alive) {
442 q->cur = q->end = 0;
443 pushQueueAt(q, 0, MQE_START, loc);
444 nfaQueueInitState(q->nfa, q);
445 } else if (isQueueFull(q)) {
446 reduceInfixQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth);
447
448 if (isQueueFull(q)) {
449 /* still full - reduceInfixQueue did nothing */
450 DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi,
451 q->end - q->cur);
452 pushQueueNoMerge(q, MQE_END, loc);
453 nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
454
455 q->cur = q->end = 0;
456 pushQueueAt(q, 0, MQE_START, loc);
457 }
458 }
459
460 pushQueueSom(q, topEvent, loc, start);
461}
462
463static rose_inline
464hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
465 u64a end, ReportID onmatch, s32 offset_adjust,
466 u32 ekey) {
467 DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end);
468 updateLastMatchOffset(&scratch->tctxt, end);
469
470 int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey);
471 if (cb_rv == MO_HALT_MATCHING) {
472 DEBUG_PRINTF("termination requested\n");
473 return HWLM_TERMINATE_MATCHING;
474 }
475
476 if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
477 return HWLM_CONTINUE_MATCHING;
478 }
479
480 return roseHaltIfExhausted(t, scratch);
481}
482
483/* catches up engines enough to ensure any earlier mpv triggers are enqueued
484 * and then adds the trigger to the mpv queue. */
485static rose_inline
486hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
487 struct hs_scratch *scratch,
488 u32 event, u64a top_squash_distance,
489 u64a end, const char in_catchup) {
490 if (!in_catchup &&
491 roseCatchUpMpvFeeders(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
492 return HWLM_TERMINATE_MATCHING;
493 }
494 return roseHandleChainMatch(t, scratch, event, top_squash_distance, end,
495 in_catchup);
496}
497
498static rose_inline
499void roseHandleSom(struct hs_scratch *scratch, const struct som_operation *sr,
500 u64a end) {
501 DEBUG_PRINTF("end=%llu, minMatchOffset=%llu\n", end,
502 scratch->tctxt.minMatchOffset);
503
504 updateLastMatchOffset(&scratch->tctxt, end);
505 handleSomInternal(scratch, sr, end);
506}
507
508static rose_inline
509hwlmcb_rv_t roseReportSom(const struct RoseEngine *t,
510 struct hs_scratch *scratch, u64a start, u64a end,
511 ReportID onmatch, s32 offset_adjust, u32 ekey) {
512 DEBUG_PRINTF("firing som callback onmatch=%u, start=%llu, end=%llu\n",
513 onmatch, start, end);
514 updateLastMatchOffset(&scratch->tctxt, end);
515
516 int cb_rv = roseDeliverSomReport(start, end, onmatch, offset_adjust,
517 scratch, ekey);
518 if (cb_rv == MO_HALT_MATCHING) {
519 DEBUG_PRINTF("termination requested\n");
520 return HWLM_TERMINATE_MATCHING;
521 }
522
523 if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
524 return HWLM_CONTINUE_MATCHING;
525 }
526
527 return roseHaltIfExhausted(t, scratch);
528}
529
530static rose_inline
531void roseHandleSomSom(struct hs_scratch *scratch,
532 const struct som_operation *sr, u64a start, u64a end) {
533 DEBUG_PRINTF("start=%llu, end=%llu, minMatchOffset=%llu\n", start, end,
534 scratch->tctxt.minMatchOffset);
535
536 updateLastMatchOffset(&scratch->tctxt, end);
537 setSomFromSomAware(scratch, sr, start, end);
538}
539
540static rose_inline
541hwlmcb_rv_t roseSetExhaust(const struct RoseEngine *t,
542 struct hs_scratch *scratch, u32 ekey) {
543 assert(scratch);
544 assert(scratch->magic == SCRATCH_MAGIC);
545
546 struct core_info *ci = &scratch->core_info;
547
548 assert(!can_stop_matching(scratch));
549 assert(!isExhausted(ci->rose, ci->exhaustionVector, ekey));
550
551 markAsMatched(ci->rose, ci->exhaustionVector, ekey);
552
553 return roseHaltIfExhausted(t, scratch);
554}
555
556static really_inline
557int reachHasBit(const u8 *reach, u8 c) {
558 return !!(reach[c / 8U] & (u8)1U << (c % 8U));
559}
560
561/*
562 * Generate a 8-byte valid_mask with #high bytes 0 from the highest side
563 * and #low bytes 0 from the lowest side
564 * and (8 - high - low) bytes '0xff' in the middle.
565 */
566static rose_inline
567u64a generateValidMask(const s32 high, const s32 low) {
568 assert(high + low < 8);
569 DEBUG_PRINTF("high %d low %d\n", high, low);
570 const u64a ones = ~0ull;
571 return (ones << ((high + low) * 8)) >> (high * 8);
572}
573
574/*
575 * Do the single-byte check if only one lookaround entry exists
576 * and it's a single mask.
577 * Return success if the byte is in the future or before history
578 * (offset is greater than (history) buffer length).
579 */
580static rose_inline
581int roseCheckByte(const struct core_info *ci, u8 and_mask, u8 cmp_mask,
582 u8 negation, s32 checkOffset, u64a end) {
583 DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
584 ci->buf_offset, ci->buf_offset + ci->len);
585 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
586 DEBUG_PRINTF("too early, fail\n");
587 return 0;
588 }
589
590 const s64a base_offset = end - ci->buf_offset;
591 s64a offset = base_offset + checkOffset;
592 DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset);
593 u8 c;
594 if (offset >= 0) {
595 if (offset >= (s64a)ci->len) {
596 DEBUG_PRINTF("in the future\n");
597 return 1;
598 } else {
599 assert(offset < (s64a)ci->len);
600 DEBUG_PRINTF("check byte in buffer\n");
601 c = ci->buf[offset];
602 }
603 } else {
604 if (offset >= -(s64a) ci->hlen) {
605 DEBUG_PRINTF("check byte in history\n");
606 c = ci->hbuf[ci->hlen + offset];
607 } else {
608 DEBUG_PRINTF("before history and return\n");
609 return 1;
610 }
611 }
612
613 if (((and_mask & c) != cmp_mask) ^ negation) {
614 DEBUG_PRINTF("char 0x%02x at offset %lld failed byte check\n",
615 c, offset);
616 return 0;
617 }
618
619 DEBUG_PRINTF("real offset=%lld char=%02x\n", offset, c);
620 DEBUG_PRINTF("OK :)\n");
621 return 1;
622}
623
624static rose_inline
625int roseCheckMask(const struct core_info *ci, u64a and_mask, u64a cmp_mask,
626 u64a neg_mask, s32 checkOffset, u64a end) {
627 const s64a base_offset = (s64a)end - ci->buf_offset;
628 s64a offset = base_offset + checkOffset;
629 DEBUG_PRINTF("rel offset %lld\n",base_offset);
630 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
631 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
632 DEBUG_PRINTF("too early, fail\n");
633 return 0;
634 }
635
636 u64a data = 0;
637 u64a valid_data_mask = ~0ULL; // mask for validate check.
638 //A 0xff byte means that this byte is in the buffer.
639 s32 shift_l = 0; // size of bytes in the future.
640 s32 shift_r = 0; // size of bytes before the history.
641 s32 h_len = 0; // size of bytes in the history buffer.
642 s32 c_len = 8; // size of bytes in the current buffer.
643 if (offset < 0) {
644 // in or before history buffer.
645 if (offset + 8 <= -(s64a)ci->hlen) {
646 DEBUG_PRINTF("before history and return\n");
647 return 1;
648 }
649 const u8 *h_start = ci->hbuf; // start pointer in history buffer.
650 if (offset < -(s64a)ci->hlen) {
651 // some bytes are before history.
652 shift_r = -(offset + (s64a)ci->hlen);
653 DEBUG_PRINTF("shift_r %d", shift_r);
654 } else {
655 h_start += ci->hlen + offset;
656 }
657 if (offset + 7 < 0) {
658 DEBUG_PRINTF("all in history buffer\n");
659 data = partial_load_u64a(h_start, 8 - shift_r);
660 } else {
661 // history part
662 c_len = offset + 8;
663 h_len = -offset - shift_r;
664 DEBUG_PRINTF("%d bytes in history\n", h_len);
665 s64a data_h = 0;
666 data_h = partial_load_u64a(h_start, h_len);
667 // current part
668 if (c_len > (s64a)ci->len) {
669 shift_l = c_len - ci->len;
670 c_len = ci->len;
671 }
672 data = partial_load_u64a(ci->buf, c_len);
673 data <<= h_len << 3;
674 data |= data_h;
675 }
676 if (shift_r) {
677 data <<= shift_r << 3;
678 }
679 } else {
680 // current buffer.
681 if (offset + c_len > (s64a)ci->len) {
682 if (offset >= (s64a)ci->len) {
683 DEBUG_PRINTF("all in the future\n");
684 return 1;
685 }
686 // some bytes in the future.
687 shift_l = offset + c_len - ci->len;
688 c_len = ci->len - offset;
689 data = partial_load_u64a(ci->buf + offset, c_len);
690 } else {
691 data = unaligned_load_u64a(ci->buf + offset);
692 }
693 }
694
695 if (shift_l || shift_r) {
696 valid_data_mask = generateValidMask(shift_l, shift_r);
697 }
698 DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask);
699
700 if (validateMask(data, valid_data_mask,
701 and_mask, cmp_mask, neg_mask)) {
702 DEBUG_PRINTF("check mask successfully\n");
703 return 1;
704 } else {
705 return 0;
706 }
707}
708
709static rose_inline
710int roseCheckMask32(const struct core_info *ci, const u8 *and_mask,
711 const u8 *cmp_mask, const u32 neg_mask,
712 s32 checkOffset, u64a end) {
713 const s64a base_offset = (s64a)end - ci->buf_offset;
714 s64a offset = base_offset + checkOffset;
715 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
716 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
717
718 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
719 DEBUG_PRINTF("too early, fail\n");
720 return 0;
721 }
722
723 m256 data = zeroes256(); // consists of the following four parts.
724 s32 c_shift = 0; // blank bytes after current.
725 s32 h_shift = 0; // blank bytes before history.
726 s32 h_len = 32; // number of bytes from history buffer.
727 s32 c_len = 0; // number of bytes from current buffer.
728 /* h_shift + h_len + c_len + c_shift = 32 need to be hold.*/
729
730 if (offset < 0) {
731 s32 h_offset = 0; // the start offset in history buffer.
732 if (offset < -(s64a)ci->hlen) {
733 if (offset + 32 <= -(s64a)ci->hlen) {
734 DEBUG_PRINTF("all before history\n");
735 return 1;
736 }
737 h_shift = -(offset + (s64a)ci->hlen);
738 h_len = 32 - h_shift;
739 } else {
740 h_offset = ci->hlen + offset;
741 }
742 if (offset + 32 > 0) {
743 // part in current buffer.
744 c_len = offset + 32;
745 h_len = -(offset + h_shift);
746 if (c_len > (s64a)ci->len) {
747 // out of current buffer.
748 c_shift = c_len - ci->len;
749 c_len = ci->len;
750 }
751 copy_upto_32_bytes((u8 *)&data - offset, ci->buf, c_len);
752 }
753 assert(h_shift + h_len + c_len + c_shift == 32);
754 copy_upto_32_bytes((u8 *)&data + h_shift, ci->hbuf + h_offset, h_len);
755 } else {
756 if (offset + 32 > (s64a)ci->len) {
757 if (offset >= (s64a)ci->len) {
758 DEBUG_PRINTF("all in the future.\n");
759 return 1;
760 }
761 c_len = ci->len - offset;
762 c_shift = 32 - c_len;
763 copy_upto_32_bytes((u8 *)&data, ci->buf + offset, c_len);
764 } else {
765 data = loadu256(ci->buf + offset);
766 }
767 }
768 DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
769 DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
770 // we use valid_data_mask to blind bytes before history/in the future.
771 u32 valid_data_mask;
772 valid_data_mask = (~0u) << (h_shift + c_shift) >> (c_shift);
773
774 m256 and_mask_m256 = loadu256(and_mask);
775 m256 cmp_mask_m256 = loadu256(cmp_mask);
776 if (validateMask32(data, valid_data_mask, and_mask_m256,
777 cmp_mask_m256, neg_mask)) {
778 DEBUG_PRINTF("Mask32 passed\n");
779 return 1;
780 }
781 return 0;
782}
783
784// get 128/256 bits data from history and current buffer.
785// return data and valid_data_mask.
786static rose_inline
787u32 getBufferDataComplex(const struct core_info *ci, const s64a loc,
788 u8 *data, const u32 data_len) {
789 assert(data_len == 16 || data_len == 32);
790 s32 c_shift = 0; // blank bytes after current.
791 s32 h_shift = 0; // blank bytes before history.
792 s32 h_len = data_len; // number of bytes from history buffer.
793 s32 c_len = 0; // number of bytes from current buffer.
794 if (loc < 0) {
795 s32 h_offset = 0; // the start offset in history buffer.
796 if (loc < -(s64a)ci->hlen) {
797 if (loc + data_len <= -(s64a)ci->hlen) {
798 DEBUG_PRINTF("all before history\n");
799 return 0;
800 }
801 h_shift = -(loc + (s64a)ci->hlen);
802 h_len = data_len - h_shift;
803 } else {
804 h_offset = ci->hlen + loc;
805 }
806 if (loc + data_len > 0) {
807 // part in current buffer.
808 c_len = loc + data_len;
809 h_len = -(loc + h_shift);
810 if (c_len > (s64a)ci->len) {
811 // out of current buffer.
812 c_shift = c_len - ci->len;
813 c_len = ci->len;
814 }
815 copy_upto_32_bytes(data - loc, ci->buf, c_len);
816 }
817 assert(h_shift + h_len + c_len + c_shift == (s32)data_len);
818 copy_upto_32_bytes(data + h_shift, ci->hbuf + h_offset, h_len);
819 } else {
820 if (loc + data_len > (s64a)ci->len) {
821 if (loc >= (s64a)ci->len) {
822 DEBUG_PRINTF("all in the future.\n");
823 return 0;
824 }
825 c_len = ci->len - loc;
826 c_shift = data_len - c_len;
827 copy_upto_32_bytes(data, ci->buf + loc, c_len);
828 } else {
829 if (data_len == 16) {
830 storeu128(data, loadu128(ci->buf + loc));
831 return 0xffff;
832 } else {
833 storeu256(data, loadu256(ci->buf + loc));
834 return 0xffffffff;
835 }
836 }
837 }
838 DEBUG_PRINTF("h_shift %d c_shift %d\n", h_shift, c_shift);
839 DEBUG_PRINTF("h_len %d c_len %d\n", h_len, c_len);
840
841 if (data_len == 16) {
842 return (u16)(0xffff << (h_shift + c_shift)) >> c_shift;
843 } else {
844 return (~0u) << (h_shift + c_shift) >> c_shift;
845 }
846}
847
848static rose_inline
849m128 getData128(const struct core_info *ci, s64a offset, u32 *valid_data_mask) {
850 if (offset > 0 && offset + sizeof(m128) <= ci->len) {
851 *valid_data_mask = 0xffff;
852 return loadu128(ci->buf + offset);
853 }
854 ALIGN_DIRECTIVE u8 data[sizeof(m128)];
855 *valid_data_mask = getBufferDataComplex(ci, offset, data, 16);
856 return *(m128 *)data;
857}
858
859static rose_inline
860m256 getData256(const struct core_info *ci, s64a offset, u32 *valid_data_mask) {
861 if (offset > 0 && offset + sizeof(m256) <= ci->len) {
862 *valid_data_mask = ~0u;
863 return loadu256(ci->buf + offset);
864 }
865 ALIGN_AVX_DIRECTIVE u8 data[sizeof(m256)];
866 *valid_data_mask = getBufferDataComplex(ci, offset, data, 32);
867 return *(m256 *)data;
868}
869
870static rose_inline
871int roseCheckShufti16x8(const struct core_info *ci, const u8 *nib_mask,
872 const u8 *bucket_select_mask, u32 neg_mask,
873 s32 checkOffset, u64a end) {
874 const s64a base_offset = (s64a)end - ci->buf_offset;
875 s64a offset = base_offset + checkOffset;
876 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
877 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
878
879 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
880 DEBUG_PRINTF("too early, fail\n");
881 return 0;
882 }
883
884 u32 valid_data_mask = 0;
885 m128 data = getData128(ci, offset, &valid_data_mask);
886 if (unlikely(!valid_data_mask)) {
887 return 1;
888 }
889
890 m256 nib_mask_m256 = loadu256(nib_mask);
891 m128 bucket_select_mask_m128 = loadu128(bucket_select_mask);
892 if (validateShuftiMask16x8(data, nib_mask_m256,
893 bucket_select_mask_m128,
894 neg_mask, valid_data_mask)) {
895 DEBUG_PRINTF("check shufti 16x8 successfully\n");
896 return 1;
897 } else {
898 return 0;
899 }
900}
901
902static rose_inline
903int roseCheckShufti16x16(const struct core_info *ci, const u8 *hi_mask,
904 const u8 *lo_mask, const u8 *bucket_select_mask,
905 u32 neg_mask, s32 checkOffset, u64a end) {
906 const s64a base_offset = (s64a)end - ci->buf_offset;
907 s64a offset = base_offset + checkOffset;
908 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
909 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
910
911 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
912 DEBUG_PRINTF("too early, fail\n");
913 return 0;
914 }
915
916 u32 valid_data_mask = 0;
917 m128 data = getData128(ci, offset, &valid_data_mask);
918 if (unlikely(!valid_data_mask)) {
919 return 1;
920 }
921
922 m256 data_m256 = set2x128(data);
923 m256 hi_mask_m256 = loadu256(hi_mask);
924 m256 lo_mask_m256 = loadu256(lo_mask);
925 m256 bucket_select_mask_m256 = loadu256(bucket_select_mask);
926 if (validateShuftiMask16x16(data_m256, hi_mask_m256, lo_mask_m256,
927 bucket_select_mask_m256,
928 neg_mask, valid_data_mask)) {
929 DEBUG_PRINTF("check shufti 16x16 successfully\n");
930 return 1;
931 } else {
932 return 0;
933 }
934}
935
936static rose_inline
937int roseCheckShufti32x8(const struct core_info *ci, const u8 *hi_mask,
938 const u8 *lo_mask, const u8 *bucket_select_mask,
939 u32 neg_mask, s32 checkOffset, u64a end) {
940 const s64a base_offset = (s64a)end - ci->buf_offset;
941 s64a offset = base_offset + checkOffset;
942 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
943 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
944
945 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
946 DEBUG_PRINTF("too early, fail\n");
947 return 0;
948 }
949
950 u32 valid_data_mask = 0;
951 m256 data = getData256(ci, offset, &valid_data_mask);
952 if (unlikely(!valid_data_mask)) {
953 return 1;
954 }
955
956 m128 hi_mask_m128 = loadu128(hi_mask);
957 m128 lo_mask_m128 = loadu128(lo_mask);
958 m256 hi_mask_m256 = set2x128(hi_mask_m128);
959 m256 lo_mask_m256 = set2x128(lo_mask_m128);
960 m256 bucket_select_mask_m256 = loadu256(bucket_select_mask);
961 if (validateShuftiMask32x8(data, hi_mask_m256, lo_mask_m256,
962 bucket_select_mask_m256,
963 neg_mask, valid_data_mask)) {
964 DEBUG_PRINTF("check shufti 32x8 successfully\n");
965 return 1;
966 } else {
967 return 0;
968 }
969}
970
971static rose_inline
972int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
973 const u8 *lo_mask, const u8 *bucket_select_mask_hi,
974 const u8 *bucket_select_mask_lo, u32 neg_mask,
975 s32 checkOffset, u64a end) {
976 const s64a base_offset = (s64a)end - ci->buf_offset;
977 s64a offset = base_offset + checkOffset;
978 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
979 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
980
981 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
982 DEBUG_PRINTF("too early, fail\n");
983 return 0;
984 }
985
986 u32 valid_data_mask = 0;
987 m256 data = getData256(ci, offset, &valid_data_mask);
988 if (unlikely(!valid_data_mask)) {
989 return 1;
990 }
991
992 m256 hi_mask_1 = loadu2x128(hi_mask);
993 m256 hi_mask_2 = loadu2x128(hi_mask + 16);
994 m256 lo_mask_1 = loadu2x128(lo_mask);
995 m256 lo_mask_2 = loadu2x128(lo_mask + 16);
996
997 m256 bucket_mask_hi = loadu256(bucket_select_mask_hi);
998 m256 bucket_mask_lo = loadu256(bucket_select_mask_lo);
999 if (validateShuftiMask32x16(data, hi_mask_1, hi_mask_2,
1000 lo_mask_1, lo_mask_2, bucket_mask_hi,
1001 bucket_mask_lo, neg_mask, valid_data_mask)) {
1002 DEBUG_PRINTF("check shufti 32x16 successfully\n");
1003 return 1;
1004 } else {
1005 return 0;
1006 }
1007}
1008
1009static rose_inline
1010int roseCheckSingleLookaround(const struct RoseEngine *t,
1011 const struct hs_scratch *scratch,
1012 s8 checkOffset, u32 lookaroundReachIndex,
1013 u64a end) {
1014 assert(lookaroundReachIndex != MO_INVALID_IDX);
1015 const struct core_info *ci = &scratch->core_info;
1016 DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
1017 ci->buf_offset, ci->buf_offset + ci->len);
1018
1019 const s64a base_offset = end - ci->buf_offset;
1020 const s64a offset = base_offset + checkOffset;
1021 DEBUG_PRINTF("base_offset=%lld\n", base_offset);
1022 DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset);
1023
1024 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
1025 DEBUG_PRINTF("too early, fail\n");
1026 return 0;
1027 }
1028
1029 const u8 *reach = getByOffset(t, lookaroundReachIndex);
1030
1031 u8 c;
1032 if (offset >= 0 && offset < (s64a)ci->len) {
1033 c = ci->buf[offset];
1034 } else if (offset < 0 && offset >= -(s64a)ci->hlen) {
1035 c = ci->hbuf[ci->hlen + offset];
1036 } else {
1037 return 1;
1038 }
1039
1040 if (!reachHasBit(reach, c)) {
1041 DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
1042 return 0;
1043 }
1044
1045 DEBUG_PRINTF("OK :)\n");
1046 return 1;
1047}
1048
1049/**
1050 * \brief Scan around a literal, checking that that "lookaround" reach masks
1051 * are satisfied.
1052 */
1053static rose_inline
1054int roseCheckLookaround(const struct RoseEngine *t,
1055 const struct hs_scratch *scratch,
1056 u32 lookaroundLookIndex, u32 lookaroundReachIndex,
1057 u32 lookaroundCount, u64a end) {
1058 assert(lookaroundLookIndex != MO_INVALID_IDX);
1059 assert(lookaroundReachIndex != MO_INVALID_IDX);
1060 assert(lookaroundCount > 0);
1061
1062 const struct core_info *ci = &scratch->core_info;
1063 DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
1064 ci->buf_offset, ci->buf_offset + ci->len);
1065
1066 const s8 *look = getByOffset(t, lookaroundLookIndex);
1067 const s8 *look_end = look + lookaroundCount;
1068 assert(look < look_end);
1069
1070 const u8 *reach = getByOffset(t, lookaroundReachIndex);
1071
1072 // The following code assumes that the lookaround structures are ordered by
1073 // increasing offset.
1074
1075 const s64a base_offset = end - ci->buf_offset;
1076 DEBUG_PRINTF("base_offset=%lld\n", base_offset);
1077 DEBUG_PRINTF("first look has offset %d\n", *look);
1078
1079 // If our first check tells us we need to look at an offset before the
1080 // start of the stream, this role cannot match.
1081 if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) {
1082 DEBUG_PRINTF("too early, fail\n");
1083 return 0;
1084 }
1085
1086 // Skip over offsets that are before the history buffer.
1087 do {
1088 s64a offset = base_offset + *look;
1089 if (offset >= -(s64a)ci->hlen) {
1090 goto in_history;
1091 }
1092 DEBUG_PRINTF("look=%d before history\n", *look);
1093 look++;
1094 reach += REACH_BITVECTOR_LEN;
1095 } while (look < look_end);
1096
1097 // History buffer.
1098 DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
1099 for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
1100 in_history:
1101 ;
1102 s64a offset = base_offset + *look;
1103 DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
1104
1105 if (offset >= 0) {
1106 DEBUG_PRINTF("in buffer\n");
1107 goto in_buffer;
1108 }
1109
1110 assert(offset >= -(s64a)ci->hlen && offset < 0);
1111 u8 c = ci->hbuf[ci->hlen + offset];
1112 if (!reachHasBit(reach, c)) {
1113 DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
1114 return 0;
1115 }
1116 }
1117 // Current buffer.
1118 DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
1119 for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) {
1120 in_buffer:
1121 ;
1122 s64a offset = base_offset + *look;
1123 DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
1124
1125 if (offset >= (s64a)ci->len) {
1126 DEBUG_PRINTF("in the future\n");
1127 break;
1128 }
1129
1130 assert(offset >= 0 && offset < (s64a)ci->len);
1131 u8 c = ci->buf[offset];
1132 if (!reachHasBit(reach, c)) {
1133 DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
1134 return 0;
1135 }
1136 }
1137
1138 DEBUG_PRINTF("OK :)\n");
1139 return 1;
1140}
1141
1142/**
1143 * \brief Trying to find a matching path by the corresponding path mask of
1144 * every lookaround location.
1145 */
1146static rose_inline
1147int roseMultipathLookaround(const struct RoseEngine *t,
1148 const struct hs_scratch *scratch,
1149 u32 multipathLookaroundLookIndex,
1150 u32 multipathLookaroundReachIndex,
1151 u32 multipathLookaroundCount,
1152 s32 last_start, const u8 *start_mask,
1153 u64a end) {
1154 assert(multipathLookaroundCount > 0);
1155
1156 const struct core_info *ci = &scratch->core_info;
1157 DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
1158 ci->buf_offset, ci->buf_offset + ci->len);
1159
1160 const s8 *look = getByOffset(t, multipathLookaroundLookIndex);
1161 const s8 *look_end = look + multipathLookaroundCount;
1162 assert(look < look_end);
1163
1164 const u8 *reach = getByOffset(t, multipathLookaroundReachIndex);
1165
1166 const s64a base_offset = (s64a)end - ci->buf_offset;
1167 DEBUG_PRINTF("base_offset=%lld\n", base_offset);
1168
1169 u8 path = 0xff;
1170
1171 assert(last_start < 0);
1172
1173 if (unlikely((u64a)(0 - last_start) > end)) {
1174 DEBUG_PRINTF("too early, fail\n");
1175 return 0;
1176 }
1177
1178 s8 base_look_offset = *look;
1179 do {
1180 s64a offset = base_offset + *look;
1181 u32 start_offset = (u32)(*look - base_look_offset);
1182 DEBUG_PRINTF("start_mask[%u] = %x\n", start_offset,
1183 start_mask[start_offset]);
1184 path = start_mask[start_offset];
1185 if (offset >= -(s64a)ci->hlen) {
1186 break;
1187 }
1188 DEBUG_PRINTF("look=%d before history\n", *look);
1189 look++;
1190 reach += MULTI_REACH_BITVECTOR_LEN;
1191 } while (look < look_end);
1192
1193 DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look);
1194 for (; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
1195 s64a offset = base_offset + *look;
1196 DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
1197
1198 if (offset >= 0) {
1199 DEBUG_PRINTF("in buffer\n");
1200 break;
1201 }
1202
1203 assert(offset >= -(s64a)ci->hlen && offset < 0);
1204 u8 c = ci->hbuf[ci->hlen + offset];
1205 path &= reach[c];
1206 DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path);
1207 if (!path) {
1208 DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
1209 return 0;
1210 }
1211 }
1212
1213 DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look);
1214 for(; look < look_end; ++look, reach += MULTI_REACH_BITVECTOR_LEN) {
1215 s64a offset = base_offset + *look;
1216 DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset);
1217
1218 if (offset >= (s64a)ci->len) {
1219 DEBUG_PRINTF("in the future\n");
1220 break;
1221 }
1222
1223 assert(offset >= 0 && offset < (s64a)ci->len);
1224 u8 c = ci->buf[offset];
1225 path &= reach[c];
1226 DEBUG_PRINTF("reach[%x] = %02x path = %0xx\n", c, reach[c], path);
1227 if (!path) {
1228 DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
1229 return 0;
1230 }
1231 }
1232
1233 DEBUG_PRINTF("OK :)\n");
1234 return 1;
1235}
1236
1237static never_inline
1238int roseCheckMultipathShufti16x8(const struct hs_scratch *scratch,
1239 const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 *ri,
1240 u64a end) {
1241 const struct core_info *ci = &scratch->core_info;
1242 s32 checkOffset = ri->base_offset;
1243 const s64a base_offset = (s64a)end - ci->buf_offset;
1244 s64a offset = base_offset + checkOffset;
1245 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
1246 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
1247
1248 assert(ri->last_start <= 0);
1249 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
1250 if ((u64a)(0 - ri->last_start) > end) {
1251 DEBUG_PRINTF("too early, fail\n");
1252 return 0;
1253 }
1254 }
1255
1256 u32 valid_data_mask;
1257 m128 data_init = getData128(ci, offset, &valid_data_mask);
1258 m128 data_select_mask = loadu128(ri->data_select_mask);
1259
1260 u32 valid_path_mask = 0;
1261 if (unlikely(!(valid_data_mask & 1))) {
1262 DEBUG_PRINTF("lose part of backward data\n");
1263 DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
1264
1265 m128 expand_valid;
1266 u64a expand_mask = 0x8080808080808080ULL;
1267 u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
1268 u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
1269 DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
1270 DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
1271 expand_valid = set64x2(valid_hi, valid_lo);
1272 valid_path_mask = ~movemask128(pshufb_m128(expand_valid,
1273 data_select_mask));
1274 }
1275
1276 m128 data = pshufb_m128(data_init, data_select_mask);
1277 m256 nib_mask = loadu256(ri->nib_mask);
1278 m128 bucket_select_mask = loadu128(ri->bucket_select_mask);
1279
1280 u32 hi_bits_mask = ri->hi_bits_mask;
1281 u32 lo_bits_mask = ri->lo_bits_mask;
1282 u32 neg_mask = ri->neg_mask;
1283
1284 if (validateMultipathShuftiMask16x8(data, nib_mask,
1285 bucket_select_mask,
1286 hi_bits_mask, lo_bits_mask,
1287 neg_mask, valid_path_mask)) {
1288 DEBUG_PRINTF("check multi-path shufti-16x8 successfully\n");
1289 return 1;
1290 } else {
1291 return 0;
1292 }
1293}
1294
1295static never_inline
1296int roseCheckMultipathShufti32x8(const struct hs_scratch *scratch,
1297 const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 *ri,
1298 u64a end) {
1299 const struct core_info *ci = &scratch->core_info;
1300 s32 checkOffset = ri->base_offset;
1301 const s64a base_offset = (s64a)end - ci->buf_offset;
1302 s64a offset = base_offset + checkOffset;
1303 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
1304 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
1305
1306 assert(ri->last_start <= 0);
1307 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
1308 if ((u64a)(0 - ri->last_start) > end) {
1309 DEBUG_PRINTF("too early, fail\n");
1310 return 0;
1311 }
1312 }
1313
1314 u32 valid_data_mask;
1315 m128 data_m128 = getData128(ci, offset, &valid_data_mask);
1316 m256 data_double = set2x128(data_m128);
1317 m256 data_select_mask = loadu256(ri->data_select_mask);
1318
1319 u32 valid_path_mask = 0;
1320 m256 expand_valid;
1321 if (unlikely(!(valid_data_mask & 1))) {
1322 DEBUG_PRINTF("lose part of backward data\n");
1323 DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
1324
1325 u64a expand_mask = 0x8080808080808080ULL;
1326 u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
1327 u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
1328 DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
1329 DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
1330 expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
1331 valid_lo);
1332 valid_path_mask = ~movemask256(pshufb_m256(expand_valid,
1333 data_select_mask));
1334 }
1335
1336 m256 data = pshufb_m256(data_double, data_select_mask);
1337 m256 hi_mask = loadu2x128(ri->hi_mask);
1338 m256 lo_mask = loadu2x128(ri->lo_mask);
1339 m256 bucket_select_mask = loadu256(ri->bucket_select_mask);
1340
1341 u32 hi_bits_mask = ri->hi_bits_mask;
1342 u32 lo_bits_mask = ri->lo_bits_mask;
1343 u32 neg_mask = ri->neg_mask;
1344
1345 if (validateMultipathShuftiMask32x8(data, hi_mask, lo_mask,
1346 bucket_select_mask,
1347 hi_bits_mask, lo_bits_mask,
1348 neg_mask, valid_path_mask)) {
1349 DEBUG_PRINTF("check multi-path shufti-32x8 successfully\n");
1350 return 1;
1351 } else {
1352 return 0;
1353 }
1354}
1355
1356static never_inline
1357int roseCheckMultipathShufti32x16(const struct hs_scratch *scratch,
1358 const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 *ri,
1359 u64a end) {
1360 const struct core_info *ci = &scratch->core_info;
1361 const s64a base_offset = (s64a)end - ci->buf_offset;
1362 s32 checkOffset = ri->base_offset;
1363 s64a offset = base_offset + checkOffset;
1364 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
1365 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
1366
1367 assert(ri->last_start <= 0);
1368 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
1369 if ((u64a)(0 - ri->last_start) > end) {
1370 DEBUG_PRINTF("too early, fail\n");
1371 return 0;
1372 }
1373 }
1374
1375 u32 valid_data_mask;
1376 m128 data_m128 = getData128(ci, offset, &valid_data_mask);
1377 m256 data_double = set2x128(data_m128);
1378 m256 data_select_mask = loadu256(ri->data_select_mask);
1379
1380 u32 valid_path_mask = 0;
1381 m256 expand_valid;
1382 if (unlikely(!(valid_data_mask & 1))) {
1383 DEBUG_PRINTF("lose part of backward data\n");
1384 DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
1385
1386 u64a expand_mask = 0x8080808080808080ULL;
1387 u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
1388 u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
1389 DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
1390 DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
1391 expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
1392 valid_lo);
1393 valid_path_mask = ~movemask256(pshufb_m256(expand_valid,
1394 data_select_mask));
1395 }
1396
1397 m256 data = pshufb_m256(data_double, data_select_mask);
1398
1399 m256 hi_mask_1 = loadu2x128(ri->hi_mask);
1400 m256 hi_mask_2 = loadu2x128(ri->hi_mask + 16);
1401 m256 lo_mask_1 = loadu2x128(ri->lo_mask);
1402 m256 lo_mask_2 = loadu2x128(ri->lo_mask + 16);
1403
1404 m256 bucket_select_mask_hi = loadu256(ri->bucket_select_mask_hi);
1405 m256 bucket_select_mask_lo = loadu256(ri->bucket_select_mask_lo);
1406
1407 u32 hi_bits_mask = ri->hi_bits_mask;
1408 u32 lo_bits_mask = ri->lo_bits_mask;
1409 u32 neg_mask = ri->neg_mask;
1410
1411 if (validateMultipathShuftiMask32x16(data, hi_mask_1, hi_mask_2,
1412 lo_mask_1, lo_mask_2,
1413 bucket_select_mask_hi,
1414 bucket_select_mask_lo,
1415 hi_bits_mask, lo_bits_mask,
1416 neg_mask, valid_path_mask)) {
1417 DEBUG_PRINTF("check multi-path shufti-32x16 successfully\n");
1418 return 1;
1419 } else {
1420 return 0;
1421 }
1422}
1423
1424static never_inline
1425int roseCheckMultipathShufti64(const struct hs_scratch *scratch,
1426 const struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 *ri,
1427 u64a end) {
1428 const struct core_info *ci = &scratch->core_info;
1429 const s64a base_offset = (s64a)end - ci->buf_offset;
1430 s32 checkOffset = ri->base_offset;
1431 s64a offset = base_offset + checkOffset;
1432 DEBUG_PRINTF("end %lld base_offset %lld\n", end, base_offset);
1433 DEBUG_PRINTF("checkOffset %d offset %lld\n", checkOffset, offset);
1434
1435 if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
1436 if ((u64a)(0 - ri->last_start) > end) {
1437 DEBUG_PRINTF("too early, fail\n");
1438 return 0;
1439 }
1440 }
1441
1442 u32 valid_data_mask;
1443 m128 data_m128 = getData128(ci, offset, &valid_data_mask);
1444 m256 data_m256 = set2x128(data_m128);
1445 m256 data_select_mask_1 = loadu256(ri->data_select_mask);
1446 m256 data_select_mask_2 = loadu256(ri->data_select_mask + 32);
1447
1448 u64a valid_path_mask = 0;
1449 m256 expand_valid;
1450 if (unlikely(!(valid_data_mask & 1))) {
1451 DEBUG_PRINTF("lose part of backward data\n");
1452 DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask);
1453
1454 u64a expand_mask = 0x8080808080808080ULL;
1455 u64a valid_lo = expand64(valid_data_mask & 0xff, expand_mask);
1456 u64a valid_hi = expand64(valid_data_mask >> 8, expand_mask);
1457 DEBUG_PRINTF("expand_hi %llx\n", valid_hi);
1458 DEBUG_PRINTF("expand_lo %llx\n", valid_lo);
1459 expand_valid = set64x4(valid_hi, valid_lo, valid_hi,
1460 valid_lo);
1461 u32 valid_path_1 = movemask256(pshufb_m256(expand_valid,
1462 data_select_mask_1));
1463 u32 valid_path_2 = movemask256(pshufb_m256(expand_valid,
1464 data_select_mask_2));
1465 valid_path_mask = ~((u64a)valid_path_1 | (u64a)valid_path_2 << 32);
1466 }
1467
1468 m256 data_1 = pshufb_m256(data_m256, data_select_mask_1);
1469 m256 data_2 = pshufb_m256(data_m256, data_select_mask_2);
1470
1471 m256 hi_mask = loadu2x128(ri->hi_mask);
1472 m256 lo_mask = loadu2x128(ri->lo_mask);
1473
1474 m256 bucket_select_mask_1 = loadu256(ri->bucket_select_mask);
1475 m256 bucket_select_mask_2 = loadu256(ri->bucket_select_mask + 32);
1476
1477 u64a hi_bits_mask = ri->hi_bits_mask;
1478 u64a lo_bits_mask = ri->lo_bits_mask;
1479 u64a neg_mask = ri->neg_mask;
1480
1481 if (validateMultipathShuftiMask64(data_1, data_2, hi_mask, lo_mask,
1482 bucket_select_mask_1,
1483 bucket_select_mask_2, hi_bits_mask,
1484 lo_bits_mask, neg_mask,
1485 valid_path_mask)) {
1486 DEBUG_PRINTF("check multi-path shufti-64 successfully\n");
1487 return 1;
1488 } else {
1489 return 0;
1490 }
1491}
1492
1493static rose_inline
1494int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id,
1495 void *context) {
1496 assert(context);
1497 u64a *som = context;
1498 *som = MIN(*som, start);
1499 return MO_CONTINUE_MATCHING;
1500}
1501
1502static rose_inline
1503u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch,
1504 const u32 qi, UNUSED const u32 leftfixLag) {
1505 u32 ri = queueToLeftIndex(t, qi);
1506
1507 UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri;
1508
1509 DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n",
1510 left->transient ? "transient" : "active", ri, qi,
1511 leftfixLag, left->maxLag);
1512
1513 assert(leftfixLag <= left->maxLag);
1514
1515 struct mq *q = scratch->queues + qi;
1516
1517 u64a start = ~0ULL;
1518
1519 /* switch the callback + context for a fun one */
1520 q->cb = roseNfaEarliestSom;
1521 q->context = &start;
1522
1523 nfaReportCurrentMatches(q->nfa, q);
1524
1525 /* restore the old callback + context */
1526 q->cb = roseNfaAdaptor;
1527 q->context = NULL;
1528 DEBUG_PRINTF("earliest som is %llu\n", start);
1529 return start;
1530}
1531
1532static rose_inline
1533char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) {
1534 DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end,
1535 min_bound, max_bound);
1536 assert(min_bound <= max_bound);
1537 return end >= min_bound && end <= max_bound;
1538}
1539
1540static rose_inline
1541hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose,
1542 struct hs_scratch *scratch, u64a offset,
1543 u32 iter_offset) {
1544 const char is_streaming = rose->mode != HS_MODE_BLOCK;
1545
1546 /* data, len is used for state decompress, should be full available data */
1547 u8 key = 0;
1548 if (is_streaming) {
1549 const u8 *eod_data = scratch->core_info.hbuf;
1550 size_t eod_len = scratch->core_info.hlen;
1551 key = eod_len ? eod_data[eod_len - 1] : 0;
1552 }
1553
1554 const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state);
1555 const u32 aaCount = rose->activeArrayCount;
1556 const u32 qCount = rose->queueCount;
1557 struct fatbit *aqa = scratch->aqa;
1558
1559 const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset);
1560 assert(ISALIGNED(it));
1561
1562 u32 idx = 0;
1563 struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
1564
1565 for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state);
1566 qi != MMB_INVALID;
1567 qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) {
1568 DEBUG_PRINTF("checking nfa %u\n", qi);
1569 struct mq *q = scratch->queues + qi;
1570 if (!fatbit_set(aqa, qCount, qi)) {
1571 initQueue(q, qi, rose, scratch);
1572 }
1573
1574 assert(q->nfa == getNfaByQueue(rose, qi));
1575 assert(nfaAcceptsEod(q->nfa));
1576
1577 if (is_streaming) {
1578 // Decompress stream state.
1579 nfaExpandState(q->nfa, q->state, q->streamState, offset, key);
1580 }
1581
1582 if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset,
1583 roseReportAdaptor,
1584 scratch) == MO_HALT_MATCHING) {
1585 DEBUG_PRINTF("user instructed us to stop\n");
1586 return HWLM_TERMINATE_MATCHING;
1587 }
1588 }
1589
1590 return HWLM_CONTINUE_MATCHING;
1591}
1592
1593static rose_inline
1594hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose,
1595 struct hs_scratch *scratch, u64a offset) {
1596 const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state);
1597 const u32 aaCount = rose->activeArrayCount;
1598
1599 for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
1600 qi = mmbit_iterate(aa, aaCount, qi)) {
1601 DEBUG_PRINTF("checking nfa %u\n", qi);
1602 struct mq *q = scratch->queues + qi;
1603 assert(q->nfa == getNfaByQueue(rose, qi));
1604 assert(nfaAcceptsEod(q->nfa));
1605
1606 /* We have just been triggered. */
1607 assert(fatbit_isset(scratch->aqa, rose->queueCount, qi));
1608
1609 pushQueueNoMerge(q, MQE_END, scratch->core_info.len);
1610 q->context = NULL;
1611
1612 /* rose exec is used as we don't want to / can't raise matches in the
1613 * history buffer. */
1614 if (!nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX)) {
1615 DEBUG_PRINTF("nfa is dead\n");
1616 continue;
1617 }
1618 if (nfaCheckFinalState(q->nfa, q->state, q->streamState, offset,
1619 roseReportAdaptor,
1620 scratch) == MO_HALT_MATCHING) {
1621 DEBUG_PRINTF("user instructed us to stop\n");
1622 return HWLM_TERMINATE_MATCHING;
1623 }
1624 }
1625 return HWLM_CONTINUE_MATCHING;
1626}
1627
1628static rose_inline
1629hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose,
1630 struct hs_scratch *scratch, u64a offset) {
1631 assert(rose->ematcherOffset);
1632 assert(rose->ematcherRegionSize);
1633
1634 // Clear role state and active engines, since we have already handled all
1635 // outstanding work there.
1636 DEBUG_PRINTF("clear role state and active leaf array\n");
1637 char *state = scratch->core_info.state;
1638 mmbit_clear(getRoleState(state), rose->rolesWithStateCount);
1639 mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount);
1640
1641 const char is_streaming = rose->mode != HS_MODE_BLOCK;
1642
1643 size_t eod_len;
1644 const u8 *eod_data;
1645 if (!is_streaming) { /* Block */
1646 eod_data = scratch->core_info.buf;
1647 eod_len = scratch->core_info.len;
1648 } else { /* Streaming */
1649 eod_len = scratch->core_info.hlen;
1650 eod_data = scratch->core_info.hbuf;
1651 }
1652
1653 assert(eod_data);
1654 assert(eod_len);
1655
1656 DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len,
1657 offset);
1658
1659 // If we don't have enough bytes to produce a match from an EOD table scan,
1660 // there's no point scanning.
1661 if (eod_len < rose->eodmatcherMinWidth) {
1662 DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth);
1663 return HWLM_CONTINUE_MATCHING;
1664 }
1665
1666 // Ensure that we only need scan the last N bytes, where N is the length of
1667 // the eod-anchored matcher region.
1668 size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize);
1669
1670 const struct HWLM *etable = getByOffset(rose, rose->ematcherOffset);
1671 hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch,
1672 scratch->tctxt.groups);
1673
1674 // We may need to fire delayed matches.
1675 if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) {
1676 DEBUG_PRINTF("user instructed us to stop\n");
1677 return HWLM_TERMINATE_MATCHING;
1678 }
1679
1680 roseFlushLastByteHistory(rose, scratch, offset);
1681 return HWLM_CONTINUE_MATCHING;
1682}
1683
1684static rose_inline
1685int roseCheckLongLiteral(const struct RoseEngine *t,
1686 const struct hs_scratch *scratch, u64a end,
1687 u32 lit_offset, u32 lit_length, char nocase) {
1688 const struct core_info *ci = &scratch->core_info;
1689 const u8 *lit = getByOffset(t, lit_offset);
1690
1691 DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length);
1692 DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset);
1693
1694 if (end < lit_length) {
1695 DEBUG_PRINTF("too short!\n");
1696 return 0;
1697 }
1698
1699 // If any portion of the literal matched in the current buffer, check it.
1700 if (end > ci->buf_offset) {
1701 u32 scan_len = MIN(end - ci->buf_offset, lit_length);
1702 u64a scan_start = end - ci->buf_offset - scan_len;
1703 DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len,
1704 scan_start, end);
1705 if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len,
1706 scan_len, nocase)) {
1707 DEBUG_PRINTF("cmp of suffix failed\n");
1708 return 0;
1709 }
1710 }
1711
1712 // If the entirety of the literal was in the current block, we are done.
1713 if (end - lit_length >= ci->buf_offset) {
1714 DEBUG_PRINTF("literal confirmed in current block\n");
1715 return 1;
1716 }
1717
1718 // We still have a prefix which we must test against the buffer prepared by
1719 // the long literal table. This is only done in streaming mode.
1720
1721 assert(t->mode != HS_MODE_BLOCK);
1722
1723 const u8 *ll_buf;
1724 size_t ll_len;
1725 if (nocase) {
1726 ll_buf = scratch->tctxt.ll_buf_nocase;
1727 ll_len = scratch->tctxt.ll_len_nocase;
1728 } else {
1729 ll_buf = scratch->tctxt.ll_buf;
1730 ll_len = scratch->tctxt.ll_len;
1731 }
1732
1733 assert(ll_buf);
1734
1735 u64a lit_start_offset = end - lit_length;
1736 u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset);
1737 u32 hist_rewind = ci->buf_offset - lit_start_offset;
1738 DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind);
1739 if (hist_rewind > ll_len) {
1740 DEBUG_PRINTF("not enough history\n");
1741 return 0;
1742 }
1743
1744 DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n",
1745 prefix_len, ll_len, hist_rewind);
1746 assert(hist_rewind <= ll_len);
1747 if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) {
1748 DEBUG_PRINTF("cmp of prefix failed\n");
1749 return 0;
1750 }
1751
1752 DEBUG_PRINTF("cmp succeeded\n");
1753 return 1;
1754}
1755
1756static rose_inline
1757int roseCheckMediumLiteral(const struct RoseEngine *t,
1758 const struct hs_scratch *scratch, u64a end,
1759 u32 lit_offset, u32 lit_length, char nocase) {
1760 const struct core_info *ci = &scratch->core_info;
1761 const u8 *lit = getByOffset(t, lit_offset);
1762
1763 DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length);
1764 DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset);
1765
1766 if (end < lit_length) {
1767 DEBUG_PRINTF("too short!\n");
1768 return 0;
1769 }
1770
1771 // If any portion of the literal matched in the current buffer, check it.
1772 if (end > ci->buf_offset) {
1773 u32 scan_len = MIN(end - ci->buf_offset, lit_length);
1774 u64a scan_start = end - ci->buf_offset - scan_len;
1775 DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len,
1776 scan_start, end);
1777 if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len,
1778 scan_len, nocase)) {
1779 DEBUG_PRINTF("cmp of suffix failed\n");
1780 return 0;
1781 }
1782 }
1783
1784 // If the entirety of the literal was in the current block, we are done.
1785 if (end - lit_length >= ci->buf_offset) {
1786 DEBUG_PRINTF("literal confirmed in current block\n");
1787 return 1;
1788 }
1789
1790 // We still have a prefix which we must test against the history buffer.
1791 assert(t->mode != HS_MODE_BLOCK);
1792
1793 u64a lit_start_offset = end - lit_length;
1794 u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset);
1795 u32 hist_rewind = ci->buf_offset - lit_start_offset;
1796 DEBUG_PRINTF("hlen=%zu, hist_rewind=%u\n", ci->hlen, hist_rewind);
1797
1798 // History length check required for confirm in the EOD and delayed
1799 // rebuild paths.
1800 if (hist_rewind > ci->hlen) {
1801 DEBUG_PRINTF("not enough history\n");
1802 return 0;
1803 }
1804
1805 DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n",
1806 prefix_len, ci->hlen, hist_rewind);
1807 assert(hist_rewind <= ci->hlen);
1808 if (cmpForward(ci->hbuf + ci->hlen - hist_rewind, lit, prefix_len,
1809 nocase)) {
1810 DEBUG_PRINTF("cmp of prefix failed\n");
1811 return 0;
1812 }
1813
1814 DEBUG_PRINTF("cmp succeeded\n");
1815 return 1;
1816}
1817
1818static
1819void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
1820 const char from_mpv) {
1821 if (from_mpv) {
1822 updateMinMatchOffsetFromMpv(tctxt, offset);
1823 } else {
1824 updateMinMatchOffset(tctxt, offset);
1825 }
1826}
1827
1828static rose_inline
1829hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t,
1830 struct hs_scratch *scratch) {
1831 u8 *cvec = (u8 *)scratch->core_info.combVector;
1832 if (!mmbit_any(cvec, t->ckeyCount)) {
1833 return HWLM_CONTINUE_MATCHING;
1834 }
1835 u64a end = scratch->tctxt.lastCombMatchOffset;
1836 for (u32 i = mmbit_iterate(cvec, t->ckeyCount, MMB_INVALID);
1837 i != MMB_INVALID; i = mmbit_iterate(cvec, t->ckeyCount, i)) {
1838 const struct CombInfo *combInfoMap = (const struct CombInfo *)
1839 ((const char *)t + t->combInfoMapOffset);
1840 const struct CombInfo *ci = combInfoMap + i;
1841 if ((ci->min_offset != 0) && (end < ci->min_offset)) {
1842 DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset);
1843 continue;
1844 }
1845 if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) {
1846 DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset);
1847 continue;
1848 }
1849
1850 DEBUG_PRINTF("check ekey %u\n", ci->ekey);
1851 if (ci->ekey != INVALID_EKEY) {
1852 assert(ci->ekey < t->ekeyCount);
1853 const char *evec = scratch->core_info.exhaustionVector;
1854 if (isExhausted(t, evec, ci->ekey)) {
1855 DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
1856 ci->ekey);
1857 continue;
1858 }
1859 }
1860
1861 DEBUG_PRINTF("check ckey %u\n", i);
1862 char *lvec = scratch->core_info.logicalVector;
1863 if (!isLogicalCombination(t, lvec, ci->start, ci->result)) {
1864 DEBUG_PRINTF("Logical Combination Failed!\n");
1865 continue;
1866 }
1867
1868 DEBUG_PRINTF("Logical Combination Passed!\n");
1869 if (roseReport(t, scratch, end, ci->id, 0,
1870 ci->ekey) == HWLM_TERMINATE_MATCHING) {
1871 return HWLM_TERMINATE_MATCHING;
1872 }
1873 }
1874 clearCvec(t, (char *)cvec);
1875 return HWLM_CONTINUE_MATCHING;
1876}
1877
1878#if !defined(_WIN32)
1879#define PROGRAM_CASE(name) \
1880 case ROSE_INSTR_##name: { \
1881 LABEL_ROSE_INSTR_##name: \
1882 DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \
1883 programOffset + (u32)(pc - pc_base)); \
1884 const struct ROSE_STRUCT_##name *ri = \
1885 (const struct ROSE_STRUCT_##name *)pc;
1886
1887#define PROGRAM_NEXT_INSTRUCTION \
1888 pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
1889 goto *(next_instr[*(const u8 *)pc]); \
1890 }
1891
1892#define PROGRAM_NEXT_INSTRUCTION_JUMP \
1893 goto *(next_instr[*(const u8 *)pc]);
1894#else
1895#define PROGRAM_CASE(name) \
1896 case ROSE_INSTR_##name: { \
1897 DEBUG_PRINTF("instruction: " #name " (pc=%u)\n", \
1898 programOffset + (u32)(pc - pc_base)); \
1899 const struct ROSE_STRUCT_##name *ri = \
1900 (const struct ROSE_STRUCT_##name *)pc;
1901
1902#define PROGRAM_NEXT_INSTRUCTION \
1903 pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
1904 break; \
1905 }
1906
1907#define PROGRAM_NEXT_INSTRUCTION_JUMP continue;
1908#endif
1909
1910hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
1911 struct hs_scratch *scratch, u32 programOffset,
1912 u64a som, u64a end, u8 prog_flags) {
1913 DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset,
1914 som, end, prog_flags);
1915
1916 assert(programOffset != ROSE_INVALID_PROG_OFFSET);
1917 assert(programOffset >= sizeof(struct RoseEngine));
1918 assert(programOffset < t->size);
1919
1920 const char in_anchored = prog_flags & ROSE_PROG_FLAG_IN_ANCHORED;
1921 const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
1922 const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
1923 const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
1924
1925 const char *pc_base = getByOffset(t, programOffset);
1926 const char *pc = pc_base;
1927
1928 // Local sparse iterator state for programs that use the SPARSE_ITER_BEGIN
1929 // and SPARSE_ITER_NEXT instructions.
1930 struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
1931
1932 // If this program has an effect, work_done will be set to one (which may
1933 // allow the program to squash groups).
1934 int work_done = 0;
1935
1936 struct RoseContext *tctxt = &scratch->tctxt;
1937
1938 assert(*(const u8 *)pc != ROSE_INSTR_END);
1939
1940#if !defined(_WIN32)
1941 static const void *next_instr[] = {
1942 &&LABEL_ROSE_INSTR_END, //!< End of program.
1943 &&LABEL_ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher.
1944 &&LABEL_ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset.
1945 &&LABEL_ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on.
1946 &&LABEL_ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD.
1947 &&LABEL_ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0.
1948 &&LABEL_ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
1949 &&LABEL_ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check.
1950 &&LABEL_ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
1951 &&LABEL_ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check.
1952 &&LABEL_ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check.
1953 &&LABEL_ROSE_INSTR_CHECK_BYTE, //!< Single Byte check.
1954 &&LABEL_ROSE_INSTR_CHECK_SHUFTI_16x8, //!< Check 16-byte data by 8-bucket shufti.
1955 &&LABEL_ROSE_INSTR_CHECK_SHUFTI_32x8, //!< Check 32-byte data by 8-bucket shufti.
1956 &&LABEL_ROSE_INSTR_CHECK_SHUFTI_16x16, //!< Check 16-byte data by 16-bucket shufti.
1957 &&LABEL_ROSE_INSTR_CHECK_SHUFTI_32x16, //!< Check 32-byte data by 16-bucket shufti.
1958 &&LABEL_ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state.
1959 &&LABEL_ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state.
1960 &&LABEL_ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches.
1961 &&LABEL_ROSE_INSTR_DUMMY_NOP, //!< NOP. Should not exist in build programs.
1962 &&LABEL_ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches.
1963 &&LABEL_ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV.
1964 &&LABEL_ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
1965 &&LABEL_ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine.
1966 &&LABEL_ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from a som_operation.
1967 &&LABEL_ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero.
1968 &&LABEL_ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine.
1969 &&LABEL_ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine.
1970 &&LABEL_ROSE_INSTR_DEDUPE, //!< Run deduplication for report.
1971 &&LABEL_ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report.
1972 &&LABEL_ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV).
1973 &&LABEL_ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only.
1974 &&LABEL_ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source.
1975 &&LABEL_ROSE_INSTR_REPORT,
1976 &&LABEL_ROSE_INSTR_REPORT_EXHAUST,
1977 &&LABEL_ROSE_INSTR_REPORT_SOM,
1978 &&LABEL_ROSE_INSTR_REPORT_SOM_EXHAUST,
1979 &&LABEL_ROSE_INSTR_DEDUPE_AND_REPORT,
1980 &&LABEL_ROSE_INSTR_FINAL_REPORT,
1981 &&LABEL_ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set.
1982 &&LABEL_ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length.
1983 &&LABEL_ROSE_INSTR_SET_STATE, //!< Switch a state index on.
1984 &&LABEL_ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
1985 &&LABEL_ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups.
1986 &&LABEL_ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit.
1987 &&LABEL_ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
1988 &&LABEL_ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
1989 &&LABEL_ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator.
1990 &&LABEL_ROSE_INSTR_ENGINES_EOD,
1991 &&LABEL_ROSE_INSTR_SUFFIXES_EOD,
1992 &&LABEL_ROSE_INSTR_MATCHER_EOD,
1993 &&LABEL_ROSE_INSTR_CHECK_LONG_LIT,
1994 &&LABEL_ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
1995 &&LABEL_ROSE_INSTR_CHECK_MED_LIT,
1996 &&LABEL_ROSE_INSTR_CHECK_MED_LIT_NOCASE,
1997 &&LABEL_ROSE_INSTR_CLEAR_WORK_DONE,
1998 &&LABEL_ROSE_INSTR_MULTIPATH_LOOKAROUND,
1999 &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8,
2000 &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8,
2001 &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16,
2002 &&LABEL_ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64,
2003 &&LABEL_ROSE_INSTR_INCLUDED_JUMP,
2004 &&LABEL_ROSE_INSTR_SET_LOGICAL,
2005 &&LABEL_ROSE_INSTR_SET_COMBINATION,
2006 &&LABEL_ROSE_INSTR_FLUSH_COMBINATION,
2007 &&LABEL_ROSE_INSTR_SET_EXHAUST
2008 };
2009#endif
2010
2011 for (;;) {
2012 assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
2013 assert(pc >= pc_base);
2014 assert((size_t)(pc - pc_base) < t->size);
2015 const u8 code = *(const u8 *)pc;
2016 assert(code <= LAST_ROSE_INSTRUCTION);
2017
2018 switch ((enum RoseInstructionCode)code) {
2019 PROGRAM_CASE(END) {
2020 DEBUG_PRINTF("finished\n");
2021 return HWLM_CONTINUE_MATCHING;
2022 }
2023 PROGRAM_NEXT_INSTRUCTION
2024
2025 PROGRAM_CASE(ANCHORED_DELAY) {
2026 if (in_anchored && end > t->floatingMinLiteralMatchOffset) {
2027 DEBUG_PRINTF("delay until playback\n");
2028 tctxt->groups |= ri->groups;
2029 work_done = 1;
2030 recordAnchoredLiteralMatch(t, scratch, ri->anch_id, end);
2031
2032 assert(ri->done_jump); // must progress
2033 pc += ri->done_jump;
2034 PROGRAM_NEXT_INSTRUCTION_JUMP
2035 }
2036 }
2037 PROGRAM_NEXT_INSTRUCTION
2038
2039 PROGRAM_CASE(CHECK_LIT_EARLY) {
2040 if (end < ri->min_offset) {
2041 DEBUG_PRINTF("halt: before min_offset=%u\n",
2042 ri->min_offset);
2043 assert(ri->fail_jump); // must progress
2044 pc += ri->fail_jump;
2045 PROGRAM_NEXT_INSTRUCTION_JUMP
2046 }
2047 }
2048 PROGRAM_NEXT_INSTRUCTION
2049
2050 PROGRAM_CASE(CHECK_GROUPS) {
2051 DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n",
2052 tctxt->groups, ri->groups);
2053 if (!(ri->groups & tctxt->groups)) {
2054 DEBUG_PRINTF("halt: no groups are set\n");
2055 return HWLM_CONTINUE_MATCHING;
2056 }
2057 }
2058 PROGRAM_NEXT_INSTRUCTION
2059
2060 PROGRAM_CASE(CHECK_ONLY_EOD) {
2061 struct core_info *ci = &scratch->core_info;
2062 if (end != ci->buf_offset + ci->len) {
2063 DEBUG_PRINTF("should only match at end of data\n");
2064 assert(ri->fail_jump); // must progress
2065 pc += ri->fail_jump;
2066 PROGRAM_NEXT_INSTRUCTION_JUMP
2067 }
2068 }
2069 PROGRAM_NEXT_INSTRUCTION
2070
2071 PROGRAM_CASE(CHECK_BOUNDS) {
2072 if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) {
2073 DEBUG_PRINTF("failed bounds check\n");
2074 assert(ri->fail_jump); // must progress
2075 pc += ri->fail_jump;
2076 PROGRAM_NEXT_INSTRUCTION_JUMP
2077 }
2078 }
2079 PROGRAM_NEXT_INSTRUCTION
2080
2081 PROGRAM_CASE(CHECK_NOT_HANDLED) {
2082 struct fatbit *handled = scratch->handled_roles;
2083 if (fatbit_set(handled, t->handledKeyCount, ri->key)) {
2084 DEBUG_PRINTF("key %u already set\n", ri->key);
2085 assert(ri->fail_jump); // must progress
2086 pc += ri->fail_jump;
2087 PROGRAM_NEXT_INSTRUCTION_JUMP
2088 }
2089 }
2090 PROGRAM_NEXT_INSTRUCTION
2091
2092 PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
2093 if (!roseCheckSingleLookaround(t, scratch, ri->offset,
2094 ri->reach_index, end)) {
2095 DEBUG_PRINTF("failed lookaround check\n");
2096 assert(ri->fail_jump); // must progress
2097 pc += ri->fail_jump;
2098 PROGRAM_NEXT_INSTRUCTION_JUMP
2099 }
2100 }
2101 PROGRAM_NEXT_INSTRUCTION
2102
2103 PROGRAM_CASE(CHECK_LOOKAROUND) {
2104 if (!roseCheckLookaround(t, scratch, ri->look_index,
2105 ri->reach_index, ri->count, end)) {
2106 DEBUG_PRINTF("failed lookaround check\n");
2107 assert(ri->fail_jump); // must progress
2108 pc += ri->fail_jump;
2109 PROGRAM_NEXT_INSTRUCTION_JUMP
2110 }
2111 }
2112 PROGRAM_NEXT_INSTRUCTION
2113
2114 PROGRAM_CASE(CHECK_MASK) {
2115 struct core_info *ci = &scratch->core_info;
2116 if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask,
2117 ri->neg_mask, ri->offset, end)) {
2118 DEBUG_PRINTF("failed mask check\n");
2119 assert(ri->fail_jump); // must progress
2120 pc += ri->fail_jump;
2121 PROGRAM_NEXT_INSTRUCTION_JUMP
2122 }
2123 }
2124 PROGRAM_NEXT_INSTRUCTION
2125
2126 PROGRAM_CASE(CHECK_MASK_32) {
2127 struct core_info *ci = &scratch->core_info;
2128 if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask,
2129 ri->neg_mask, ri->offset, end)) {
2130 assert(ri->fail_jump);
2131 pc += ri->fail_jump;
2132 PROGRAM_NEXT_INSTRUCTION_JUMP
2133 }
2134 }
2135 PROGRAM_NEXT_INSTRUCTION
2136
2137 PROGRAM_CASE(CHECK_BYTE) {
2138 const struct core_info *ci = &scratch->core_info;
2139 if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask,
2140 ri->negation, ri->offset, end)) {
2141 DEBUG_PRINTF("failed byte check\n");
2142 assert(ri->fail_jump); // must progress
2143 pc += ri->fail_jump;
2144 PROGRAM_NEXT_INSTRUCTION_JUMP
2145 }
2146 }
2147 PROGRAM_NEXT_INSTRUCTION
2148
2149 PROGRAM_CASE(CHECK_SHUFTI_16x8) {
2150 const struct core_info *ci = &scratch->core_info;
2151 if (!roseCheckShufti16x8(ci, ri->nib_mask,
2152 ri->bucket_select_mask,
2153 ri->neg_mask, ri->offset, end)) {
2154 assert(ri->fail_jump);
2155 pc += ri-> fail_jump;
2156 PROGRAM_NEXT_INSTRUCTION_JUMP
2157 }
2158 }
2159 PROGRAM_NEXT_INSTRUCTION
2160
2161 PROGRAM_CASE(CHECK_SHUFTI_32x8) {
2162 const struct core_info *ci = &scratch->core_info;
2163 if (!roseCheckShufti32x8(ci, ri->hi_mask, ri->lo_mask,
2164 ri->bucket_select_mask,
2165 ri->neg_mask, ri->offset, end)) {
2166 assert(ri->fail_jump);
2167 pc += ri-> fail_jump;
2168 PROGRAM_NEXT_INSTRUCTION_JUMP
2169 }
2170 }
2171 PROGRAM_NEXT_INSTRUCTION
2172
2173 PROGRAM_CASE(CHECK_SHUFTI_16x16) {
2174 const struct core_info *ci = &scratch->core_info;
2175 if (!roseCheckShufti16x16(ci, ri->hi_mask, ri->lo_mask,
2176 ri->bucket_select_mask,
2177 ri->neg_mask, ri->offset, end)) {
2178 assert(ri->fail_jump);
2179 pc += ri-> fail_jump;
2180 PROGRAM_NEXT_INSTRUCTION_JUMP
2181 }
2182 }
2183 PROGRAM_NEXT_INSTRUCTION
2184
2185 PROGRAM_CASE(CHECK_SHUFTI_32x16) {
2186 const struct core_info *ci = &scratch->core_info;
2187 if (!roseCheckShufti32x16(ci, ri->hi_mask, ri->lo_mask,
2188 ri->bucket_select_mask_hi,
2189 ri->bucket_select_mask_lo,
2190 ri->neg_mask, ri->offset, end)) {
2191 assert(ri->fail_jump);
2192 pc += ri-> fail_jump;
2193 PROGRAM_NEXT_INSTRUCTION_JUMP
2194 }
2195 }
2196 PROGRAM_NEXT_INSTRUCTION
2197
2198 PROGRAM_CASE(CHECK_INFIX) {
2199 if (!roseTestInfix(t, scratch, ri->queue, ri->lag, ri->report,
2200 end)) {
2201 DEBUG_PRINTF("failed infix check\n");
2202 assert(ri->fail_jump); // must progress
2203 pc += ri->fail_jump;
2204 PROGRAM_NEXT_INSTRUCTION_JUMP
2205 }
2206 }
2207 PROGRAM_NEXT_INSTRUCTION
2208
2209 PROGRAM_CASE(CHECK_PREFIX) {
2210 if (!roseTestPrefix(t, scratch, ri->queue, ri->lag, ri->report,
2211 end)) {
2212 DEBUG_PRINTF("failed prefix check\n");
2213 assert(ri->fail_jump); // must progress
2214 pc += ri->fail_jump;
2215 PROGRAM_NEXT_INSTRUCTION_JUMP
2216 }
2217 }
2218 PROGRAM_NEXT_INSTRUCTION
2219
2220 PROGRAM_CASE(PUSH_DELAYED) {
2221 rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end);
2222 }
2223 PROGRAM_NEXT_INSTRUCTION
2224
2225 PROGRAM_CASE(DUMMY_NOP) {
2226 assert(0);
2227 }
2228 PROGRAM_NEXT_INSTRUCTION
2229
2230 PROGRAM_CASE(CATCH_UP) {
2231 if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
2232 return HWLM_TERMINATE_MATCHING;
2233 }
2234 }
2235 PROGRAM_NEXT_INSTRUCTION
2236
2237 PROGRAM_CASE(CATCH_UP_MPV) {
2238 if (from_mpv || skip_mpv_catchup) {
2239 DEBUG_PRINTF("skipping mpv catchup\n");
2240 } else if (roseCatchUpMPV(t,
2241 end - scratch->core_info.buf_offset,
2242 scratch) == HWLM_TERMINATE_MATCHING) {
2243 return HWLM_TERMINATE_MATCHING;
2244 }
2245 }
2246 PROGRAM_NEXT_INSTRUCTION
2247
2248 PROGRAM_CASE(SOM_ADJUST) {
2249 assert(ri->distance <= end);
2250 som = end - ri->distance;
2251 DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som);
2252 }
2253 PROGRAM_NEXT_INSTRUCTION
2254
2255 PROGRAM_CASE(SOM_LEFTFIX) {
2256 som = roseGetHaigSom(t, scratch, ri->queue, ri->lag);
2257 DEBUG_PRINTF("som from leftfix is %llu\n", som);
2258 }
2259 PROGRAM_NEXT_INSTRUCTION
2260
2261 PROGRAM_CASE(SOM_FROM_REPORT) {
2262 som = handleSomExternal(scratch, &ri->som, end);
2263 DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
2264 som);
2265 }
2266 PROGRAM_NEXT_INSTRUCTION
2267
2268 PROGRAM_CASE(SOM_ZERO) {
2269 DEBUG_PRINTF("setting SOM to zero\n");
2270 som = 0;
2271 }
2272 PROGRAM_NEXT_INSTRUCTION
2273
2274 PROGRAM_CASE(TRIGGER_INFIX) {
2275 roseTriggerInfix(t, scratch, som, end, ri->queue, ri->event,
2276 ri->cancel);
2277 work_done = 1;
2278 }
2279 PROGRAM_NEXT_INSTRUCTION
2280
2281 PROGRAM_CASE(TRIGGER_SUFFIX) {
2282 if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som,
2283 end) == HWLM_TERMINATE_MATCHING) {
2284 return HWLM_TERMINATE_MATCHING;
2285 }
2286 work_done = 1;
2287 }
2288 PROGRAM_NEXT_INSTRUCTION
2289
2290 PROGRAM_CASE(DEDUPE) {
2291 updateSeqPoint(tctxt, end, from_mpv);
2292 const char do_som = t->hasSom; // TODO: constant propagate
2293 const char is_external_report = 1;
2294 enum DedupeResult rv =
2295 dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
2296 ri->dkey, ri->offset_adjust,
2297 is_external_report, ri->quash_som, do_som);
2298 switch (rv) {
2299 case DEDUPE_HALT:
2300 return HWLM_TERMINATE_MATCHING;
2301 case DEDUPE_SKIP:
2302 assert(ri->fail_jump); // must progress
2303 pc += ri->fail_jump;
2304 PROGRAM_NEXT_INSTRUCTION_JUMP
2305 case DEDUPE_CONTINUE:
2306 break;
2307 }
2308 }
2309 PROGRAM_NEXT_INSTRUCTION
2310
2311 PROGRAM_CASE(DEDUPE_SOM) {
2312 updateSeqPoint(tctxt, end, from_mpv);
2313 const char is_external_report = 0;
2314 const char do_som = 1;
2315 enum DedupeResult rv =
2316 dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
2317 ri->dkey, ri->offset_adjust,
2318 is_external_report, ri->quash_som, do_som);
2319 switch (rv) {
2320 case DEDUPE_HALT:
2321 return HWLM_TERMINATE_MATCHING;
2322 case DEDUPE_SKIP:
2323 assert(ri->fail_jump); // must progress
2324 pc += ri->fail_jump;
2325 PROGRAM_NEXT_INSTRUCTION_JUMP
2326 case DEDUPE_CONTINUE:
2327 break;
2328 }
2329 }
2330 PROGRAM_NEXT_INSTRUCTION
2331
2332 PROGRAM_CASE(REPORT_CHAIN) {
2333 // Note: sequence points updated inside this function.
2334 if (roseCatchUpAndHandleChainMatch(
2335 t, scratch, ri->event, ri->top_squash_distance, end,
2336 in_catchup) == HWLM_TERMINATE_MATCHING) {
2337 return HWLM_TERMINATE_MATCHING;
2338 }
2339 work_done = 1;
2340 }
2341 PROGRAM_NEXT_INSTRUCTION
2342
2343 PROGRAM_CASE(REPORT_SOM_INT) {
2344 updateSeqPoint(tctxt, end, from_mpv);
2345 roseHandleSom(scratch, &ri->som, end);
2346 work_done = 1;
2347 }
2348 PROGRAM_NEXT_INSTRUCTION
2349
2350 PROGRAM_CASE(REPORT_SOM_AWARE) {
2351 updateSeqPoint(tctxt, end, from_mpv);
2352 roseHandleSomSom(scratch, &ri->som, som, end);
2353 work_done = 1;
2354 }
2355 PROGRAM_NEXT_INSTRUCTION
2356
2357 PROGRAM_CASE(REPORT) {
2358 updateSeqPoint(tctxt, end, from_mpv);
2359 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2360 INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
2361 return HWLM_TERMINATE_MATCHING;
2362 }
2363 work_done = 1;
2364 }
2365 PROGRAM_NEXT_INSTRUCTION
2366
2367 PROGRAM_CASE(REPORT_EXHAUST) {
2368 updateSeqPoint(tctxt, end, from_mpv);
2369 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2370 ri->ekey) == HWLM_TERMINATE_MATCHING) {
2371 return HWLM_TERMINATE_MATCHING;
2372 }
2373 work_done = 1;
2374 }
2375 PROGRAM_NEXT_INSTRUCTION
2376
2377 PROGRAM_CASE(REPORT_SOM) {
2378 updateSeqPoint(tctxt, end, from_mpv);
2379 if (roseReportSom(t, scratch, som, end, ri->onmatch,
2380 ri->offset_adjust,
2381 INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
2382 return HWLM_TERMINATE_MATCHING;
2383 }
2384 work_done = 1;
2385 }
2386 PROGRAM_NEXT_INSTRUCTION
2387
2388 PROGRAM_CASE(REPORT_SOM_EXHAUST) {
2389 updateSeqPoint(tctxt, end, from_mpv);
2390 if (roseReportSom(t, scratch, som, end, ri->onmatch,
2391 ri->offset_adjust,
2392 ri->ekey) == HWLM_TERMINATE_MATCHING) {
2393 return HWLM_TERMINATE_MATCHING;
2394 }
2395 work_done = 1;
2396 }
2397 PROGRAM_NEXT_INSTRUCTION
2398
2399 PROGRAM_CASE(DEDUPE_AND_REPORT) {
2400 updateSeqPoint(tctxt, end, from_mpv);
2401 const char do_som = t->hasSom; // TODO: constant propagate
2402 const char is_external_report = 1;
2403 enum DedupeResult rv =
2404 dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
2405 ri->dkey, ri->offset_adjust,
2406 is_external_report, ri->quash_som, do_som);
2407 switch (rv) {
2408 case DEDUPE_HALT:
2409 return HWLM_TERMINATE_MATCHING;
2410 case DEDUPE_SKIP:
2411 assert(ri->fail_jump); // must progress
2412 pc += ri->fail_jump;
2413 PROGRAM_NEXT_INSTRUCTION_JUMP
2414 case DEDUPE_CONTINUE:
2415 break;
2416 }
2417
2418 const u32 ekey = INVALID_EKEY;
2419 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2420 ekey) == HWLM_TERMINATE_MATCHING) {
2421 return HWLM_TERMINATE_MATCHING;
2422 }
2423 work_done = 1;
2424 }
2425 PROGRAM_NEXT_INSTRUCTION
2426
2427 PROGRAM_CASE(FINAL_REPORT) {
2428 updateSeqPoint(tctxt, end, from_mpv);
2429 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2430 INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
2431 return HWLM_TERMINATE_MATCHING;
2432 }
2433 /* One-shot specialisation: this instruction always terminates
2434 * execution of the program. */
2435 return HWLM_CONTINUE_MATCHING;
2436 }
2437 PROGRAM_NEXT_INSTRUCTION
2438
2439 PROGRAM_CASE(CHECK_EXHAUSTED) {
2440 DEBUG_PRINTF("check ekey %u\n", ri->ekey);
2441 assert(ri->ekey != INVALID_EKEY);
2442 assert(ri->ekey < t->ekeyCount);
2443 const char *evec = scratch->core_info.exhaustionVector;
2444 if (isExhausted(t, evec, ri->ekey)) {
2445 DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
2446 ri->ekey);
2447 assert(ri->fail_jump); // must progress
2448 pc += ri->fail_jump;
2449 PROGRAM_NEXT_INSTRUCTION_JUMP
2450 }
2451 }
2452 PROGRAM_NEXT_INSTRUCTION
2453
2454 PROGRAM_CASE(CHECK_MIN_LENGTH) {
2455 DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length,
2456 ri->end_adj);
2457 assert(ri->min_length > 0);
2458 assert(ri->end_adj == 0 || ri->end_adj == -1);
2459 assert(som == HS_OFFSET_PAST_HORIZON || som <= end);
2460 if (som != HS_OFFSET_PAST_HORIZON &&
2461 ((end + ri->end_adj) - som < ri->min_length)) {
2462 DEBUG_PRINTF("failed check, match len %llu\n",
2463 (u64a)((end + ri->end_adj) - som));
2464 assert(ri->fail_jump); // must progress
2465 pc += ri->fail_jump;
2466 PROGRAM_NEXT_INSTRUCTION_JUMP
2467 }
2468 }
2469 PROGRAM_NEXT_INSTRUCTION
2470
2471 PROGRAM_CASE(SET_STATE) {
2472 DEBUG_PRINTF("set state index %u\n", ri->index);
2473 mmbit_set(getRoleState(scratch->core_info.state),
2474 t->rolesWithStateCount, ri->index);
2475 work_done = 1;
2476 }
2477 PROGRAM_NEXT_INSTRUCTION
2478
2479 PROGRAM_CASE(SET_GROUPS) {
2480 tctxt->groups |= ri->groups;
2481 DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups,
2482 tctxt->groups);
2483 }
2484 PROGRAM_NEXT_INSTRUCTION
2485
2486 PROGRAM_CASE(SQUASH_GROUPS) {
2487 assert(popcount64(ri->groups) == 63); // Squash only one group.
2488 if (work_done) {
2489 tctxt->groups &= ri->groups;
2490 DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups,
2491 tctxt->groups);
2492 }
2493 }
2494 PROGRAM_NEXT_INSTRUCTION
2495
2496 PROGRAM_CASE(CHECK_STATE) {
2497 DEBUG_PRINTF("check state %u\n", ri->index);
2498 const u8 *roles = getRoleState(scratch->core_info.state);
2499 if (!mmbit_isset(roles, t->rolesWithStateCount, ri->index)) {
2500 DEBUG_PRINTF("state not on\n");
2501 assert(ri->fail_jump); // must progress
2502 pc += ri->fail_jump;
2503 PROGRAM_NEXT_INSTRUCTION_JUMP
2504 }
2505 }
2506 PROGRAM_NEXT_INSTRUCTION
2507
2508 PROGRAM_CASE(SPARSE_ITER_BEGIN) {
2509 DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset);
2510 const struct mmbit_sparse_iter *it =
2511 getByOffset(t, ri->iter_offset);
2512 assert(ISALIGNED(it));
2513
2514 const u8 *roles = getRoleState(scratch->core_info.state);
2515
2516 u32 idx = 0;
2517 u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount,
2518 &idx, it, si_state);
2519 if (i == MMB_INVALID) {
2520 DEBUG_PRINTF("no states in sparse iter are on\n");
2521 assert(ri->fail_jump); // must progress
2522 pc += ri->fail_jump;
2523 PROGRAM_NEXT_INSTRUCTION_JUMP
2524 }
2525
2526 fatbit_clear(scratch->handled_roles);
2527
2528 const u32 *jumps = getByOffset(t, ri->jump_table);
2529 DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx,
2530 jumps[idx]);
2531 pc = pc_base + jumps[idx];
2532 PROGRAM_NEXT_INSTRUCTION_JUMP
2533 }
2534 PROGRAM_NEXT_INSTRUCTION
2535
2536 PROGRAM_CASE(SPARSE_ITER_NEXT) {
2537 DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset,
2538 ri->state);
2539 const struct mmbit_sparse_iter *it =
2540 getByOffset(t, ri->iter_offset);
2541 assert(ISALIGNED(it));
2542
2543 const u8 *roles = getRoleState(scratch->core_info.state);
2544
2545 u32 idx = 0;
2546 u32 i = mmbit_sparse_iter_next(roles, t->rolesWithStateCount,
2547 ri->state, &idx, it, si_state);
2548 if (i == MMB_INVALID) {
2549 DEBUG_PRINTF("no more states in sparse iter are on\n");
2550 assert(ri->fail_jump); // must progress
2551 pc += ri->fail_jump;
2552 PROGRAM_NEXT_INSTRUCTION_JUMP
2553 }
2554
2555 const u32 *jumps = getByOffset(t, ri->jump_table);
2556 DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx,
2557 jumps[idx]);
2558 pc = pc_base + jumps[idx];
2559 PROGRAM_NEXT_INSTRUCTION_JUMP
2560 }
2561 PROGRAM_NEXT_INSTRUCTION
2562
2563 PROGRAM_CASE(SPARSE_ITER_ANY) {
2564 DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset);
2565 const struct mmbit_sparse_iter *it =
2566 getByOffset(t, ri->iter_offset);
2567 assert(ISALIGNED(it));
2568
2569 const u8 *roles = getRoleState(scratch->core_info.state);
2570
2571 u32 idx = 0;
2572 u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount,
2573 &idx, it, si_state);
2574 if (i == MMB_INVALID) {
2575 DEBUG_PRINTF("no states in sparse iter are on\n");
2576 assert(ri->fail_jump); // must progress
2577 pc += ri->fail_jump;
2578 PROGRAM_NEXT_INSTRUCTION_JUMP
2579 }
2580 DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx);
2581 fatbit_clear(scratch->handled_roles);
2582 }
2583 PROGRAM_NEXT_INSTRUCTION
2584
2585 PROGRAM_CASE(ENGINES_EOD) {
2586 if (roseEnginesEod(t, scratch, end, ri->iter_offset) ==
2587 HWLM_TERMINATE_MATCHING) {
2588 return HWLM_TERMINATE_MATCHING;
2589 }
2590 }
2591 PROGRAM_NEXT_INSTRUCTION
2592
2593 PROGRAM_CASE(SUFFIXES_EOD) {
2594 if (roseSuffixesEod(t, scratch, end) ==
2595 HWLM_TERMINATE_MATCHING) {
2596 return HWLM_TERMINATE_MATCHING;
2597 }
2598 }
2599 PROGRAM_NEXT_INSTRUCTION
2600
2601 PROGRAM_CASE(MATCHER_EOD) {
2602 if (roseMatcherEod(t, scratch, end) ==
2603 HWLM_TERMINATE_MATCHING) {
2604 return HWLM_TERMINATE_MATCHING;
2605 }
2606 }
2607 PROGRAM_NEXT_INSTRUCTION
2608
2609 PROGRAM_CASE(CHECK_LONG_LIT) {
2610 const char nocase = 0;
2611 if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
2612 ri->lit_length, nocase)) {
2613 DEBUG_PRINTF("failed long lit check\n");
2614 assert(ri->fail_jump); // must progress
2615 pc += ri->fail_jump;
2616 PROGRAM_NEXT_INSTRUCTION_JUMP
2617 }
2618 }
2619 PROGRAM_NEXT_INSTRUCTION
2620
2621 PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
2622 const char nocase = 1;
2623 if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
2624 ri->lit_length, nocase)) {
2625 DEBUG_PRINTF("failed nocase long lit check\n");
2626 assert(ri->fail_jump); // must progress
2627 pc += ri->fail_jump;
2628 PROGRAM_NEXT_INSTRUCTION_JUMP
2629 }
2630 }
2631 PROGRAM_NEXT_INSTRUCTION
2632
2633 PROGRAM_CASE(CHECK_MED_LIT) {
2634 const char nocase = 0;
2635 if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
2636 ri->lit_length, nocase)) {
2637 DEBUG_PRINTF("failed lit check\n");
2638 assert(ri->fail_jump); // must progress
2639 pc += ri->fail_jump;
2640 PROGRAM_NEXT_INSTRUCTION_JUMP
2641 }
2642 }
2643 PROGRAM_NEXT_INSTRUCTION
2644
2645 PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
2646 const char nocase = 1;
2647 if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
2648 ri->lit_length, nocase)) {
2649 DEBUG_PRINTF("failed long lit check\n");
2650 assert(ri->fail_jump); // must progress
2651 pc += ri->fail_jump;
2652 PROGRAM_NEXT_INSTRUCTION_JUMP
2653 }
2654 }
2655 PROGRAM_NEXT_INSTRUCTION
2656
2657 PROGRAM_CASE(CLEAR_WORK_DONE) {
2658 DEBUG_PRINTF("clear work_done flag\n");
2659 work_done = 0;
2660 }
2661 PROGRAM_NEXT_INSTRUCTION
2662
2663 PROGRAM_CASE(MULTIPATH_LOOKAROUND) {
2664 if (!roseMultipathLookaround(t, scratch, ri->look_index,
2665 ri->reach_index, ri->count,
2666 ri->last_start, ri->start_mask,
2667 end)) {
2668 DEBUG_PRINTF("failed multi-path lookaround check\n");
2669 assert(ri->fail_jump); // must progress
2670 pc += ri->fail_jump;
2671 PROGRAM_NEXT_INSTRUCTION_JUMP
2672 }
2673 }
2674 PROGRAM_NEXT_INSTRUCTION
2675
2676 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_16x8) {
2677 if (!roseCheckMultipathShufti16x8(scratch, ri, end)) {
2678 DEBUG_PRINTF("failed multi-path shufti 16x8 check\n");
2679 assert(ri->fail_jump); // must progress
2680 pc += ri->fail_jump;
2681 PROGRAM_NEXT_INSTRUCTION_JUMP
2682 }
2683 }
2684 PROGRAM_NEXT_INSTRUCTION
2685
2686 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x8) {
2687 if (!roseCheckMultipathShufti32x8(scratch, ri, end)) {
2688 DEBUG_PRINTF("failed multi-path shufti 32x8 check\n");
2689 assert(ri->fail_jump); // must progress
2690 pc += ri->fail_jump;
2691 PROGRAM_NEXT_INSTRUCTION_JUMP
2692 }
2693 }
2694 PROGRAM_NEXT_INSTRUCTION
2695
2696 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_32x16) {
2697 if (!roseCheckMultipathShufti32x16(scratch, ri, end)) {
2698 DEBUG_PRINTF("failed multi-path shufti 32x16 check\n");
2699 assert(ri->fail_jump); // must progress
2700 pc += ri->fail_jump;
2701 PROGRAM_NEXT_INSTRUCTION_JUMP
2702 }
2703 }
2704 PROGRAM_NEXT_INSTRUCTION
2705
2706 PROGRAM_CASE(CHECK_MULTIPATH_SHUFTI_64) {
2707 if (!roseCheckMultipathShufti64(scratch, ri, end)) {
2708 DEBUG_PRINTF("failed multi-path shufti 64 check\n");
2709 assert(ri->fail_jump); // must progress
2710 pc += ri->fail_jump;
2711 PROGRAM_NEXT_INSTRUCTION_JUMP
2712 }
2713 }
2714 PROGRAM_NEXT_INSTRUCTION
2715
2716 PROGRAM_CASE(INCLUDED_JUMP) {
2717 if (scratch->fdr_conf) {
2718 // squash the bucket of included literal
2719 u8 shift = scratch->fdr_conf_offset & ~7U;
2720 u64a mask = ((~(u64a)ri->squash) << shift);
2721 *(scratch->fdr_conf) &= mask;
2722
2723 pc = getByOffset(t, ri->child_offset);
2724 pc_base = pc;
2725 programOffset = (const u8 *)pc_base -(const u8 *)t;
2726 DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
2727 pc_base, pc, ri->child_offset, ri->squash);
2728 work_done = 0;
2729 PROGRAM_NEXT_INSTRUCTION_JUMP
2730 }
2731 }
2732 PROGRAM_NEXT_INSTRUCTION
2733
2734 PROGRAM_CASE(SET_LOGICAL) {
2735 DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
2736 ri->lkey, ri->offset_adjust);
2737 assert(ri->lkey != INVALID_LKEY);
2738 assert(ri->lkey < t->lkeyCount);
2739 char *lvec = scratch->core_info.logicalVector;
2740 setLogicalVal(t, lvec, ri->lkey, 1);
2741 updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
2742 }
2743 PROGRAM_NEXT_INSTRUCTION
2744
2745 PROGRAM_CASE(SET_COMBINATION) {
2746 DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
2747 assert(ri->ckey != INVALID_CKEY);
2748 assert(ri->ckey < t->ckeyCount);
2749 char *cvec = scratch->core_info.combVector;
2750 setCombinationActive(t, cvec, ri->ckey);
2751 }
2752 PROGRAM_NEXT_INSTRUCTION
2753
2754 PROGRAM_CASE(FLUSH_COMBINATION) {
2755 assert(end >= tctxt->lastCombMatchOffset);
2756 if (end > tctxt->lastCombMatchOffset) {
2757 if (flushActiveCombinations(t, scratch)
2758 == HWLM_TERMINATE_MATCHING) {
2759 return HWLM_TERMINATE_MATCHING;
2760 }
2761 }
2762 }
2763 PROGRAM_NEXT_INSTRUCTION
2764
2765 PROGRAM_CASE(SET_EXHAUST) {
2766 updateSeqPoint(tctxt, end, from_mpv);
2767 if (roseSetExhaust(t, scratch, ri->ekey)
2768 == HWLM_TERMINATE_MATCHING) {
2769 return HWLM_TERMINATE_MATCHING;
2770 }
2771 work_done = 1;
2772 }
2773 PROGRAM_NEXT_INSTRUCTION
2774
2775 default: {
2776 assert(0); // unreachable
2777 scratch->core_info.status |= STATUS_ERROR;
2778 return HWLM_TERMINATE_MATCHING;
2779 }
2780 }
2781 }
2782
2783 assert(0); // unreachable
2784 return HWLM_CONTINUE_MATCHING;
2785}
2786
2787#define L_PROGRAM_CASE(name) \
2788 case ROSE_INSTR_##name: { \
2789 DEBUG_PRINTF("l_instruction: " #name " (pc=%u)\n", \
2790 programOffset + (u32)(pc - pc_base)); \
2791 const struct ROSE_STRUCT_##name *ri = \
2792 (const struct ROSE_STRUCT_##name *)pc;
2793
2794#define L_PROGRAM_NEXT_INSTRUCTION \
2795 pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \
2796 break; \
2797 }
2798
2799#define L_PROGRAM_NEXT_INSTRUCTION_JUMP continue;
2800
2801hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
2802 struct hs_scratch *scratch, u32 programOffset,
2803 u64a som, u64a end, u8 prog_flags) {
2804 DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset,
2805 som, end, prog_flags);
2806
2807 assert(programOffset != ROSE_INVALID_PROG_OFFSET);
2808 assert(programOffset >= sizeof(struct RoseEngine));
2809 assert(programOffset < t->size);
2810
2811 const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
2812
2813 const char *pc_base = getByOffset(t, programOffset);
2814 const char *pc = pc_base;
2815
2816 // If this program has an effect, work_done will be set to one (which may
2817 // allow the program to squash groups).
2818 int work_done = 0;
2819
2820 struct RoseContext *tctxt = &scratch->tctxt;
2821
2822 assert(*(const u8 *)pc != ROSE_INSTR_END);
2823
2824 for (;;) {
2825 assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
2826 assert(pc >= pc_base);
2827 assert((size_t)(pc - pc_base) < t->size);
2828 const u8 code = *(const u8 *)pc;
2829 assert(code <= LAST_ROSE_INSTRUCTION);
2830
2831 switch ((enum RoseInstructionCode)code) {
2832 L_PROGRAM_CASE(END) {
2833 DEBUG_PRINTF("finished\n");
2834 return HWLM_CONTINUE_MATCHING;
2835 }
2836 L_PROGRAM_NEXT_INSTRUCTION
2837
2838 L_PROGRAM_CASE(CATCH_UP) {
2839 if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) {
2840 return HWLM_TERMINATE_MATCHING;
2841 }
2842 }
2843 L_PROGRAM_NEXT_INSTRUCTION
2844
2845 L_PROGRAM_CASE(SOM_FROM_REPORT) {
2846 som = handleSomExternal(scratch, &ri->som, end);
2847 DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
2848 som);
2849 }
2850 L_PROGRAM_NEXT_INSTRUCTION
2851
2852 L_PROGRAM_CASE(DEDUPE) {
2853 updateSeqPoint(tctxt, end, from_mpv);
2854 const char do_som = t->hasSom; // TODO: constant propagate
2855 const char is_external_report = 1;
2856 enum DedupeResult rv =
2857 dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
2858 ri->dkey, ri->offset_adjust,
2859 is_external_report, ri->quash_som, do_som);
2860 switch (rv) {
2861 case DEDUPE_HALT:
2862 return HWLM_TERMINATE_MATCHING;
2863 case DEDUPE_SKIP:
2864 assert(ri->fail_jump); // must progress
2865 pc += ri->fail_jump;
2866 L_PROGRAM_NEXT_INSTRUCTION_JUMP
2867 case DEDUPE_CONTINUE:
2868 break;
2869 }
2870 }
2871 L_PROGRAM_NEXT_INSTRUCTION
2872
2873 L_PROGRAM_CASE(DEDUPE_SOM) {
2874 updateSeqPoint(tctxt, end, from_mpv);
2875 const char is_external_report = 0;
2876 const char do_som = 1;
2877 enum DedupeResult rv =
2878 dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
2879 ri->dkey, ri->offset_adjust,
2880 is_external_report, ri->quash_som, do_som);
2881 switch (rv) {
2882 case DEDUPE_HALT:
2883 return HWLM_TERMINATE_MATCHING;
2884 case DEDUPE_SKIP:
2885 assert(ri->fail_jump); // must progress
2886 pc += ri->fail_jump;
2887 L_PROGRAM_NEXT_INSTRUCTION_JUMP
2888 case DEDUPE_CONTINUE:
2889 break;
2890 }
2891 }
2892 L_PROGRAM_NEXT_INSTRUCTION
2893
2894 L_PROGRAM_CASE(REPORT) {
2895 updateSeqPoint(tctxt, end, from_mpv);
2896 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2897 INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
2898 return HWLM_TERMINATE_MATCHING;
2899 }
2900 work_done = 1;
2901 }
2902 L_PROGRAM_NEXT_INSTRUCTION
2903
2904 L_PROGRAM_CASE(REPORT_EXHAUST) {
2905 updateSeqPoint(tctxt, end, from_mpv);
2906 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2907 ri->ekey) == HWLM_TERMINATE_MATCHING) {
2908 return HWLM_TERMINATE_MATCHING;
2909 }
2910 work_done = 1;
2911 }
2912 L_PROGRAM_NEXT_INSTRUCTION
2913
2914 L_PROGRAM_CASE(REPORT_SOM) {
2915 updateSeqPoint(tctxt, end, from_mpv);
2916 if (roseReportSom(t, scratch, som, end, ri->onmatch,
2917 ri->offset_adjust,
2918 INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
2919 return HWLM_TERMINATE_MATCHING;
2920 }
2921 work_done = 1;
2922 }
2923 L_PROGRAM_NEXT_INSTRUCTION
2924
2925 L_PROGRAM_CASE(DEDUPE_AND_REPORT) {
2926 updateSeqPoint(tctxt, end, from_mpv);
2927 const char do_som = t->hasSom; // TODO: constant propagate
2928 const char is_external_report = 1;
2929 enum DedupeResult rv =
2930 dedupeCatchup(t, scratch, end, som, end + ri->offset_adjust,
2931 ri->dkey, ri->offset_adjust,
2932 is_external_report, ri->quash_som, do_som);
2933 switch (rv) {
2934 case DEDUPE_HALT:
2935 return HWLM_TERMINATE_MATCHING;
2936 case DEDUPE_SKIP:
2937 assert(ri->fail_jump); // must progress
2938 pc += ri->fail_jump;
2939 L_PROGRAM_NEXT_INSTRUCTION_JUMP
2940 case DEDUPE_CONTINUE:
2941 break;
2942 }
2943
2944 const u32 ekey = INVALID_EKEY;
2945 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2946 ekey) == HWLM_TERMINATE_MATCHING) {
2947 return HWLM_TERMINATE_MATCHING;
2948 }
2949 work_done = 1;
2950 }
2951 L_PROGRAM_NEXT_INSTRUCTION
2952
2953 L_PROGRAM_CASE(FINAL_REPORT) {
2954 updateSeqPoint(tctxt, end, from_mpv);
2955 if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust,
2956 INVALID_EKEY) == HWLM_TERMINATE_MATCHING) {
2957 return HWLM_TERMINATE_MATCHING;
2958 }
2959 /* One-shot specialisation: this instruction always terminates
2960 * execution of the program. */
2961 return HWLM_CONTINUE_MATCHING;
2962 }
2963 L_PROGRAM_NEXT_INSTRUCTION
2964
2965 L_PROGRAM_CASE(CHECK_EXHAUSTED) {
2966 DEBUG_PRINTF("check ekey %u\n", ri->ekey);
2967 assert(ri->ekey != INVALID_EKEY);
2968 assert(ri->ekey < t->ekeyCount);
2969 const char *evec = scratch->core_info.exhaustionVector;
2970 if (isExhausted(t, evec, ri->ekey)) {
2971 DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
2972 ri->ekey);
2973 assert(ri->fail_jump); // must progress
2974 pc += ri->fail_jump;
2975 L_PROGRAM_NEXT_INSTRUCTION_JUMP
2976 }
2977 }
2978 L_PROGRAM_NEXT_INSTRUCTION
2979
2980 L_PROGRAM_CASE(SQUASH_GROUPS) {
2981 assert(popcount64(ri->groups) == 63); // Squash only one group.
2982 if (work_done) {
2983 tctxt->groups &= ri->groups;
2984 DEBUG_PRINTF("squash groups 0x%llx -> 0x%llx\n", ri->groups,
2985 tctxt->groups);
2986 }
2987 }
2988 L_PROGRAM_NEXT_INSTRUCTION
2989
2990 L_PROGRAM_CASE(CHECK_LONG_LIT) {
2991 const char nocase = 0;
2992 if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
2993 ri->lit_length, nocase)) {
2994 DEBUG_PRINTF("failed long lit check\n");
2995 assert(ri->fail_jump); // must progress
2996 pc += ri->fail_jump;
2997 L_PROGRAM_NEXT_INSTRUCTION_JUMP
2998 }
2999 }
3000 L_PROGRAM_NEXT_INSTRUCTION
3001
3002 L_PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
3003 const char nocase = 1;
3004 if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
3005 ri->lit_length, nocase)) {
3006 DEBUG_PRINTF("failed nocase long lit check\n");
3007 assert(ri->fail_jump); // must progress
3008 pc += ri->fail_jump;
3009 L_PROGRAM_NEXT_INSTRUCTION_JUMP
3010 }
3011 }
3012 L_PROGRAM_NEXT_INSTRUCTION
3013
3014 L_PROGRAM_CASE(CHECK_MED_LIT) {
3015 const char nocase = 0;
3016 if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
3017 ri->lit_length, nocase)) {
3018 DEBUG_PRINTF("failed lit check\n");
3019 assert(ri->fail_jump); // must progress
3020 pc += ri->fail_jump;
3021 L_PROGRAM_NEXT_INSTRUCTION_JUMP
3022 }
3023 }
3024 L_PROGRAM_NEXT_INSTRUCTION
3025
3026 L_PROGRAM_CASE(CHECK_MED_LIT_NOCASE) {
3027 const char nocase = 1;
3028 if (!roseCheckMediumLiteral(t, scratch, end, ri->lit_offset,
3029 ri->lit_length, nocase)) {
3030 DEBUG_PRINTF("failed long lit check\n");
3031 assert(ri->fail_jump); // must progress
3032 pc += ri->fail_jump;
3033 L_PROGRAM_NEXT_INSTRUCTION_JUMP
3034 }
3035 }
3036 L_PROGRAM_NEXT_INSTRUCTION
3037
3038 L_PROGRAM_CASE(CLEAR_WORK_DONE) {
3039 DEBUG_PRINTF("clear work_done flag\n");
3040 work_done = 0;
3041 }
3042 L_PROGRAM_NEXT_INSTRUCTION
3043
3044 L_PROGRAM_CASE(SET_LOGICAL) {
3045 DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n",
3046 ri->lkey, ri->offset_adjust);
3047 assert(ri->lkey != INVALID_LKEY);
3048 assert(ri->lkey < t->lkeyCount);
3049 char *lvec = scratch->core_info.logicalVector;
3050 setLogicalVal(t, lvec, ri->lkey, 1);
3051 updateLastCombMatchOffset(tctxt, end + ri->offset_adjust);
3052 }
3053 L_PROGRAM_NEXT_INSTRUCTION
3054
3055 L_PROGRAM_CASE(SET_COMBINATION) {
3056 DEBUG_PRINTF("set ckey %u as active\n", ri->ckey);
3057 assert(ri->ckey != INVALID_CKEY);
3058 assert(ri->ckey < t->ckeyCount);
3059 char *cvec = scratch->core_info.combVector;
3060 setCombinationActive(t, cvec, ri->ckey);
3061 }
3062 L_PROGRAM_NEXT_INSTRUCTION
3063
3064 L_PROGRAM_CASE(FLUSH_COMBINATION) {
3065 assert(end >= tctxt->lastCombMatchOffset);
3066 if (end > tctxt->lastCombMatchOffset) {
3067 if (flushActiveCombinations(t, scratch)
3068 == HWLM_TERMINATE_MATCHING) {
3069 return HWLM_TERMINATE_MATCHING;
3070 }
3071 }
3072 }
3073 L_PROGRAM_NEXT_INSTRUCTION
3074
3075 L_PROGRAM_CASE(SET_EXHAUST) {
3076 updateSeqPoint(tctxt, end, from_mpv);
3077 if (roseSetExhaust(t, scratch, ri->ekey)
3078 == HWLM_TERMINATE_MATCHING) {
3079 return HWLM_TERMINATE_MATCHING;
3080 }
3081 work_done = 1;
3082 }
3083 L_PROGRAM_NEXT_INSTRUCTION
3084
3085 default: {
3086 assert(0); // unreachable
3087 scratch->core_info.status |= STATUS_ERROR;
3088 return HWLM_TERMINATE_MATCHING;
3089 }
3090 }
3091 }
3092
3093 assert(0); // unreachable
3094 return HWLM_CONTINUE_MATCHING;
3095}
3096
3097#undef L_PROGRAM_CASE
3098#undef L_PROGRAM_NEXT_INSTRUCTION
3099#undef L_PROGRAM_NEXT_INSTRUCTION_JUMP
3100
3101#undef PROGRAM_CASE
3102#undef PROGRAM_NEXT_INSTRUCTION
3103#undef PROGRAM_NEXT_INSTRUCTION_JUMP
3104