/*
 * Copyright (c) 2016-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "mcsheng.h"

#include "accel.h"
#include "mcsheng_internal.h"
#include "nfa_api.h"
#include "nfa_api_queue.h"
#include "nfa_internal.h"
#include "util/arch.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/simd_utils.h"
#include "ue2common.h"

enum MatchMode {
    CALLBACK_OUTPUT,
    STOP_AT_MATCH,
    NO_MATCHES
};

static really_inline
const struct mstate_aux *get_aux(const struct mcsheng *m, u32 s) {
    const char *nfa = (const char *)m - sizeof(struct NFA);
    const struct mstate_aux *aux
        = s + (const struct mstate_aux *)(nfa + m->aux_offset);

    assert(ISALIGNED(aux));
    return aux;
}
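
/* Note on layout (a reader's aid, not part of the original engine comments):
 * the mcsheng structure sits immediately after the generic NFA header, and
 * offsets such as aux_offset, sherman_offset and the report list offsets are
 * expressed relative to the start of that NFA header. That is why helpers
 * such as get_aux() subtract sizeof(struct NFA) from the mcsheng pointer
 * before adding an offset. */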

static really_inline
u32 mcshengEnableStarts(const struct mcsheng *m, u32 s) {
    const struct mstate_aux *aux = get_aux(m, s);

    DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top);
    return aux->top;
}

static really_inline
u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table,
                u32 as) {
    assert(ISALIGNED_N(sherman_state, 16));

    u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET);

    if (len) {
        m128 ss_char = load128(sherman_state);
        m128 cur_char = set16x8(cprime);

        u32 z = movemask128(eq128(ss_char, cur_char));

        /* remove header cruft: type 1, len 1, daddy 2 */
        z &= ~0xf;
        z &= (1U << (len + 4)) - 1;

        if (z) {
            u32 i = ctz32(z & ~0xf) - 4;

            u32 s_out = unaligned_load_u16((const u8 *)sherman_state
                                           + SHERMAN_STATES_OFFSET(len)
                                           + sizeof(u16) * i);
            DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i,
                         len, cprime, s_out);
            return s_out;
        }
    }

    u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
    return succ_table[(daddy << as) + cprime];
}
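
/* A rough sketch of the "sherman" state record that doSherman16 walks, as
 * implied by the loads above (the offset macros themselves are defined in the
 * McClellan headers):
 *
 *     byte 0      : state type
 *     byte 1      : len, the number of explicitly listed characters
 *     bytes 2..3  : "daddy" state id, used when the character is not listed
 *     bytes 4..   : len remapped characters, compared 16 at a time with SIMD
 *     then        : len u16 successor states, one per listed character
 *
 * The eq128/movemask128 pair yields one match bit per byte of the record, so
 * the low four bits (the header) are masked off before the first set bit is
 * turned into an index into the successor array; if no listed character
 * matches, control falls through to the daddy state's normal transition. */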

static really_inline
char doComplexReport(NfaCallback cb, void *ctxt, const struct mcsheng *m,
                     u32 s, u64a loc, char eod, u32 *cached_accept_state,
                     u32 *cached_accept_id) {
    DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n",
                 s & STATE_MASK, loc, eod);

    if (!eod && s == *cached_accept_state) {
        if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING; /* termination requested */
        }

        return MO_CONTINUE_MATCHING; /* continue execution */
    }

    const struct mstate_aux *aux = get_aux(m, s);
    size_t offset = eod ? aux->accept_eod : aux->accept;

    assert(offset);
    const struct report_list *rl
        = (const void *)((const char *)m + offset - sizeof(struct NFA));
    assert(ISALIGNED(rl));

    DEBUG_PRINTF("report list size %u\n", rl->count);
    u32 count = rl->count;

    if (!eod && count == 1) {
        *cached_accept_state = s;
        *cached_accept_id = rl->report[0];

        DEBUG_PRINTF("reporting %u\n", rl->report[0]);
        if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING; /* termination requested */
        }

        return MO_CONTINUE_MATCHING; /* continue execution */
    }

    for (u32 i = 0; i < count; i++) {
        DEBUG_PRINTF("reporting %u\n", rl->report[i]);
        if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) {
            return MO_HALT_MATCHING; /* termination requested */
        }
    }

    return MO_CONTINUE_MATCHING; /* continue execution */
}

#define SHENG_CHUNK 8

static really_inline
u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end,
            const u8 *hard_c_end, u32 s_in, char do_accel) {
    assert(s_in < m->sheng_end);
    assert(s_in); /* should not already be dead */
    assert(soft_c_end <= hard_c_end);
    DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1);
    m128 s = set16x8(s_in - 1);
    const u8 *c = *c_inout;
    const u8 *c_end = hard_c_end - SHENG_CHUNK + 1;
    if (!do_accel) {
        c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1);
    }
    const m128 *masks = m->sheng_masks;
    u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */
    u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit;

    /* When we use movd to get a u32 containing our state, it will have 4 lanes
     * all duplicating the state. We can create versions of our limits with 4
     * copies to directly compare against, this prevents us generating code to
     * extract a single copy of the state from the u32 for checking. */
    u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101;

#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
    u32 sheng_limit_x4 = sheng_limit * 0x01010101;
    m128 simd_stop_limit = set4x32(sheng_stop_limit_x4);
    m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit);
    DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit,
                 m->sheng_accel_limit, sheng_stop_limit);
#endif
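
    /* Worked example of the x4 comparison trick described above (illustrative
     * numbers only): if the current sheng state is 3, movd(s) returns
     * 0x03030303 because every byte lane of the vector holds the state.
     * Multiplying a stop limit of, say, 5 by 0x01010101 gives 0x05050505, so
     * a single u32 compare of 0x03030303 against 0x05050505 answers
     * "state >= limit?" without first isolating one byte of the register. */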

#define SHENG_SINGLE_ITER do { \
        m128 shuffle_mask = masks[*(c++)]; \
        s = pshufb_m128(shuffle_mask, s); \
        u32 s_gpr_x4 = movd(s); /* convert to u8 */ \
        DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \
        if (s_gpr_x4 >= sheng_stop_limit_x4) { \
            s_gpr = s_gpr_x4; \
            goto exit; \
        } \
    } while (0)
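
    /* How the single-byte step above works (reader's sketch): masks[c] is a
     * 16-byte table whose i-th byte holds the next sheng state for current
     * state i on input byte c. Since s keeps the current state duplicated in
     * every lane, pshufb_m128(masks[c], s) selects that one table entry and
     * leaves the new state duplicated in every lane, ready for the next
     * input byte; each byte therefore costs one mask load plus one pshufb. */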

    u8 s_gpr;
    while (c < c_end) {
#if defined(HAVE_BMI2) && defined(ARCH_64_BIT)
        /* This version uses pext for efficiently bitbashing out scaled
         * versions of the bytes to process from a u64a */

        u64a data_bytes = unaligned_load_u64a(c);
        u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */
        data_bytes &= ~0xffULL; /* clear low bits for scale space */
        m128 shuffle_mask0 = load128((const char *)masks + cc0);
        s = pshufb_m128(shuffle_mask0, s);
        m128 s_max = s;
        m128 s_max0 = s_max;
        DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s));

#define SHENG_SINGLE_UNROLL_ITER(iter) \
        assert(iter); \
        u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \
        assert(cc##iter == (u64a)c[iter] << 4); \
        m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \
        s = pshufb_m128(shuffle_mask##iter, s); \
        if (do_accel && iter == 7) { \
            /* in the final iteration we also have to check against accel */ \
            m128 s_temp = sadd_u8_m128(s, accel_delta); \
            s_max = max_u8_m128(s_max, s_temp); \
        } else { \
            s_max = max_u8_m128(s_max, s); \
        } \
        m128 s_max##iter = s_max; \
        DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \
                     movd(s), movd(s_max));

        SHENG_SINGLE_UNROLL_ITER(1);

        SHENG_SINGLE_UNROLL_ITER(2);
        SHENG_SINGLE_UNROLL_ITER(3);

        SHENG_SINGLE_UNROLL_ITER(4);
        SHENG_SINGLE_UNROLL_ITER(5);

        SHENG_SINGLE_UNROLL_ITER(6);
        SHENG_SINGLE_UNROLL_ITER(7);

        if (movd(s_max7) >= sheng_limit_x4) {
            DEBUG_PRINTF("exit found\n");

            /* Check the last byte explicitly first: it is the most likely
             * stop point, as it is also the byte that checks for
             * acceleration. */
            if (movd(s_max6) < sheng_limit_x4) {
                c += SHENG_CHUNK;
                s_gpr = movq(s);
                assert(s_gpr >= sheng_stop_limit);
                goto exit;
            }

            /* use shift-xor to create a register containing all of the max
             * values */
            m128 blended = rshift64_m128(s_max0, 56);
            blended = xor128(blended, rshift64_m128(s_max1, 48));
            blended = xor128(blended, rshift64_m128(s_max2, 40));
            blended = xor128(blended, rshift64_m128(s_max3, 32));
            blended = xor128(blended, rshift64_m128(s_max4, 24));
            blended = xor128(blended, rshift64_m128(s_max5, 16));
            blended = xor128(blended, rshift64_m128(s_max6, 8));
            blended = xor128(blended, s);
            blended = xor128(blended, rshift64_m128(blended, 8));
            DEBUG_PRINTF("blended %016llx\n", movq(blended));

            m128 final = min_u8_m128(blended, simd_stop_limit);
            m128 cmp = sub_u8_m128(final, simd_stop_limit);
            u64a stops = ~movemask128(cmp);
            assert(stops);
            u32 earliest = ctz32(stops);
            DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest);
            assert(earliest < 8);
            c += earliest + 1;
            s_gpr = movq(blended) >> (earliest * 8);
            assert(s_gpr >= sheng_stop_limit);
            goto exit;
        } else {
            c += SHENG_CHUNK;
        }
#else
        SHENG_SINGLE_ITER;
        SHENG_SINGLE_ITER;
        SHENG_SINGLE_ITER;
        SHENG_SINGLE_ITER;

        SHENG_SINGLE_ITER;
        SHENG_SINGLE_ITER;
        SHENG_SINGLE_ITER;
        SHENG_SINGLE_ITER;
#endif
    }

    assert(c_end - c < SHENG_CHUNK);
    if (c < soft_c_end) {
        assert(soft_c_end - c < SHENG_CHUNK);
        switch (soft_c_end - c) {
        case 7:
            SHENG_SINGLE_ITER; // fallthrough
        case 6:
            SHENG_SINGLE_ITER; // fallthrough
        case 5:
            SHENG_SINGLE_ITER; // fallthrough
        case 4:
            SHENG_SINGLE_ITER; // fallthrough
        case 3:
            SHENG_SINGLE_ITER; // fallthrough
        case 2:
            SHENG_SINGLE_ITER; // fallthrough
        case 1:
            SHENG_SINGLE_ITER; // fallthrough
        }
    }

    assert(c >= soft_c_end);

    s_gpr = movd(s);
exit:
    assert(c <= hard_c_end);
    DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr);
    assert(c >= soft_c_end || s_gpr >= sheng_stop_limit);
    /* undo state adjustment to match mcclellan view */
    if (s_gpr == sheng_limit) {
        s_gpr = 0;
    } else if (s_gpr < sheng_limit) {
        s_gpr++;
    }

    *c_inout = c;
    return s_gpr;
}
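
/* A note on state numbering (reader's aid): inside doSheng the state is kept
 * zero-based, so the caller's state s_in is shifted down by one on entry and
 * the value sheng_limit (== sheng_end - 1) plays the role of the dead state.
 * The block at the exit label shifts the surviving state back up by one so
 * that the rest of the engine sees the usual McClellan numbering, in which
 * state 0 means dead. */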

static really_inline
const char *findShermanState(UNUSED const struct mcsheng *m,
                             const char *sherman_base_offset, u32 sherman_base,
                             u32 s) {
    const char *rv
        = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base);
    assert(rv < (const char *)m + m->length - sizeof(struct NFA));
    UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET);
    assert(type == SHERMAN_STATE);
    return rv;
}

static really_inline
const u8 *run_mcsheng_accel(const struct mcsheng *m,
                            const struct mstate_aux *aux, u32 s,
                            const u8 **min_accel_offset,
                            const u8 *c, const u8 *c_end) {
    DEBUG_PRINTF("skipping\n");
    u32 accel_offset = aux[s].accel_offset;

    assert(aux[s].accel_offset);
    assert(accel_offset >= m->aux_offset);
    assert(!m->sherman_offset || accel_offset < m->sherman_offset);

    const union AccelAux *aaux = (const void *)((const char *)m + accel_offset);
    const u8 *c2 = run_accel(aaux, c, c_end);

    if (c2 < *min_accel_offset + BAD_ACCEL_DIST) {
        *min_accel_offset = c2 + BIG_ACCEL_PENALTY;
    } else {
        *min_accel_offset = c2 + SMALL_ACCEL_PENALTY;
    }

    if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) {
        *min_accel_offset = c_end;
    }

    DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n",
                 c2 - c, *min_accel_offset - c2, c_end - c2);

    return c2;
}
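
/* How the accel throttling above behaves (reader's sketch; the constants are
 * defined elsewhere in the McClellan/mcsheng headers): run_accel skips ahead
 * over bytes that cannot advance the automaton from the current state. If the
 * skip only gained a short distance (less than BAD_ACCEL_DIST), acceleration
 * is treated as unprofitable here and min_accel_offset is pushed far ahead
 * (BIG_ACCEL_PENALTY) before another attempt; a productive skip only pays the
 * smaller penalty. Once fewer than ACCEL_MIN_LEN bytes remain, acceleration
 * is disabled for the rest of the block. */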

static really_inline
u32 doNormal16(const struct mcsheng *m, const u8 **c_inout, const u8 *end,
               u32 s, char do_accel, enum MatchMode mode) {
    const u8 *c = *c_inout;

    const u16 *succ_table
        = (const u16 *)((const char *)m + sizeof(struct mcsheng));
    assert(ISALIGNED_N(succ_table, 2));
    u32 sheng_end = m->sheng_end;
    u32 sherman_base = m->sherman_limit;
    const char *sherman_base_offset
        = (const char *)m - sizeof(struct NFA) + m->sherman_offset;
    u32 as = m->alphaShift;

    /* Adjust the start of the succ table so that it can be indexed directly
     * by state id (rather than by an id adjusted down to the normal range).
     * As we will not be processing states with low state ids, we will not be
     * accessing data before the succ table. Note: due to the size of the
     * sheng tables, the succ_table pointer will still be inside the engine. */
    succ_table -= sheng_end << as;

    s &= STATE_MASK;

    while (c < end && s >= sheng_end) {
        u8 cprime = m->remap[*c];
        DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c,
                     ourisprint(*c) ? *c : '?', cprime, s);
        if (s < sherman_base) {
            DEBUG_PRINTF("doing normal\n");
            assert(s < m->state_count);
            s = succ_table[(s << as) + cprime];
        } else {
            const char *sherman_state
                = findShermanState(m, sherman_base_offset, sherman_base, s);
            DEBUG_PRINTF("doing sherman (%u)\n", s);
            s = doSherman16(sherman_state, cprime, succ_table, as);
        }

        DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK);
        c++;

        if (do_accel && (s & ACCEL_FLAG)) {
            break;
        }
        if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) {
            break;
        }

        s &= STATE_MASK;
    }

    *c_inout = c;
    return s;
}
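
/* Successor table indexing, by way of a small example (illustrative numbers
 * only): with alphaShift as = 5 there are 32 remapped character classes per
 * state, so the transition for state s on remapped byte cprime lives at
 * succ_table[(s << 5) + cprime]. Because the table is only built for the
 * states not handled by the sheng engine, the pointer is rebased above with
 * "succ_table -= sheng_end << as" so that it can be indexed with the raw
 * state id; states below sheng_end never reach this loop. */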

static really_inline
char mcshengExec16_i(const struct mcsheng *m, u32 *state, const u8 *buf,
                     size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                     char single, const u8 **c_final, enum MatchMode mode) {
    assert(ISALIGNED_N(state, 2));
    if (!len) {
        if (mode == STOP_AT_MATCH) {
            *c_final = buf;
        }
        return MO_ALIVE;
    }

    u32 s = *state;
    const u8 *c = buf;
    const u8 *c_end = buf + len;
    const u8 sheng_end = m->sheng_end;
    const struct mstate_aux *aux
        = (const struct mstate_aux *)((const char *)m + m->aux_offset
                                      - sizeof(struct NFA));

    s &= STATE_MASK;

    u32 cached_accept_id = 0;
    u32 cached_accept_state = 0;

    DEBUG_PRINTF("s: %u, len %zu\n", s, len);

    const u8 *min_accel_offset = c;
    if (!m->has_accel || len < ACCEL_MIN_LEN) {
        min_accel_offset = c_end;
        goto without_accel;
    }

    goto with_accel;

without_accel:
    do {
        assert(c < min_accel_offset);
        int do_accept;
        if (!s) {
            goto exit;
        } else if (s < sheng_end) {
            s = doSheng(m, &c, min_accel_offset, c_end, s, 0);
            do_accept = mode != NO_MATCHES && get_aux(m, s)->accept;
        } else {
            s = doNormal16(m, &c, min_accel_offset, s, 0, mode);

            do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
        }

        if (do_accept) {
            if (mode == STOP_AT_MATCH) {
                *state = s & STATE_MASK;
                *c_final = c - 1;
                return MO_MATCHES_PENDING;
            }

            u64a loc = (c - 1) - buf + offAdj + 1;

            if (single) {
                DEBUG_PRINTF("reporting %u\n", m->arb_report);
                if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
                    return MO_DEAD; /* termination requested */
                }
            } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
                                       &cached_accept_state, &cached_accept_id)
                       == MO_HALT_MATCHING) {
                return MO_DEAD;
            }
        }

        assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
    } while (c < min_accel_offset);

    if (c == c_end) {
        goto exit;
    }

with_accel:
    do {
        assert(c < c_end);
        int do_accept;

        if (!s) {
            goto exit;
        } else if (s < sheng_end) {
            if (s > m->sheng_accel_limit) {
                c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
                if (c == c_end) {
                    goto exit;
                } else {
                    goto without_accel;
                }
            }
            s = doSheng(m, &c, c_end, c_end, s, 1);
            do_accept = mode != NO_MATCHES && get_aux(m, s)->accept;
        } else {
            if (s & ACCEL_FLAG) {
                DEBUG_PRINTF("skipping\n");
                s &= STATE_MASK;
                c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
                if (c == c_end) {
                    goto exit;
                } else {
                    goto without_accel;
                }
            }

            s = doNormal16(m, &c, c_end, s, 1, mode);
            do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG);
        }

        if (do_accept) {
            if (mode == STOP_AT_MATCH) {
                *state = s & STATE_MASK;
                *c_final = c - 1;
                return MO_MATCHES_PENDING;
            }

            u64a loc = (c - 1) - buf + offAdj + 1;

            if (single) {
                DEBUG_PRINTF("reporting %u\n", m->arb_report);
                if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
                    return MO_DEAD; /* termination requested */
                }
            } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0,
                                       &cached_accept_state, &cached_accept_id)
                       == MO_HALT_MATCHING) {
                return MO_DEAD;
            }
        }

        assert(c <= c_end);
    } while (c < c_end);

exit:
    s &= STATE_MASK;

    if (mode == STOP_AT_MATCH) {
        *c_final = c_end;
    }
    *state = s;

    return MO_ALIVE;
}

static never_inline
char mcshengExec16_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf,
                        size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                        char single, const u8 **final_point) {
    return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
                           final_point, CALLBACK_OUTPUT);
}

static never_inline
char mcshengExec16_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf,
                         size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                         char single, const u8 **final_point) {
    return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
                           final_point, STOP_AT_MATCH);
}

static never_inline
char mcshengExec16_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf,
                        size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                        char single, const u8 **final_point) {
    return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single,
                           final_point, NO_MATCHES);
}

static really_inline
char mcshengExec16_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf,
                        size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                        char single, const u8 **final_point,
                        enum MatchMode mode) {
    if (mode == CALLBACK_OUTPUT) {
        return mcshengExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt,
                                  single, final_point);
    } else if (mode == STOP_AT_MATCH) {
        return mcshengExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt,
                                   single, final_point);
    } else {
        assert(mode == NO_MATCHES);
        return mcshengExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt,
                                  single, final_point);
    }
}
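
/* Why the _cb/_sam/_nm/_ni split (reader's aid): mcshengExec16_i is
 * really_inline, so each of the three never_inline wrappers above compiles
 * into a separate copy of the scan loop with the mode argument folded to a
 * constant, letting the compiler drop the branches for the other modes. The
 * _ni dispatcher simply selects the right specialisation at run time. */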

static really_inline
u32 doNormal8(const struct mcsheng *m, const u8 **c_inout, const u8 *end, u32 s,
              char do_accel, enum MatchMode mode) {
    const u8 *c = *c_inout;
    u32 sheng_end = m->sheng_end;
    u32 accel_limit = m->accel_limit_8;
    u32 accept_limit = m->accept_limit_8;

    const u32 as = m->alphaShift;
    const u8 *succ_table = (const u8 *)((const char *)m
                                        + sizeof(struct mcsheng));
    /* Adjust the start of the succ table so that it can be indexed directly
     * by state id (rather than by an id adjusted down to the normal range).
     * As we will not be processing states with low state ids, we will not be
     * accessing data before the succ table. Note: due to the size of the
     * sheng tables, the succ_table pointer will still be inside the engine. */
    succ_table -= sheng_end << as;

    assert(s >= sheng_end);

    while (c < end && s >= sheng_end) {
        u8 cprime = m->remap[*c];
        DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c,
                     ourisprint(*c) ? *c : '?', cprime);
        s = succ_table[(s << as) + cprime];

        DEBUG_PRINTF("s: %u\n", s);
        c++;
        if (do_accel) {
            if (s >= accel_limit) {
                break;
            }
        } else {
            if (mode != NO_MATCHES && s >= accept_limit) {
                break;
            }
        }
    }
    *c_inout = c;
    return s;
}
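
/* Reader's note on the 8-bit limits: in the one-byte-state variant the state
 * ids appear to be ordered so that accelerable states sit at or above
 * accel_limit_8 and accepting states at or above accept_limit_8, which is why
 * the loop above can test for either property with a single compare instead
 * of consulting the aux table for every byte. */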

static really_inline
char mcshengExec8_i(const struct mcsheng *m, u32 *state, const u8 *buf,
                    size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                    char single, const u8 **c_final, enum MatchMode mode) {
    if (!len) {
        *c_final = buf;
        return MO_ALIVE;
    }
    u32 s = *state;
    const u8 *c = buf;
    const u8 *c_end = buf + len;
    const u8 sheng_end = m->sheng_end;

    const struct mstate_aux *aux
        = (const struct mstate_aux *)((const char *)m + m->aux_offset
                                      - sizeof(struct NFA));
    u32 accept_limit = m->accept_limit_8;

    u32 cached_accept_id = 0;
    u32 cached_accept_state = 0;

    DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit);

    DEBUG_PRINTF("s: %u, len %zu\n", s, len);

    const u8 *min_accel_offset = c;
    if (!m->has_accel || len < ACCEL_MIN_LEN) {
        min_accel_offset = c_end;
        goto without_accel;
    }

    goto with_accel;

without_accel:
    do {
        assert(c < min_accel_offset);
        if (!s) {
            goto exit;
        } else if (s < sheng_end) {
            s = doSheng(m, &c, min_accel_offset, c_end, s, 0);
        } else {
            s = doNormal8(m, &c, min_accel_offset, s, 0, mode);
            assert(c <= min_accel_offset);
        }

        if (mode != NO_MATCHES && s >= accept_limit) {
            if (mode == STOP_AT_MATCH) {
                DEBUG_PRINTF("match - pausing\n");
                *state = s;
                *c_final = c - 1;
                return MO_MATCHES_PENDING;
            }

            u64a loc = (c - 1) - buf + offAdj + 1;
            if (single) {
                DEBUG_PRINTF("reporting %u\n", m->arb_report);
                if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
                    return MO_DEAD;
                }
            } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
                                       &cached_accept_state, &cached_accept_id)
                       == MO_HALT_MATCHING) {
                return MO_DEAD;
            }
        }

        assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */
    } while (c < min_accel_offset);

    if (c == c_end) {
        goto exit;
    }

with_accel:
    do {
        u32 accel_limit = m->accel_limit_8;

        assert(c < c_end);
        if (!s) {
            goto exit;
        } else if (s < sheng_end) {
            if (s > m->sheng_accel_limit) {
                c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
                if (c == c_end) {
                    goto exit;
                } else {
                    goto without_accel;
                }
            }
            s = doSheng(m, &c, c_end, c_end, s, 1);
        } else {
            if (s >= accel_limit && aux[s].accel_offset) {
                c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end);
                if (c == c_end) {
                    goto exit;
                } else {
                    goto without_accel;
                }
            }
            s = doNormal8(m, &c, c_end, s, 1, mode);
        }

        if (mode != NO_MATCHES && s >= accept_limit) {
            if (mode == STOP_AT_MATCH) {
                DEBUG_PRINTF("match - pausing\n");
                *state = s;
                *c_final = c - 1;
                return MO_MATCHES_PENDING;
            }

            u64a loc = (c - 1) - buf + offAdj + 1;
            if (single) {
                DEBUG_PRINTF("reporting %u\n", m->arb_report);
                if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) {
                    return MO_DEAD;
                }
            } else if (doComplexReport(cb, ctxt, m, s, loc, 0,
                                       &cached_accept_state, &cached_accept_id)
                       == MO_HALT_MATCHING) {
                return MO_DEAD;
            }
        }

        assert(c <= c_end);
    } while (c < c_end);

exit:
    *state = s;
    if (mode == STOP_AT_MATCH) {
        *c_final = c_end;
    }
    return MO_ALIVE;
}

static never_inline
char mcshengExec8_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf,
                       size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                       char single, const u8 **final_point) {
    return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
                          final_point, CALLBACK_OUTPUT);
}

static never_inline
char mcshengExec8_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf,
                        size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                        char single, const u8 **final_point) {
    return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
                          final_point, STOP_AT_MATCH);
}

static never_inline
char mcshengExec8_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf,
                       size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                       char single, const u8 **final_point) {
    return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single,
                          final_point, NO_MATCHES);
}

static really_inline
char mcshengExec8_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf,
                       size_t len, u64a offAdj, NfaCallback cb, void *ctxt,
                       char single, const u8 **final_point,
                       enum MatchMode mode) {
    if (mode == CALLBACK_OUTPUT) {
        return mcshengExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single,
                                 final_point);
    } else if (mode == STOP_AT_MATCH) {
        return mcshengExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt,
                                  single, final_point);
    } else {
        assert(mode == NO_MATCHES);
        return mcshengExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single,
                                 final_point);
    }
}

static really_inline
char mcshengCheckEOD(const struct NFA *nfa, u32 s, u64a offset,
                     NfaCallback cb, void *ctxt) {
    const struct mcsheng *m = getImplNfa(nfa);
    const struct mstate_aux *aux = get_aux(m, s);

    if (!aux->accept_eod) {
        return MO_CONTINUE_MATCHING;
    }
    return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL);
}

static really_inline
char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
                          const u8 *hend, NfaCallback cb, void *context,
                          struct mq *q, char single, s64a end,
                          enum MatchMode mode) {
    assert(n->type == MCSHENG_NFA_16);
    const struct mcsheng *m = getImplNfa(n);
    s64a sp;

    assert(ISALIGNED_N(q->state, 2));
    u32 s = *(u16 *)q->state;

    if (q->report_current) {
        assert(s);
        assert(get_aux(m, s)->accept);

        int rv;
        if (single) {
            DEBUG_PRINTF("reporting %u\n", m->arb_report);
            rv = cb(0, q_cur_offset(q), m->arb_report, context);
        } else {
            u32 cached_accept_id = 0;
            u32 cached_accept_state = 0;

            rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
                                 &cached_accept_state, &cached_accept_id);
        }

        q->report_current = 0;

        if (rv == MO_HALT_MATCHING) {
            return MO_DEAD;
        }
    }

    sp = q_cur_loc(q);
    q->cur++;

    const u8 *cur_buf = sp < 0 ? hend : buffer;

    assert(q->cur);
    if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
        DEBUG_PRINTF("this is as far as we go\n");
        q->cur--;
        q->items[q->cur].type = MQE_START;
        q->items[q->cur].location = end;
        *(u16 *)q->state = s;
        return MO_ALIVE;
    }

    while (1) {
        assert(q->cur < q->end);
        s64a ep = q->items[q->cur].location;
        if (mode != NO_MATCHES) {
            ep = MIN(ep, end);
        }

        assert(ep >= sp);

        s64a local_ep = ep;
        if (sp < 0) {
            local_ep = MIN(0, ep);
        }

        /* do main buffer region */
        const u8 *final_look;
        char rv = mcshengExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp,
                                     offset + sp, cb, context, single,
                                     &final_look, mode);
        if (rv == MO_DEAD) {
            *(u16 *)q->state = 0;
            return MO_DEAD;
        }
        if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
            DEBUG_PRINTF("this is as far as we go\n");
            DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);

            assert(q->cur);
            assert(final_look != cur_buf + local_ep);

            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = final_look - cur_buf + 1; /* due to
                                                                   * early -1 */
            *(u16 *)q->state = s;
            return MO_MATCHES_PENDING;
        }

        assert(rv == MO_ALIVE);
        assert(q->cur);
        if (mode != NO_MATCHES && q->items[q->cur].location > end) {
            DEBUG_PRINTF("this is as far as we go\n");
            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = end;
            *(u16 *)q->state = s;
            return MO_ALIVE;
        }

        sp = local_ep;

        if (sp == 0) {
            cur_buf = buffer;
        }

        if (sp != ep) {
            continue;
        }

        switch (q->items[q->cur].type) {
        case MQE_TOP:
            assert(sp + offset || !s);
            if (sp + offset == 0) {
                s = m->start_anchored;
                break;
            }
            s = mcshengEnableStarts(m, s);
            break;
        case MQE_END:
            *(u16 *)q->state = s;
            q->cur++;
            return s ? MO_ALIVE : MO_DEAD;
        default:
            assert(!"invalid queue event");
        }

        q->cur++;
    }
}
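
/* How the queue loop above addresses data (reader's sketch): queue item
 * locations are signed offsets relative to the start of the current scan
 * buffer, with negative values indexing backwards into the history buffer
 * (hence "cur_buf = sp < 0 ? hend : buffer"). Each pass runs the engine from
 * sp up to local_ep, which is clamped to 0 while still in history so that the
 * buffer switch happens exactly at the history/buffer boundary, and TOP/END
 * events are only handled once sp has caught up with the item's location. */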

static really_inline
char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer,
                         const u8 *hend, NfaCallback cb, void *context,
                         struct mq *q, char single, s64a end,
                         enum MatchMode mode) {
    assert(n->type == MCSHENG_NFA_8);
    const struct mcsheng *m = getImplNfa(n);
    s64a sp;

    u32 s = *(u8 *)q->state;

    if (q->report_current) {
        assert(s);
        assert(s >= m->accept_limit_8);

        int rv;
        if (single) {
            DEBUG_PRINTF("reporting %u\n", m->arb_report);
            rv = cb(0, q_cur_offset(q), m->arb_report, context);
        } else {
            u32 cached_accept_id = 0;
            u32 cached_accept_state = 0;

            rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0,
                                 &cached_accept_state, &cached_accept_id);
        }

        q->report_current = 0;

        if (rv == MO_HALT_MATCHING) {
            return MO_DEAD;
        }
    }

    sp = q_cur_loc(q);
    q->cur++;

    const u8 *cur_buf = sp < 0 ? hend : buffer;

    if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) {
        DEBUG_PRINTF("this is as far as we go\n");
        q->cur--;
        q->items[q->cur].type = MQE_START;
        q->items[q->cur].location = end;
        *(u8 *)q->state = s;
        return MO_ALIVE;
    }

    while (1) {
        DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" :
                     q->items[q->cur].type == MQE_END ? "END" : "???",
                     q->items[q->cur].location + offset);
        assert(q->cur < q->end);
        s64a ep = q->items[q->cur].location;
        if (mode != NO_MATCHES) {
            ep = MIN(ep, end);
        }

        assert(ep >= sp);

        s64a local_ep = ep;
        if (sp < 0) {
            local_ep = MIN(0, ep);
        }

        const u8 *final_look;
        char rv = mcshengExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp,
                                    offset + sp, cb, context, single,
                                    &final_look, mode);
        if (rv == MO_HALT_MATCHING) {
            *(u8 *)q->state = 0;
            return MO_DEAD;
        }
        if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) {
            DEBUG_PRINTF("this is as far as we go\n");
            DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf);

            assert(q->cur);
            assert(final_look != cur_buf + local_ep);

            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = final_look - cur_buf + 1; /* due to
                                                                   * early -1 */
            *(u8 *)q->state = s;
            return MO_MATCHES_PENDING;
        }

        assert(rv == MO_ALIVE);
        assert(q->cur);
        if (mode != NO_MATCHES && q->items[q->cur].location > end) {
            DEBUG_PRINTF("this is as far as we go\n");
            assert(q->cur);
            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = end;
            *(u8 *)q->state = s;
            return MO_ALIVE;
        }

        sp = local_ep;

        if (sp == 0) {
            cur_buf = buffer;
        }

        if (sp != ep) {
            continue;
        }

        switch (q->items[q->cur].type) {
        case MQE_TOP:
            assert(sp + offset || !s);
            if (sp + offset == 0) {
                s = (u8)m->start_anchored;
                break;
            }
            s = mcshengEnableStarts(m, s);
            break;
        case MQE_END:
            *(u8 *)q->state = s;
            q->cur++;
            return s ? MO_ALIVE : MO_DEAD;
        default:
            assert(!"invalid queue event");
        }

        q->cur++;
    }
}

char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end) {
    u64a offset = q->offset;
    const u8 *buffer = q->buffer;
    NfaCallback cb = q->cb;
    void *context = q->context;
    assert(n->type == MCSHENG_NFA_8);
    const struct mcsheng *m = getImplNfa(n);
    const u8 *hend = q->history + q->hlength;

    return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q,
                               m->flags & MCSHENG_FLAG_SINGLE, end,
                               CALLBACK_OUTPUT);
}

char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end) {
    u64a offset = q->offset;
    const u8 *buffer = q->buffer;
    NfaCallback cb = q->cb;
    void *context = q->context;
    assert(n->type == MCSHENG_NFA_16);
    const struct mcsheng *m = getImplNfa(n);
    const u8 *hend = q->history + q->hlength;

    return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q,
                                m->flags & MCSHENG_FLAG_SINGLE, end,
                                CALLBACK_OUTPUT);
}
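
/* Entry point summary (reader's aid): the _Q functions above run in
 * CALLBACK_OUTPUT mode and report every match via the callback, the _Q2
 * variants further down run in STOP_AT_MATCH mode and pause at the first
 * accept so the caller can inspect it, and the _QR variants run in NO_MATCHES
 * mode to the end of the queue and then ask whether a particular report is
 * accepted in the final state. */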

char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q) {
    const struct mcsheng *m = getImplNfa(n);
    NfaCallback cb = q->cb;
    void *ctxt = q->context;
    u32 s = *(u8 *)q->state;
    u8 single = m->flags & MCSHENG_FLAG_SINGLE;
    u64a offset = q_cur_offset(q);
    assert(q_cur_type(q) == MQE_START);
    assert(s);

    if (s >= m->accept_limit_8) {
        if (single) {
            DEBUG_PRINTF("reporting %u\n", m->arb_report);
            cb(0, offset, m->arb_report, ctxt);
        } else {
            u32 cached_accept_id = 0;
            u32 cached_accept_state = 0;

            doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
                            &cached_accept_id);
        }
    }

    return 0;
}

char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) {
    const struct mcsheng *m = getImplNfa(n);
    NfaCallback cb = q->cb;
    void *ctxt = q->context;
    u32 s = *(u16 *)q->state;
    const struct mstate_aux *aux = get_aux(m, s);
    u8 single = m->flags & MCSHENG_FLAG_SINGLE;
    u64a offset = q_cur_offset(q);
    assert(q_cur_type(q) == MQE_START);
    DEBUG_PRINTF("state %u\n", s);
    assert(s);

    if (aux->accept) {
        if (single) {
            DEBUG_PRINTF("reporting %u\n", m->arb_report);
            cb(0, offset, m->arb_report, ctxt);
        } else {
            u32 cached_accept_id = 0;
            u32 cached_accept_state = 0;

            doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state,
                            &cached_accept_id);
        }
    }

    return 0;
}

static
char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux,
                      ReportID report) {
    assert(m && aux);

    if (!aux->accept) {
        return 0;
    }

    const struct report_list *rl = (const struct report_list *)
        ((const char *)m + aux->accept - sizeof(struct NFA));
    assert(ISALIGNED_N(rl, 4));

    DEBUG_PRINTF("report list has %u entries\n", rl->count);

    for (u32 i = 0; i < rl->count; i++) {
        if (rl->report[i] == report) {
            return 1;
        }
    }

    return 0;
}

char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report,
                              struct mq *q) {
    assert(n && q);

    const struct mcsheng *m = getImplNfa(n);
    u8 s = *(u8 *)q->state;
    DEBUG_PRINTF("checking accepts for %hhu\n", s);

    return mcshengHasAccept(m, get_aux(m, s), report);
}

char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q) {
    assert(n && q);

    const struct mcsheng *m = getImplNfa(n);
    u8 s = *(u8 *)q->state;
    DEBUG_PRINTF("checking accepts for %hhu\n", s);

    return !!get_aux(m, s)->accept;
}

char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q) {
    assert(n && q);

    const struct mcsheng *m = getImplNfa(n);
    u16 s = *(u16 *)q->state;
    DEBUG_PRINTF("checking accepts for %hu\n", s);

    return mcshengHasAccept(m, get_aux(m, s), report);
}

char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q) {
    assert(n && q);

    const struct mcsheng *m = getImplNfa(n);
    u16 s = *(u16 *)q->state;
    DEBUG_PRINTF("checking accepts for %hu\n", s);

    return !!get_aux(m, s)->accept;
}

char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end) {
    u64a offset = q->offset;
    const u8 *buffer = q->buffer;
    NfaCallback cb = q->cb;
    void *context = q->context;
    assert(n->type == MCSHENG_NFA_8);
    const struct mcsheng *m = getImplNfa(n);
    const u8 *hend = q->history + q->hlength;

    return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q,
                               m->flags & MCSHENG_FLAG_SINGLE, end,
                               STOP_AT_MATCH);
}

char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end) {
    u64a offset = q->offset;
    const u8 *buffer = q->buffer;
    NfaCallback cb = q->cb;
    void *context = q->context;
    assert(n->type == MCSHENG_NFA_16);
    const struct mcsheng *m = getImplNfa(n);
    const u8 *hend = q->history + q->hlength;

    return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q,
                                m->flags & MCSHENG_FLAG_SINGLE, end,
                                STOP_AT_MATCH);
}

char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report) {
    u64a offset = q->offset;
    const u8 *buffer = q->buffer;
    NfaCallback cb = q->cb;
    void *context = q->context;
    assert(n->type == MCSHENG_NFA_8);
    const struct mcsheng *m = getImplNfa(n);
    const u8 *hend = q->history + q->hlength;

    char rv = nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q,
                                  m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */,
                                  NO_MATCHES);
    if (rv && nfaExecMcSheng8_inAccept(n, report, q)) {
        return MO_MATCHES_PENDING;
    } else {
        return rv;
    }
}

char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report) {
    u64a offset = q->offset;
    const u8 *buffer = q->buffer;
    NfaCallback cb = q->cb;
    void *context = q->context;
    assert(n->type == MCSHENG_NFA_16);
    const struct mcsheng *m = getImplNfa(n);
    const u8 *hend = q->history + q->hlength;

    char rv = nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q,
                                   m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */,
                                   NO_MATCHES);

    if (rv && nfaExecMcSheng16_inAccept(n, report, q)) {
        return MO_MATCHES_PENDING;
    } else {
        return rv;
    }
}

char nfaExecMcSheng8_initCompressedState(const struct NFA *nfa, u64a offset,
                                         void *state, UNUSED u8 key) {
    const struct mcsheng *m = getImplNfa(nfa);
    u8 s = offset ? m->start_floating : m->start_anchored;
    if (s) {
        *(u8 *)state = s;
        return 1;
    }
    return 0;
}

char nfaExecMcSheng16_initCompressedState(const struct NFA *nfa, u64a offset,
                                          void *state, UNUSED u8 key) {
    const struct mcsheng *m = getImplNfa(nfa);
    u16 s = offset ? m->start_floating : m->start_anchored;
    if (s) {
        unaligned_store_u16(state, s);
        return 1;
    }
    return 0;
}

char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state,
                             UNUSED const char *streamState, u64a offset,
                             NfaCallback callback, void *context) {
    return mcshengCheckEOD(nfa, *(const u8 *)state, offset, callback,
                           context);
}

char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state,
                              UNUSED const char *streamState, u64a offset,
                              NfaCallback callback, void *context) {
    assert(ISALIGNED_N(state, 2));
    return mcshengCheckEOD(nfa, *(const u16 *)state, offset, callback,
                           context);
}

char nfaExecMcSheng8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
    assert(nfa->scratchStateSize == 1);
    *(u8 *)q->state = 0;
    return 0;
}

char nfaExecMcSheng16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) {
    assert(nfa->scratchStateSize == 2);
    assert(ISALIGNED_N(q->state, 2));
    *(u16 *)q->state = 0;
    return 0;
}

char nfaExecMcSheng8_queueCompressState(UNUSED const struct NFA *nfa,
                                        const struct mq *q, UNUSED s64a loc) {
    void *dest = q->streamState;
    const void *src = q->state;
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);
    *(u8 *)dest = *(const u8 *)src;
    return 0;
}

char nfaExecMcSheng8_expandState(UNUSED const struct NFA *nfa, void *dest,
                                 const void *src, UNUSED u64a offset,
                                 UNUSED u8 key) {
    assert(nfa->scratchStateSize == 1);
    assert(nfa->streamStateSize == 1);
    *(u8 *)dest = *(const u8 *)src;
    return 0;
}

char nfaExecMcSheng16_queueCompressState(UNUSED const struct NFA *nfa,
                                         const struct mq *q,
                                         UNUSED s64a loc) {
    void *dest = q->streamState;
    const void *src = q->state;
    assert(nfa->scratchStateSize == 2);
    assert(nfa->streamStateSize == 2);
    assert(ISALIGNED_N(src, 2));
    unaligned_store_u16(dest, *(const u16 *)(src));
    return 0;
}

char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest,
                                  const void *src, UNUSED u64a offset,
                                  UNUSED u8 key) {
    assert(nfa->scratchStateSize == 2);
    assert(nfa->streamStateSize == 2);
    assert(ISALIGNED_N(dest, 2));
    *(u16 *)dest = unaligned_load_u16(src);
    return 0;
}