1 | /* |
2 | * Copyright (c) 2016-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "sheng.h" |
30 | |
31 | #include "accel.h" |
32 | #include "sheng_internal.h" |
33 | #include "nfa_api.h" |
34 | #include "nfa_api_queue.h" |
35 | #include "nfa_internal.h" |
36 | #include "util/bitutils.h" |
37 | #include "util/compare.h" |
38 | #include "util/join.h" |
39 | #include "util/simd_utils.h" |
40 | |
/* Output behaviour requested from runSheng() by the public entry points. */
enum MatchMode {
    CALLBACK_OUTPUT = 0, /* scan and deliver every match via the callback */
    STOP_AT_MATCH = 1,   /* scan until the first match, then stop */
    NO_MATCHES = 2       /* just scan the buffer */
};
46 | |
47 | static really_inline |
48 | const struct sheng *get_sheng(const struct NFA *n) { |
49 | return (const struct sheng *)getImplNfa(n); |
50 | } |
51 | |
52 | static really_inline |
53 | const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) { |
54 | u32 offset = sh->aux_offset - sizeof(struct NFA) + |
55 | (id & SHENG_STATE_MASK) * sizeof(struct sstate_aux); |
56 | DEBUG_PRINTF("Getting aux for state %u at offset %llu\n" , |
57 | id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA)); |
58 | return (const struct sstate_aux *)((const char *) sh + offset); |
59 | } |
60 | |
61 | static really_inline |
62 | const union AccelAux *get_accel(const struct sheng *sh, u8 id) { |
63 | const struct sstate_aux *saux = get_aux(sh, id); |
64 | DEBUG_PRINTF("Getting accel aux at offset %u\n" , saux->accel); |
65 | const union AccelAux *aux = (const union AccelAux *) |
66 | ((const char *)sh + saux->accel - sizeof(struct NFA)); |
67 | return aux; |
68 | } |
69 | |
70 | static really_inline |
71 | const struct report_list *get_rl(const struct sheng *sh, |
72 | const struct sstate_aux *aux) { |
73 | DEBUG_PRINTF("Getting report list at offset %u\n" , aux->accept); |
74 | return (const struct report_list *) |
75 | ((const char *)sh + aux->accept - sizeof(struct NFA)); |
76 | } |
77 | |
78 | static really_inline |
79 | const struct report_list *get_eod_rl(const struct sheng *sh, |
80 | const struct sstate_aux *aux) { |
81 | DEBUG_PRINTF("Getting EOD report list at offset %u\n" , aux->accept); |
82 | return (const struct report_list *) |
83 | ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); |
84 | } |
85 | |
86 | static really_inline |
87 | char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux, |
88 | ReportID report) { |
89 | assert(sh && aux); |
90 | |
91 | const struct report_list *rl = get_rl(sh, aux); |
92 | assert(ISALIGNED_N(rl, 4)); |
93 | |
94 | DEBUG_PRINTF("report list has %u entries\n" , rl->count); |
95 | |
96 | for (u32 i = 0; i < rl->count; i++) { |
97 | if (rl->report[i] == report) { |
98 | DEBUG_PRINTF("reporting %u\n" , rl->report[i]); |
99 | return 1; |
100 | } |
101 | } |
102 | |
103 | return 0; |
104 | } |
105 | |
106 | static really_inline |
107 | char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) { |
108 | DEBUG_PRINTF("reporting %u\n" , r); |
109 | if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) { |
110 | return MO_HALT_MATCHING; /* termination requested */ |
111 | } |
112 | return MO_CONTINUE_MATCHING; /* continue execution */ |
113 | } |
114 | |
115 | static really_inline |
116 | char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, |
117 | const u8 state, u64a loc, u8 *const cached_accept_state, |
118 | ReportID *const cached_accept_id, char eod) { |
119 | DEBUG_PRINTF("reporting matches @ %llu\n" , loc); |
120 | |
121 | if (!eod && state == *cached_accept_state) { |
122 | DEBUG_PRINTF("reporting %u\n" , *cached_accept_id); |
123 | if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { |
124 | return MO_HALT_MATCHING; /* termination requested */ |
125 | } |
126 | |
127 | return MO_CONTINUE_MATCHING; /* continue execution */ |
128 | } |
129 | const struct sstate_aux *aux = get_aux(sh, state); |
130 | const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux); |
131 | assert(ISALIGNED(rl)); |
132 | |
133 | DEBUG_PRINTF("report list has %u entries\n" , rl->count); |
134 | u32 count = rl->count; |
135 | |
136 | if (!eod && count == 1) { |
137 | *cached_accept_state = state; |
138 | *cached_accept_id = rl->report[0]; |
139 | |
140 | DEBUG_PRINTF("reporting %u\n" , rl->report[0]); |
141 | if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { |
142 | return MO_HALT_MATCHING; /* termination requested */ |
143 | } |
144 | |
145 | return MO_CONTINUE_MATCHING; /* continue execution */ |
146 | } |
147 | |
148 | for (u32 i = 0; i < count; i++) { |
149 | DEBUG_PRINTF("reporting %u\n" , rl->report[i]); |
150 | if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { |
151 | return MO_HALT_MATCHING; /* termination requested */ |
152 | } |
153 | } |
154 | return MO_CONTINUE_MATCHING; /* continue execution */ |
155 | } |
156 | |
157 | /* include Sheng function definitions */ |
158 | #include "sheng_defs.h" |
159 | |
160 | static really_inline |
161 | char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, |
162 | u8 *const cached_accept_state, ReportID *const cached_accept_id, |
163 | const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, |
164 | u8 has_accel, u8 single, const u8 **scanned, u8 *state) { |
165 | DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n" , |
166 | (u64a)(end - start), offset); |
167 | DEBUG_PRINTF("start: %lli end: %lli\n" , (s64a)(start - cur_buf), |
168 | (s64a)(end - cur_buf)); |
169 | DEBUG_PRINTF("can die: %u has accel: %u single: %u\n" , !!can_die, |
170 | !!has_accel, !!single); |
171 | int rv; |
172 | /* scan and report all matches */ |
173 | if (can_die) { |
174 | if (has_accel) { |
175 | rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state, |
176 | cached_accept_id, single, offset, cur_buf, start, |
177 | end, scanned); |
178 | } else { |
179 | rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state, |
180 | cached_accept_id, single, offset, cur_buf, start, |
181 | end, scanned); |
182 | } |
183 | if (rv == MO_HALT_MATCHING) { |
184 | return MO_DEAD; |
185 | } |
186 | rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state, |
187 | cached_accept_id, single, offset, cur_buf, *scanned, end, |
188 | scanned); |
189 | } else { |
190 | if (has_accel) { |
191 | rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state, |
192 | cached_accept_id, single, offset, cur_buf, start, |
193 | end, scanned); |
194 | } else { |
195 | rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state, |
196 | cached_accept_id, single, offset, cur_buf, start, |
197 | end, scanned); |
198 | } |
199 | if (rv == MO_HALT_MATCHING) { |
200 | return MO_DEAD; |
201 | } |
202 | rv = sheng_co(state, cb, ctxt, sh, cached_accept_state, |
203 | cached_accept_id, single, offset, cur_buf, *scanned, end, |
204 | scanned); |
205 | } |
206 | if (rv == MO_HALT_MATCHING) { |
207 | return MO_DEAD; |
208 | } |
209 | return MO_ALIVE; |
210 | } |
211 | |
212 | static really_inline |
213 | void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, |
214 | u8 *const cached_accept_state, ReportID *const cached_accept_id, |
215 | const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, |
216 | u8 has_accel, u8 single, const u8 **scanned, u8 *state) { |
217 | DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n" , |
218 | (u64a)(end - start), offset); |
219 | DEBUG_PRINTF("start: %lli end: %lli\n" , (s64a)(start - cur_buf), |
220 | (s64a)(end - cur_buf)); |
221 | DEBUG_PRINTF("can die: %u has accel: %u single: %u\n" , !!can_die, |
222 | !!has_accel, !!single); |
223 | /* just scan the buffer */ |
224 | if (can_die) { |
225 | if (has_accel) { |
226 | sheng4_nmda(state, cb, ctxt, sh, cached_accept_state, |
227 | cached_accept_id, single, offset, cur_buf, start, end, |
228 | scanned); |
229 | } else { |
230 | sheng4_nmd(state, cb, ctxt, sh, cached_accept_state, |
231 | cached_accept_id, single, offset, cur_buf, start, end, |
232 | scanned); |
233 | } |
234 | sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, |
235 | single, offset, cur_buf, *scanned, end, scanned); |
236 | } else { |
237 | sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, |
238 | single, offset, cur_buf, start, end, scanned); |
239 | sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, |
240 | single, offset, cur_buf, *scanned, end, scanned); |
241 | } |
242 | } |
243 | |
244 | static really_inline |
245 | char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt, |
246 | u64a offset, u8 *const cached_accept_state, |
247 | ReportID *const cached_accept_id, const u8 *cur_buf, |
248 | const u8 *start, const u8 *end, u8 can_die, u8 has_accel, |
249 | u8 single, const u8 **scanned, u8 *state) { |
250 | DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n" , |
251 | (u64a)(end - start), offset); |
252 | DEBUG_PRINTF("start: %lli end: %lli\n" , (s64a)(start - cur_buf), |
253 | (s64a)(end - cur_buf)); |
254 | DEBUG_PRINTF("can die: %u has accel: %u single: %u\n" , !!can_die, |
255 | !!has_accel, !!single); |
256 | int rv; |
257 | /* scan until first match */ |
258 | if (can_die) { |
259 | if (has_accel) { |
260 | rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state, |
261 | cached_accept_id, single, offset, cur_buf, start, |
262 | end, scanned); |
263 | } else { |
264 | rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state, |
265 | cached_accept_id, single, offset, cur_buf, start, |
266 | end, scanned); |
267 | } |
268 | if (rv == MO_HALT_MATCHING) { |
269 | return MO_DEAD; |
270 | } |
271 | /* if we stopped before we expected, we found a match */ |
272 | if (rv == MO_MATCHES_PENDING) { |
273 | return MO_MATCHES_PENDING; |
274 | } |
275 | |
276 | rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state, |
277 | cached_accept_id, single, offset, cur_buf, *scanned, |
278 | end, scanned); |
279 | } else { |
280 | if (has_accel) { |
281 | rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state, |
282 | cached_accept_id, single, offset, cur_buf, start, |
283 | end, scanned); |
284 | } else { |
285 | rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state, |
286 | cached_accept_id, single, offset, cur_buf, start, |
287 | end, scanned); |
288 | } |
289 | if (rv == MO_HALT_MATCHING) { |
290 | return MO_DEAD; |
291 | } |
292 | /* if we stopped before we expected, we found a match */ |
293 | if (rv == MO_MATCHES_PENDING) { |
294 | return MO_MATCHES_PENDING; |
295 | } |
296 | |
297 | rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state, |
298 | cached_accept_id, single, offset, cur_buf, *scanned, end, |
299 | scanned); |
300 | } |
301 | if (rv == MO_HALT_MATCHING) { |
302 | return MO_DEAD; |
303 | } |
304 | /* if we stopped before we expected, we found a match */ |
305 | if (rv == MO_MATCHES_PENDING) { |
306 | return MO_MATCHES_PENDING; |
307 | } |
308 | return MO_ALIVE; |
309 | } |
310 | |
/*
 * Queue-driven execution of a Sheng engine.
 *
 * sh     Sheng structure to run.
 * q      queue; the current item must be MQE_START. The state byte is loaded
 *        from q->state and written back on the exits noted below.
 * b_end  in CALLBACK_OUTPUT / STOP_AT_MATCH mode, scanning never goes past
 *        this location; it is not consulted in NO_MATCHES mode.
 * mode   selects runShengCb / runShengSam / runShengNm for the actual scan.
 *
 * Locations below zero are scanned out of q->history (indexed backwards from
 * its end), locations from zero upwards out of q->buffer; a scan segment
 * that would straddle location 0 is split in two.
 *
 * Returns:
 *   MO_DEAD             a callback requested a halt, or (at MQE_END, when
 *                       the engine can die) the state has SHENG_STATE_DEAD
 *                       set. On the callback-halt exits q->state is NOT
 *                       updated.
 *   MO_MATCHES_PENDING  STOP_AT_MATCH only: a match was found; the queue is
 *                       rewound to an MQE_START item just past the match and
 *                       q->state is updated.
 *   MO_ALIVE            otherwise (end of queue, or b_end reached).
 */
static never_inline
char runSheng(const struct sheng *sh, struct mq *q, s64a b_end,
              enum MatchMode mode) {
    u8 state = *(u8 *)q->state;
    u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE;
    u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL;
    u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT;

    /* one-entry report cache shared by all fireReports calls in this run */
    u8 cached_accept_state = 0;
    ReportID cached_accept_id = 0;

    DEBUG_PRINTF("starting Sheng execution in state %u\n",
                 state & SHENG_STATE_MASK);

    /* matches for the state we start in were left pending: fire them first */
    if (q->report_current) {
        DEBUG_PRINTF("reporting current pending matches\n");
        assert(sh);

        q->report_current = 0;

        int rv;
        if (single) {
            rv = fireSingleReport(q->cb, q->context, sh->report,
                                  q_cur_offset(q));
        } else {
            rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q),
                             &cached_accept_state, &cached_accept_id, 0);
        }
        if (rv == MO_HALT_MATCHING) {
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            return MO_DEAD;
        }

        DEBUG_PRINTF("proceeding with matching\n");
    }

    assert(q_cur_type(q) == MQE_START);
    s64a start = q_cur_loc(q);

    DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start,
                 mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" :
                 mode == NO_MATCHES ? "NO MATCHES" :
                 mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???");

    DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                 q_cur_type(q) == MQE_START ? "START" :
                 q_cur_type(q) == MQE_TOP ? "TOP" :
                 q_cur_type(q) == MQE_END ? "END" : "???");

    /* base pointer such that cur_buf + location addresses the byte at that
     * location: end of history for negative locations, start of the buffer
     * otherwise */
    const u8* cur_buf;
    if (start < 0) {
        DEBUG_PRINTF("negative location, scanning history\n");
        DEBUG_PRINTF("min location: %zd\n", -q->hlength);
        cur_buf = q->history + q->hlength;
    } else {
        DEBUG_PRINTF("positive location, scanning buffer\n");
        DEBUG_PRINTF("max location: %lli\n", b_end);
        cur_buf = q->buffer;
    }

    /* if our queue event is past our end: pull the START item back to b_end
     * and leave without consuming it */
    if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
        DEBUG_PRINTF("current location past buffer end\n");
        DEBUG_PRINTF("setting q location to %llu\n", b_end);
        DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
        q->items[q->cur].location = b_end;
        return MO_ALIVE;
    }

    /* consume the START item; from here on q->cur is the next event */
    q->cur++;

    /* location up to which data has already been scanned */
    s64a cur_start = start;

    while (1) {
        DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q),
                     q_cur_type(q) == MQE_START ? "START" :
                     q_cur_type(q) == MQE_TOP ? "TOP" :
                     q_cur_type(q) == MQE_END ? "END" : "???");
        /* scan up to the next event, but not past b_end in match modes */
        s64a end = q_cur_loc(q);
        if (mode != NO_MATCHES) {
            end = MIN(end, b_end);
        }
        assert(end <= (s64a) q->length);
        s64a cur_end = end;

        /* we may cross the border between history and current buffer */
        if (cur_start < 0) {
            cur_end = MIN(0, cur_end);
        }

        DEBUG_PRINTF("start: %lli end: %lli\n", start, end);

        /* don't scan zero length buffer */
        if (cur_start != cur_end) {
            const u8 * scanned = cur_buf;
            char rv;

            if (mode == NO_MATCHES) {
                runShengNm(sh, q->cb, q->context, q->offset,
                           &cached_accept_state, &cached_accept_id, cur_buf,
                           cur_buf + cur_start, cur_buf + cur_end, can_die,
                           has_accel, single, &scanned, &state);
            } else if (mode == CALLBACK_OUTPUT) {
                rv = runShengCb(sh, q->cb, q->context, q->offset,
                                &cached_accept_state, &cached_accept_id,
                                cur_buf, cur_buf + cur_start, cur_buf + cur_end,
                                can_die, has_accel, single, &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return MO_DEAD;
                }
            } else if (mode == STOP_AT_MATCH) {
                rv = runShengSam(sh, q->cb, q->context, q->offset,
                                 &cached_accept_state, &cached_accept_id,
                                 cur_buf, cur_buf + cur_start,
                                 cur_buf + cur_end, can_die, has_accel, single,
                                 &scanned, &state);
                if (rv == MO_DEAD) {
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                } else if (rv == MO_MATCHES_PENDING) {
                    /* re-use the queue slot before the current event as a
                     * fresh START item positioned right after the match, and
                     * save the state so the caller can resume from there */
                    assert(q->cur);
                    DEBUG_PRINTF("found a match, setting q location to %zd\n",
                                 scanned - cur_buf + 1);
                    q->cur--;
                    q->items[q->cur].type = MQE_START;
                    q->items[q->cur].location =
                        scanned - cur_buf + 1; /* due to exiting early */
                    *(u8 *)q->state = state;
                    DEBUG_PRINTF("exiting in state %u\n",
                                 state & SHENG_STATE_MASK);
                    return rv;
                }
            } else {
                assert(!"invalid scanning mode!");
            }
            /* a scan that did not exit early must have consumed the segment */
            assert(scanned == cur_buf + cur_end);

            cur_start = cur_end;
        }

        /* if our queue event is past our end: park a START item at b_end in
         * the slot before the current event, save the state and stop */
        if (mode != NO_MATCHES && q_cur_loc(q) > b_end) {
            DEBUG_PRINTF("current location past buffer end\n");
            DEBUG_PRINTF("setting q location to %llu\n", b_end);
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = b_end;
            *(u8 *)q->state = state;
            return MO_ALIVE;
        }

        /* crossing over into actual buffer */
        if (cur_start == 0) {
            DEBUG_PRINTF("positive location, scanning buffer\n");
            DEBUG_PRINTF("max offset: %lli\n", b_end);
            cur_buf = q->buffer;
        }

        /* continue scanning the same buffer: the segment was cut short at
         * the history/buffer border, so the event is not reached yet */
        if (end != cur_end) {
            continue;
        }

        /* all data up to the event has been scanned: handle the event */
        switch (q_cur_type(q)) {
        case MQE_END:
            *(u8 *)q->state = state;
            q->cur++;
            DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK);
            if (can_die) {
                return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE;
            }
            return MO_ALIVE;
        case MQE_TOP:
            /* a top at absolute offset 0 restarts in the anchored state;
             * anywhere else the state's own top transition is taken */
            if (q->offset + cur_start == 0) {
                DEBUG_PRINTF("Anchored start, going to state %u\n",
                             sh->anchored);
                state = sh->anchored;
            } else {
                u8 new_state = get_aux(sh, state)->top;
                DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK,
                             new_state & SHENG_STATE_MASK);
                state = new_state;
            }
            break;
        default:
            assert(!"invalid queue event");
            break;
        }
        q->cur++;
    }
}
506 | |
507 | char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, |
508 | size_t length, NfaCallback cb, void *context) { |
509 | DEBUG_PRINTF("smallwrite Sheng\n" ); |
510 | assert(n->type == SHENG_NFA); |
511 | const struct sheng *sh = getImplNfa(n); |
512 | u8 state = sh->anchored; |
513 | u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; |
514 | u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; |
515 | u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; |
516 | u8 cached_accept_state = 0; |
517 | ReportID cached_accept_id = 0; |
518 | |
519 | /* scan and report all matches */ |
520 | int rv; |
521 | s64a end = length; |
522 | const u8 *scanned; |
523 | |
524 | rv = runShengCb(sh, cb, context, offset, &cached_accept_state, |
525 | &cached_accept_id, buffer, buffer, buffer + end, can_die, |
526 | has_accel, single, &scanned, &state); |
527 | if (rv == MO_DEAD) { |
528 | DEBUG_PRINTF("exiting in state %u\n" , |
529 | state & SHENG_STATE_MASK); |
530 | return MO_DEAD; |
531 | } |
532 | |
533 | DEBUG_PRINTF("%u\n" , state & SHENG_STATE_MASK); |
534 | |
535 | const struct sstate_aux *aux = get_aux(sh, state); |
536 | |
537 | if (aux->accept_eod) { |
538 | DEBUG_PRINTF("Reporting EOD matches\n" ); |
539 | fireReports(sh, cb, context, state, end + offset, &cached_accept_state, |
540 | &cached_accept_id, 1); |
541 | } |
542 | |
543 | return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE; |
544 | } |
545 | |
546 | char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end) { |
547 | const struct sheng *sh = get_sheng(n); |
548 | char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); |
549 | return rv; |
550 | } |
551 | |
552 | char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end) { |
553 | const struct sheng *sh = get_sheng(n); |
554 | char rv = runSheng(sh, q, end, STOP_AT_MATCH); |
555 | return rv; |
556 | } |
557 | |
558 | char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report) { |
559 | assert(q_cur_type(q) == MQE_START); |
560 | |
561 | const struct sheng *sh = get_sheng(n); |
562 | char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); |
563 | |
564 | if (rv && nfaExecSheng_inAccept(n, report, q)) { |
565 | return MO_MATCHES_PENDING; |
566 | } |
567 | return rv; |
568 | } |
569 | |
570 | char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q) { |
571 | assert(n && q); |
572 | |
573 | const struct sheng *sh = get_sheng(n); |
574 | u8 s = *(const u8 *)q->state; |
575 | DEBUG_PRINTF("checking accepts for %u\n" , (u8)(s & SHENG_STATE_MASK)); |
576 | |
577 | const struct sstate_aux *aux = get_aux(sh, s); |
578 | |
579 | if (!aux->accept) { |
580 | return 0; |
581 | } |
582 | |
583 | return shengHasAccept(sh, aux, report); |
584 | } |
585 | |
586 | char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q) { |
587 | assert(n && q); |
588 | |
589 | const struct sheng *sh = get_sheng(n); |
590 | u8 s = *(const u8 *)q->state; |
591 | DEBUG_PRINTF("checking accepts for %u\n" , (u8)(s & SHENG_STATE_MASK)); |
592 | |
593 | const struct sstate_aux *aux = get_aux(sh, s); |
594 | return !!aux->accept; |
595 | } |
596 | |
597 | char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, |
598 | UNUSED const char *streamState, u64a offset, |
599 | NfaCallback cb, void *ctxt) { |
600 | assert(nfa); |
601 | |
602 | const struct sheng *sh = get_sheng(nfa); |
603 | u8 s = *(const u8 *)state; |
604 | DEBUG_PRINTF("checking EOD accepts for %u\n" , (u8)(s & SHENG_STATE_MASK)); |
605 | |
606 | const struct sstate_aux *aux = get_aux(sh, s); |
607 | |
608 | if (!aux->accept_eod) { |
609 | return MO_CONTINUE_MATCHING; |
610 | } |
611 | |
612 | return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); |
613 | } |
614 | |
615 | char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { |
616 | const struct sheng *sh = (const struct sheng *)getImplNfa(n); |
617 | NfaCallback cb = q->cb; |
618 | void *ctxt = q->context; |
619 | u8 s = *(u8 *)q->state; |
620 | const struct sstate_aux *aux = get_aux(sh, s); |
621 | u64a offset = q_cur_offset(q); |
622 | u8 cached_state_id = 0; |
623 | ReportID cached_report_id = 0; |
624 | assert(q_cur_type(q) == MQE_START); |
625 | |
626 | if (aux->accept) { |
627 | if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { |
628 | fireSingleReport(cb, ctxt, sh->report, offset); |
629 | } else { |
630 | fireReports(sh, cb, ctxt, s, offset, &cached_state_id, |
631 | &cached_report_id, 0); |
632 | } |
633 | } |
634 | |
635 | return 0; |
636 | } |
637 | |
638 | char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, |
639 | void *state, UNUSED u8 key) { |
640 | const struct sheng *sh = get_sheng(nfa); |
641 | u8 *s = (u8 *)state; |
642 | *s = offset ? sh->floating: sh->anchored; |
643 | return !(*s & SHENG_STATE_DEAD); |
644 | } |
645 | |
646 | char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q) { |
647 | assert(nfa->scratchStateSize == 1); |
648 | |
649 | /* starting in floating state */ |
650 | const struct sheng *sh = get_sheng(nfa); |
651 | *(u8 *)q->state = sh->floating; |
652 | DEBUG_PRINTF("starting in floating state\n" ); |
653 | return 0; |
654 | } |
655 | |
656 | char nfaExecSheng_queueCompressState(UNUSED const struct NFA *nfa, |
657 | const struct mq *q, UNUSED s64a loc) { |
658 | void *dest = q->streamState; |
659 | const void *src = q->state; |
660 | assert(nfa->scratchStateSize == 1); |
661 | assert(nfa->streamStateSize == 1); |
662 | *(u8 *)dest = *(const u8 *)src; |
663 | return 0; |
664 | } |
665 | |
666 | char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, |
667 | const void *src, UNUSED u64a offset, |
668 | UNUSED u8 key) { |
669 | assert(nfa->scratchStateSize == 1); |
670 | assert(nfa->streamStateSize == 1); |
671 | *(u8 *)dest = *(const u8 *)src; |
672 | return 0; |
673 | } |
674 | |