| 1 | /* |
| 2 | * Copyright (c) 2016-2018, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "sheng.h" |
| 30 | |
| 31 | #include "accel.h" |
| 32 | #include "sheng_internal.h" |
| 33 | #include "nfa_api.h" |
| 34 | #include "nfa_api_queue.h" |
| 35 | #include "nfa_internal.h" |
| 36 | #include "util/bitutils.h" |
| 37 | #include "util/compare.h" |
| 38 | #include "util/join.h" |
| 39 | #include "util/simd_utils.h" |
| 40 | |
/* Scanning modes that runSheng() can be asked to operate in. */
enum MatchMode {
    CALLBACK_OUTPUT = 0, /* scan and report all matches via the callback */
    STOP_AT_MATCH = 1,   /* scan until the first match, then stop */
    NO_MATCHES = 2       /* just scan the buffer; no result is checked */
};
| 46 | |
| 47 | static really_inline |
| 48 | const struct sheng *get_sheng(const struct NFA *n) { |
| 49 | return (const struct sheng *)getImplNfa(n); |
| 50 | } |
| 51 | |
| 52 | static really_inline |
| 53 | const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) { |
| 54 | u32 offset = sh->aux_offset - sizeof(struct NFA) + |
| 55 | (id & SHENG_STATE_MASK) * sizeof(struct sstate_aux); |
| 56 | DEBUG_PRINTF("Getting aux for state %u at offset %llu\n" , |
| 57 | id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA)); |
| 58 | return (const struct sstate_aux *)((const char *) sh + offset); |
| 59 | } |
| 60 | |
| 61 | static really_inline |
| 62 | const union AccelAux *get_accel(const struct sheng *sh, u8 id) { |
| 63 | const struct sstate_aux *saux = get_aux(sh, id); |
| 64 | DEBUG_PRINTF("Getting accel aux at offset %u\n" , saux->accel); |
| 65 | const union AccelAux *aux = (const union AccelAux *) |
| 66 | ((const char *)sh + saux->accel - sizeof(struct NFA)); |
| 67 | return aux; |
| 68 | } |
| 69 | |
| 70 | static really_inline |
| 71 | const struct report_list *get_rl(const struct sheng *sh, |
| 72 | const struct sstate_aux *aux) { |
| 73 | DEBUG_PRINTF("Getting report list at offset %u\n" , aux->accept); |
| 74 | return (const struct report_list *) |
| 75 | ((const char *)sh + aux->accept - sizeof(struct NFA)); |
| 76 | } |
| 77 | |
| 78 | static really_inline |
| 79 | const struct report_list *get_eod_rl(const struct sheng *sh, |
| 80 | const struct sstate_aux *aux) { |
| 81 | DEBUG_PRINTF("Getting EOD report list at offset %u\n" , aux->accept); |
| 82 | return (const struct report_list *) |
| 83 | ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); |
| 84 | } |
| 85 | |
| 86 | static really_inline |
| 87 | char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux, |
| 88 | ReportID report) { |
| 89 | assert(sh && aux); |
| 90 | |
| 91 | const struct report_list *rl = get_rl(sh, aux); |
| 92 | assert(ISALIGNED_N(rl, 4)); |
| 93 | |
| 94 | DEBUG_PRINTF("report list has %u entries\n" , rl->count); |
| 95 | |
| 96 | for (u32 i = 0; i < rl->count; i++) { |
| 97 | if (rl->report[i] == report) { |
| 98 | DEBUG_PRINTF("reporting %u\n" , rl->report[i]); |
| 99 | return 1; |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | return 0; |
| 104 | } |
| 105 | |
| 106 | static really_inline |
| 107 | char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) { |
| 108 | DEBUG_PRINTF("reporting %u\n" , r); |
| 109 | if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) { |
| 110 | return MO_HALT_MATCHING; /* termination requested */ |
| 111 | } |
| 112 | return MO_CONTINUE_MATCHING; /* continue execution */ |
| 113 | } |
| 114 | |
| 115 | static really_inline |
| 116 | char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, |
| 117 | const u8 state, u64a loc, u8 *const cached_accept_state, |
| 118 | ReportID *const cached_accept_id, char eod) { |
| 119 | DEBUG_PRINTF("reporting matches @ %llu\n" , loc); |
| 120 | |
| 121 | if (!eod && state == *cached_accept_state) { |
| 122 | DEBUG_PRINTF("reporting %u\n" , *cached_accept_id); |
| 123 | if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { |
| 124 | return MO_HALT_MATCHING; /* termination requested */ |
| 125 | } |
| 126 | |
| 127 | return MO_CONTINUE_MATCHING; /* continue execution */ |
| 128 | } |
| 129 | const struct sstate_aux *aux = get_aux(sh, state); |
| 130 | const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux); |
| 131 | assert(ISALIGNED(rl)); |
| 132 | |
| 133 | DEBUG_PRINTF("report list has %u entries\n" , rl->count); |
| 134 | u32 count = rl->count; |
| 135 | |
| 136 | if (!eod && count == 1) { |
| 137 | *cached_accept_state = state; |
| 138 | *cached_accept_id = rl->report[0]; |
| 139 | |
| 140 | DEBUG_PRINTF("reporting %u\n" , rl->report[0]); |
| 141 | if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { |
| 142 | return MO_HALT_MATCHING; /* termination requested */ |
| 143 | } |
| 144 | |
| 145 | return MO_CONTINUE_MATCHING; /* continue execution */ |
| 146 | } |
| 147 | |
| 148 | for (u32 i = 0; i < count; i++) { |
| 149 | DEBUG_PRINTF("reporting %u\n" , rl->report[i]); |
| 150 | if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { |
| 151 | return MO_HALT_MATCHING; /* termination requested */ |
| 152 | } |
| 153 | } |
| 154 | return MO_CONTINUE_MATCHING; /* continue execution */ |
| 155 | } |
| 156 | |
| 157 | /* include Sheng function definitions */ |
| 158 | #include "sheng_defs.h" |
| 159 | |
| 160 | static really_inline |
| 161 | char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, |
| 162 | u8 *const cached_accept_state, ReportID *const cached_accept_id, |
| 163 | const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, |
| 164 | u8 has_accel, u8 single, const u8 **scanned, u8 *state) { |
| 165 | DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n" , |
| 166 | (u64a)(end - start), offset); |
| 167 | DEBUG_PRINTF("start: %lli end: %lli\n" , (s64a)(start - cur_buf), |
| 168 | (s64a)(end - cur_buf)); |
| 169 | DEBUG_PRINTF("can die: %u has accel: %u single: %u\n" , !!can_die, |
| 170 | !!has_accel, !!single); |
| 171 | int rv; |
| 172 | /* scan and report all matches */ |
| 173 | if (can_die) { |
| 174 | if (has_accel) { |
| 175 | rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state, |
| 176 | cached_accept_id, single, offset, cur_buf, start, |
| 177 | end, scanned); |
| 178 | } else { |
| 179 | rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state, |
| 180 | cached_accept_id, single, offset, cur_buf, start, |
| 181 | end, scanned); |
| 182 | } |
| 183 | if (rv == MO_HALT_MATCHING) { |
| 184 | return MO_DEAD; |
| 185 | } |
| 186 | rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state, |
| 187 | cached_accept_id, single, offset, cur_buf, *scanned, end, |
| 188 | scanned); |
| 189 | } else { |
| 190 | if (has_accel) { |
| 191 | rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state, |
| 192 | cached_accept_id, single, offset, cur_buf, start, |
| 193 | end, scanned); |
| 194 | } else { |
| 195 | rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state, |
| 196 | cached_accept_id, single, offset, cur_buf, start, |
| 197 | end, scanned); |
| 198 | } |
| 199 | if (rv == MO_HALT_MATCHING) { |
| 200 | return MO_DEAD; |
| 201 | } |
| 202 | rv = sheng_co(state, cb, ctxt, sh, cached_accept_state, |
| 203 | cached_accept_id, single, offset, cur_buf, *scanned, end, |
| 204 | scanned); |
| 205 | } |
| 206 | if (rv == MO_HALT_MATCHING) { |
| 207 | return MO_DEAD; |
| 208 | } |
| 209 | return MO_ALIVE; |
| 210 | } |
| 211 | |
| 212 | static really_inline |
| 213 | void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, |
| 214 | u8 *const cached_accept_state, ReportID *const cached_accept_id, |
| 215 | const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, |
| 216 | u8 has_accel, u8 single, const u8 **scanned, u8 *state) { |
| 217 | DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n" , |
| 218 | (u64a)(end - start), offset); |
| 219 | DEBUG_PRINTF("start: %lli end: %lli\n" , (s64a)(start - cur_buf), |
| 220 | (s64a)(end - cur_buf)); |
| 221 | DEBUG_PRINTF("can die: %u has accel: %u single: %u\n" , !!can_die, |
| 222 | !!has_accel, !!single); |
| 223 | /* just scan the buffer */ |
| 224 | if (can_die) { |
| 225 | if (has_accel) { |
| 226 | sheng4_nmda(state, cb, ctxt, sh, cached_accept_state, |
| 227 | cached_accept_id, single, offset, cur_buf, start, end, |
| 228 | scanned); |
| 229 | } else { |
| 230 | sheng4_nmd(state, cb, ctxt, sh, cached_accept_state, |
| 231 | cached_accept_id, single, offset, cur_buf, start, end, |
| 232 | scanned); |
| 233 | } |
| 234 | sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, |
| 235 | single, offset, cur_buf, *scanned, end, scanned); |
| 236 | } else { |
| 237 | sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, |
| 238 | single, offset, cur_buf, start, end, scanned); |
| 239 | sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, |
| 240 | single, offset, cur_buf, *scanned, end, scanned); |
| 241 | } |
| 242 | } |
| 243 | |
| 244 | static really_inline |
| 245 | char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt, |
| 246 | u64a offset, u8 *const cached_accept_state, |
| 247 | ReportID *const cached_accept_id, const u8 *cur_buf, |
| 248 | const u8 *start, const u8 *end, u8 can_die, u8 has_accel, |
| 249 | u8 single, const u8 **scanned, u8 *state) { |
| 250 | DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n" , |
| 251 | (u64a)(end - start), offset); |
| 252 | DEBUG_PRINTF("start: %lli end: %lli\n" , (s64a)(start - cur_buf), |
| 253 | (s64a)(end - cur_buf)); |
| 254 | DEBUG_PRINTF("can die: %u has accel: %u single: %u\n" , !!can_die, |
| 255 | !!has_accel, !!single); |
| 256 | int rv; |
| 257 | /* scan until first match */ |
| 258 | if (can_die) { |
| 259 | if (has_accel) { |
| 260 | rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state, |
| 261 | cached_accept_id, single, offset, cur_buf, start, |
| 262 | end, scanned); |
| 263 | } else { |
| 264 | rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state, |
| 265 | cached_accept_id, single, offset, cur_buf, start, |
| 266 | end, scanned); |
| 267 | } |
| 268 | if (rv == MO_HALT_MATCHING) { |
| 269 | return MO_DEAD; |
| 270 | } |
| 271 | /* if we stopped before we expected, we found a match */ |
| 272 | if (rv == MO_MATCHES_PENDING) { |
| 273 | return MO_MATCHES_PENDING; |
| 274 | } |
| 275 | |
| 276 | rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state, |
| 277 | cached_accept_id, single, offset, cur_buf, *scanned, |
| 278 | end, scanned); |
| 279 | } else { |
| 280 | if (has_accel) { |
| 281 | rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state, |
| 282 | cached_accept_id, single, offset, cur_buf, start, |
| 283 | end, scanned); |
| 284 | } else { |
| 285 | rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state, |
| 286 | cached_accept_id, single, offset, cur_buf, start, |
| 287 | end, scanned); |
| 288 | } |
| 289 | if (rv == MO_HALT_MATCHING) { |
| 290 | return MO_DEAD; |
| 291 | } |
| 292 | /* if we stopped before we expected, we found a match */ |
| 293 | if (rv == MO_MATCHES_PENDING) { |
| 294 | return MO_MATCHES_PENDING; |
| 295 | } |
| 296 | |
| 297 | rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state, |
| 298 | cached_accept_id, single, offset, cur_buf, *scanned, end, |
| 299 | scanned); |
| 300 | } |
| 301 | if (rv == MO_HALT_MATCHING) { |
| 302 | return MO_DEAD; |
| 303 | } |
| 304 | /* if we stopped before we expected, we found a match */ |
| 305 | if (rv == MO_MATCHES_PENDING) { |
| 306 | return MO_MATCHES_PENDING; |
| 307 | } |
| 308 | return MO_ALIVE; |
| 309 | } |
| 310 | |
| 311 | static never_inline |
| 312 | char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, |
| 313 | enum MatchMode mode) { |
| 314 | u8 state = *(u8 *)q->state; |
| 315 | u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; |
| 316 | u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; |
| 317 | u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; |
| 318 | |
| 319 | u8 cached_accept_state = 0; |
| 320 | ReportID cached_accept_id = 0; |
| 321 | |
| 322 | DEBUG_PRINTF("starting Sheng execution in state %u\n" , |
| 323 | state & SHENG_STATE_MASK); |
| 324 | |
| 325 | if (q->report_current) { |
| 326 | DEBUG_PRINTF("reporting current pending matches\n" ); |
| 327 | assert(sh); |
| 328 | |
| 329 | q->report_current = 0; |
| 330 | |
| 331 | int rv; |
| 332 | if (single) { |
| 333 | rv = fireSingleReport(q->cb, q->context, sh->report, |
| 334 | q_cur_offset(q)); |
| 335 | } else { |
| 336 | rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q), |
| 337 | &cached_accept_state, &cached_accept_id, 0); |
| 338 | } |
| 339 | if (rv == MO_HALT_MATCHING) { |
| 340 | DEBUG_PRINTF("exiting in state %u\n" , state & SHENG_STATE_MASK); |
| 341 | return MO_DEAD; |
| 342 | } |
| 343 | |
| 344 | DEBUG_PRINTF("proceeding with matching\n" ); |
| 345 | } |
| 346 | |
| 347 | assert(q_cur_type(q) == MQE_START); |
| 348 | s64a start = q_cur_loc(q); |
| 349 | |
| 350 | DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n" , q->offset, start, |
| 351 | mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : |
| 352 | mode == NO_MATCHES ? "NO MATCHES" : |
| 353 | mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???" ); |
| 354 | |
| 355 | DEBUG_PRINTF("processing event @ %lli: %s\n" , q->offset + q_cur_loc(q), |
| 356 | q_cur_type(q) == MQE_START ? "START" : |
| 357 | q_cur_type(q) == MQE_TOP ? "TOP" : |
| 358 | q_cur_type(q) == MQE_END ? "END" : "???" ); |
| 359 | |
| 360 | const u8* cur_buf; |
| 361 | if (start < 0) { |
| 362 | DEBUG_PRINTF("negative location, scanning history\n" ); |
| 363 | DEBUG_PRINTF("min location: %zd\n" , -q->hlength); |
| 364 | cur_buf = q->history + q->hlength; |
| 365 | } else { |
| 366 | DEBUG_PRINTF("positive location, scanning buffer\n" ); |
| 367 | DEBUG_PRINTF("max location: %lli\n" , b_end); |
| 368 | cur_buf = q->buffer; |
| 369 | } |
| 370 | |
| 371 | /* if we our queue event is past our end */ |
| 372 | if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { |
| 373 | DEBUG_PRINTF("current location past buffer end\n" ); |
| 374 | DEBUG_PRINTF("setting q location to %llu\n" , b_end); |
| 375 | DEBUG_PRINTF("exiting in state %u\n" , state & SHENG_STATE_MASK); |
| 376 | q->items[q->cur].location = b_end; |
| 377 | return MO_ALIVE; |
| 378 | } |
| 379 | |
| 380 | q->cur++; |
| 381 | |
| 382 | s64a cur_start = start; |
| 383 | |
| 384 | while (1) { |
| 385 | DEBUG_PRINTF("processing event @ %lli: %s\n" , q->offset + q_cur_loc(q), |
| 386 | q_cur_type(q) == MQE_START ? "START" : |
| 387 | q_cur_type(q) == MQE_TOP ? "TOP" : |
| 388 | q_cur_type(q) == MQE_END ? "END" : "???" ); |
| 389 | s64a end = q_cur_loc(q); |
| 390 | if (mode != NO_MATCHES) { |
| 391 | end = MIN(end, b_end); |
| 392 | } |
| 393 | assert(end <= (s64a) q->length); |
| 394 | s64a cur_end = end; |
| 395 | |
| 396 | /* we may cross the border between history and current buffer */ |
| 397 | if (cur_start < 0) { |
| 398 | cur_end = MIN(0, cur_end); |
| 399 | } |
| 400 | |
| 401 | DEBUG_PRINTF("start: %lli end: %lli\n" , start, end); |
| 402 | |
| 403 | /* don't scan zero length buffer */ |
| 404 | if (cur_start != cur_end) { |
| 405 | const u8 * scanned = cur_buf; |
| 406 | char rv; |
| 407 | |
| 408 | if (mode == NO_MATCHES) { |
| 409 | runShengNm(sh, q->cb, q->context, q->offset, |
| 410 | &cached_accept_state, &cached_accept_id, cur_buf, |
| 411 | cur_buf + cur_start, cur_buf + cur_end, can_die, |
| 412 | has_accel, single, &scanned, &state); |
| 413 | } else if (mode == CALLBACK_OUTPUT) { |
| 414 | rv = runShengCb(sh, q->cb, q->context, q->offset, |
| 415 | &cached_accept_state, &cached_accept_id, |
| 416 | cur_buf, cur_buf + cur_start, cur_buf + cur_end, |
| 417 | can_die, has_accel, single, &scanned, &state); |
| 418 | if (rv == MO_DEAD) { |
| 419 | DEBUG_PRINTF("exiting in state %u\n" , |
| 420 | state & SHENG_STATE_MASK); |
| 421 | return MO_DEAD; |
| 422 | } |
| 423 | } else if (mode == STOP_AT_MATCH) { |
| 424 | rv = runShengSam(sh, q->cb, q->context, q->offset, |
| 425 | &cached_accept_state, &cached_accept_id, |
| 426 | cur_buf, cur_buf + cur_start, |
| 427 | cur_buf + cur_end, can_die, has_accel, single, |
| 428 | &scanned, &state); |
| 429 | if (rv == MO_DEAD) { |
| 430 | DEBUG_PRINTF("exiting in state %u\n" , |
| 431 | state & SHENG_STATE_MASK); |
| 432 | return rv; |
| 433 | } else if (rv == MO_MATCHES_PENDING) { |
| 434 | assert(q->cur); |
| 435 | DEBUG_PRINTF("found a match, setting q location to %zd\n" , |
| 436 | scanned - cur_buf + 1); |
| 437 | q->cur--; |
| 438 | q->items[q->cur].type = MQE_START; |
| 439 | q->items[q->cur].location = |
| 440 | scanned - cur_buf + 1; /* due to exiting early */ |
| 441 | *(u8 *)q->state = state; |
| 442 | DEBUG_PRINTF("exiting in state %u\n" , |
| 443 | state & SHENG_STATE_MASK); |
| 444 | return rv; |
| 445 | } |
| 446 | } else { |
| 447 | assert(!"invalid scanning mode!" ); |
| 448 | } |
| 449 | assert(scanned == cur_buf + cur_end); |
| 450 | |
| 451 | cur_start = cur_end; |
| 452 | } |
| 453 | |
| 454 | /* if we our queue event is past our end */ |
| 455 | if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { |
| 456 | DEBUG_PRINTF("current location past buffer end\n" ); |
| 457 | DEBUG_PRINTF("setting q location to %llu\n" , b_end); |
| 458 | DEBUG_PRINTF("exiting in state %u\n" , state & SHENG_STATE_MASK); |
| 459 | q->cur--; |
| 460 | q->items[q->cur].type = MQE_START; |
| 461 | q->items[q->cur].location = b_end; |
| 462 | *(u8 *)q->state = state; |
| 463 | return MO_ALIVE; |
| 464 | } |
| 465 | |
| 466 | /* crossing over into actual buffer */ |
| 467 | if (cur_start == 0) { |
| 468 | DEBUG_PRINTF("positive location, scanning buffer\n" ); |
| 469 | DEBUG_PRINTF("max offset: %lli\n" , b_end); |
| 470 | cur_buf = q->buffer; |
| 471 | } |
| 472 | |
| 473 | /* continue scanning the same buffer */ |
| 474 | if (end != cur_end) { |
| 475 | continue; |
| 476 | } |
| 477 | |
| 478 | switch (q_cur_type(q)) { |
| 479 | case MQE_END: |
| 480 | *(u8 *)q->state = state; |
| 481 | q->cur++; |
| 482 | DEBUG_PRINTF("exiting in state %u\n" , state & SHENG_STATE_MASK); |
| 483 | if (can_die) { |
| 484 | return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE; |
| 485 | } |
| 486 | return MO_ALIVE; |
| 487 | case MQE_TOP: |
| 488 | if (q->offset + cur_start == 0) { |
| 489 | DEBUG_PRINTF("Anchored start, going to state %u\n" , |
| 490 | sh->anchored); |
| 491 | state = sh->anchored; |
| 492 | } else { |
| 493 | u8 new_state = get_aux(sh, state)->top; |
| 494 | DEBUG_PRINTF("Top event %u->%u\n" , state & SHENG_STATE_MASK, |
| 495 | new_state & SHENG_STATE_MASK); |
| 496 | state = new_state; |
| 497 | } |
| 498 | break; |
| 499 | default: |
| 500 | assert(!"invalid queue event" ); |
| 501 | break; |
| 502 | } |
| 503 | q->cur++; |
| 504 | } |
| 505 | } |
| 506 | |
| 507 | char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, |
| 508 | size_t length, NfaCallback cb, void *context) { |
| 509 | DEBUG_PRINTF("smallwrite Sheng\n" ); |
| 510 | assert(n->type == SHENG_NFA); |
| 511 | const struct sheng *sh = getImplNfa(n); |
| 512 | u8 state = sh->anchored; |
| 513 | u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; |
| 514 | u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; |
| 515 | u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; |
| 516 | u8 cached_accept_state = 0; |
| 517 | ReportID cached_accept_id = 0; |
| 518 | |
| 519 | /* scan and report all matches */ |
| 520 | int rv; |
| 521 | s64a end = length; |
| 522 | const u8 *scanned; |
| 523 | |
| 524 | rv = runShengCb(sh, cb, context, offset, &cached_accept_state, |
| 525 | &cached_accept_id, buffer, buffer, buffer + end, can_die, |
| 526 | has_accel, single, &scanned, &state); |
| 527 | if (rv == MO_DEAD) { |
| 528 | DEBUG_PRINTF("exiting in state %u\n" , |
| 529 | state & SHENG_STATE_MASK); |
| 530 | return MO_DEAD; |
| 531 | } |
| 532 | |
| 533 | DEBUG_PRINTF("%u\n" , state & SHENG_STATE_MASK); |
| 534 | |
| 535 | const struct sstate_aux *aux = get_aux(sh, state); |
| 536 | |
| 537 | if (aux->accept_eod) { |
| 538 | DEBUG_PRINTF("Reporting EOD matches\n" ); |
| 539 | fireReports(sh, cb, context, state, end + offset, &cached_accept_state, |
| 540 | &cached_accept_id, 1); |
| 541 | } |
| 542 | |
| 543 | return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE; |
| 544 | } |
| 545 | |
| 546 | char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end) { |
| 547 | const struct sheng *sh = get_sheng(n); |
| 548 | char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); |
| 549 | return rv; |
| 550 | } |
| 551 | |
| 552 | char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end) { |
| 553 | const struct sheng *sh = get_sheng(n); |
| 554 | char rv = runSheng(sh, q, end, STOP_AT_MATCH); |
| 555 | return rv; |
| 556 | } |
| 557 | |
| 558 | char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report) { |
| 559 | assert(q_cur_type(q) == MQE_START); |
| 560 | |
| 561 | const struct sheng *sh = get_sheng(n); |
| 562 | char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); |
| 563 | |
| 564 | if (rv && nfaExecSheng_inAccept(n, report, q)) { |
| 565 | return MO_MATCHES_PENDING; |
| 566 | } |
| 567 | return rv; |
| 568 | } |
| 569 | |
| 570 | char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q) { |
| 571 | assert(n && q); |
| 572 | |
| 573 | const struct sheng *sh = get_sheng(n); |
| 574 | u8 s = *(const u8 *)q->state; |
| 575 | DEBUG_PRINTF("checking accepts for %u\n" , (u8)(s & SHENG_STATE_MASK)); |
| 576 | |
| 577 | const struct sstate_aux *aux = get_aux(sh, s); |
| 578 | |
| 579 | if (!aux->accept) { |
| 580 | return 0; |
| 581 | } |
| 582 | |
| 583 | return shengHasAccept(sh, aux, report); |
| 584 | } |
| 585 | |
| 586 | char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q) { |
| 587 | assert(n && q); |
| 588 | |
| 589 | const struct sheng *sh = get_sheng(n); |
| 590 | u8 s = *(const u8 *)q->state; |
| 591 | DEBUG_PRINTF("checking accepts for %u\n" , (u8)(s & SHENG_STATE_MASK)); |
| 592 | |
| 593 | const struct sstate_aux *aux = get_aux(sh, s); |
| 594 | return !!aux->accept; |
| 595 | } |
| 596 | |
| 597 | char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, |
| 598 | UNUSED const char *streamState, u64a offset, |
| 599 | NfaCallback cb, void *ctxt) { |
| 600 | assert(nfa); |
| 601 | |
| 602 | const struct sheng *sh = get_sheng(nfa); |
| 603 | u8 s = *(const u8 *)state; |
| 604 | DEBUG_PRINTF("checking EOD accepts for %u\n" , (u8)(s & SHENG_STATE_MASK)); |
| 605 | |
| 606 | const struct sstate_aux *aux = get_aux(sh, s); |
| 607 | |
| 608 | if (!aux->accept_eod) { |
| 609 | return MO_CONTINUE_MATCHING; |
| 610 | } |
| 611 | |
| 612 | return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); |
| 613 | } |
| 614 | |
| 615 | char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { |
| 616 | const struct sheng *sh = (const struct sheng *)getImplNfa(n); |
| 617 | NfaCallback cb = q->cb; |
| 618 | void *ctxt = q->context; |
| 619 | u8 s = *(u8 *)q->state; |
| 620 | const struct sstate_aux *aux = get_aux(sh, s); |
| 621 | u64a offset = q_cur_offset(q); |
| 622 | u8 cached_state_id = 0; |
| 623 | ReportID cached_report_id = 0; |
| 624 | assert(q_cur_type(q) == MQE_START); |
| 625 | |
| 626 | if (aux->accept) { |
| 627 | if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { |
| 628 | fireSingleReport(cb, ctxt, sh->report, offset); |
| 629 | } else { |
| 630 | fireReports(sh, cb, ctxt, s, offset, &cached_state_id, |
| 631 | &cached_report_id, 0); |
| 632 | } |
| 633 | } |
| 634 | |
| 635 | return 0; |
| 636 | } |
| 637 | |
| 638 | char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, |
| 639 | void *state, UNUSED u8 key) { |
| 640 | const struct sheng *sh = get_sheng(nfa); |
| 641 | u8 *s = (u8 *)state; |
| 642 | *s = offset ? sh->floating: sh->anchored; |
| 643 | return !(*s & SHENG_STATE_DEAD); |
| 644 | } |
| 645 | |
| 646 | char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q) { |
| 647 | assert(nfa->scratchStateSize == 1); |
| 648 | |
| 649 | /* starting in floating state */ |
| 650 | const struct sheng *sh = get_sheng(nfa); |
| 651 | *(u8 *)q->state = sh->floating; |
| 652 | DEBUG_PRINTF("starting in floating state\n" ); |
| 653 | return 0; |
| 654 | } |
| 655 | |
| 656 | char nfaExecSheng_queueCompressState(UNUSED const struct NFA *nfa, |
| 657 | const struct mq *q, UNUSED s64a loc) { |
| 658 | void *dest = q->streamState; |
| 659 | const void *src = q->state; |
| 660 | assert(nfa->scratchStateSize == 1); |
| 661 | assert(nfa->streamStateSize == 1); |
| 662 | *(u8 *)dest = *(const u8 *)src; |
| 663 | return 0; |
| 664 | } |
| 665 | |
| 666 | char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, |
| 667 | const void *src, UNUSED u64a offset, |
| 668 | UNUSED u8 key) { |
| 669 | assert(nfa->scratchStateSize == 1); |
| 670 | assert(nfa->streamStateSize == 1); |
| 671 | *(u8 *)dest = *(const u8 *)src; |
| 672 | return 0; |
| 673 | } |
| 674 | |