1/*
2 * Copyright (c) 2015-2019, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Runtime functions.
31 */
32
33#include <stdlib.h>
34#include <string.h>
35
36#include "allocator.h"
37#include "hs_compile.h" /* for HS_MODE_* flags */
38#include "hs_runtime.h"
39#include "hs_internal.h"
40#include "hwlm/hwlm.h"
41#include "nfa/mcclellan.h"
42#include "nfa/nfa_api.h"
43#include "nfa/nfa_api_util.h"
44#include "nfa/nfa_internal.h"
45#include "nfa/nfa_rev_api.h"
46#include "nfa/sheng.h"
47#include "smallwrite/smallwrite_internal.h"
48#include "rose/rose.h"
49#include "rose/runtime.h"
50#include "database.h"
51#include "report.h"
52#include "scratch.h"
53#include "som/som_runtime.h"
54#include "som/som_stream.h"
55#include "state.h"
56#include "stream_compress.h"
57#include "ue2common.h"
58#include "util/exhaust.h"
59#include "util/multibit.h"
60
61static really_inline
62void prefetch_data(const char *data, unsigned length) {
63 __builtin_prefetch(data);
64 __builtin_prefetch(data + length/2);
65 __builtin_prefetch(data + length - 24);
66}
67
68/** dummy event handler for use when user does not provide one */
69static
70int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
71 UNUSED unsigned long long to, UNUSED unsigned flags,
72 UNUSED void *ctxt) {
73 return 0;
74}
75
76static really_inline
77u32 getHistoryAmount(const struct RoseEngine *t, u64a offset) {
78 return MIN(t->historyRequired, offset);
79}
80
81static really_inline
82u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) {
83 return (u8 *)state + t->stateOffsets.history + t->historyRequired
84 - MIN(t->historyRequired, offset);
85}
86
87/** \brief Sanity checks for scratch space.
88 *
89 * Although more at home in scratch.c, it is located here to be closer to its
90 * callers.
91 */
92static really_inline
93char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) {
94 if (!ISALIGNED_CL(s)) {
95 DEBUG_PRINTF("bad alignment %p\n", s);
96 return 0;
97 }
98
99 if (s->magic != SCRATCH_MAGIC) {
100 DEBUG_PRINTF("bad magic 0x%x\n", s->magic);
101 return 0;
102 }
103
104 if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) {
105 DEBUG_PRINTF("bad state size\n");
106 return 0;
107 }
108
109 if (t->queueCount > s->queueCount) {
110 DEBUG_PRINTF("bad queue count\n");
111 return 0;
112 }
113
114 /* TODO: add quick rose sanity checks */
115
116 return 1;
117}
118
119static really_inline
120void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
121 char *state, match_event_handler onEvent, void *userCtx,
122 const char *data, size_t length, const u8 *history,
123 size_t hlen, u64a offset, u8 status,
124 UNUSED unsigned int flags) {
125 assert(rose);
126 s->core_info.userContext = userCtx;
127 s->core_info.userCallback = onEvent ? onEvent : null_onEvent;
128 s->core_info.rose = rose;
129 s->core_info.state = state; /* required for chained queues + evec */
130
131 s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted;
132 s->core_info.status = status;
133 s->core_info.buf = (const u8 *)data;
134 s->core_info.len = length;
135 s->core_info.hbuf = history;
136 s->core_info.hlen = hlen;
137 s->core_info.buf_offset = offset;
138
139 /* and some stuff not actually in core info */
140 s->som_set_now_offset = ~0ULL;
141 s->deduper.current_report_offset = ~0ULL;
142 s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
143 s->fdr_conf = NULL;
144 s->pure = 0;
145
146 // Rose program execution (used for some report paths) depends on these
147 // values being initialised.
148 s->tctxt.lastMatchOffset = 0;
149 s->tctxt.minMatchOffset = offset;
150 s->tctxt.minNonMpvMatchOffset = offset;
151}
152
153#define STATUS_VALID_BITS \
154 (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY | STATUS_ERROR)
155
156/** \brief Retrieve status bitmask from stream state. */
157static really_inline
158u8 getStreamStatus(const char *state) {
159 u8 status = *(const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS);
160 assert((status & ~STATUS_VALID_BITS) == 0);
161 return status;
162}
163
164/** \brief Store status bitmask to stream state. */
165static really_inline
166void setStreamStatus(char *state, u8 status) {
167 assert((status & ~STATUS_VALID_BITS) == 0);
168 *(u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS) = status;
169}
170
171/** \brief Initialise SOM state. Used in both block and streaming mode. */
172static really_inline
173void initSomState(const struct RoseEngine *rose, char *state) {
174 assert(rose && state);
175 const u32 somCount = rose->somLocationCount;
176 mmbit_clear((u8 *)state + rose->stateOffsets.somValid, somCount);
177 mmbit_clear((u8 *)state + rose->stateOffsets.somWritable, somCount);
178}
179
180static really_inline
181void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) {
182 assert(rose);
183 assert(scratch);
184
185 initSomState(rose, scratch->core_info.state);
186
187 DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len);
188
189 roseBlockExec(rose, scratch);
190}
191
192static really_inline
193void pureLiteralInitScratch(struct hs_scratch *scratch, u64a offset) {
194 // Some init has already been done.
195 assert(offset == scratch->core_info.buf_offset);
196
197 scratch->tctxt.lit_offset_adjust = offset + 1;
198 scratch->tctxt.lastEndOffset = offset;
199 scratch->tctxt.delayLastEndOffset = offset;
200 scratch->tctxt.filledDelayedSlots = 0;
201 scratch->al_log_sum = 0;
202}
203
204static really_inline
205void pureLiteralBlockExec(const struct RoseEngine *rose,
206 struct hs_scratch *scratch) {
207 assert(rose);
208 assert(scratch);
209
210 const struct HWLM *ftable = getFLiteralMatcher(rose);
211 initSomState(rose, scratch->core_info.state);
212 const u8 *buffer = scratch->core_info.buf;
213 size_t length = scratch->core_info.len;
214 DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl);
215
216 pureLiteralInitScratch(scratch, 0);
217 scratch->tctxt.groups = rose->initialGroups;
218
219 hwlmExec(ftable, buffer, length, 0, roseCallback, scratch,
220 rose->initialGroups & rose->floating_group_mask);
221}
222
223static really_inline
224void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
225 struct hs_scratch *scratch) {
226 const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
227 q->nfa = getNfaByInfo(t, info);
228 q->end = 0;
229 q->cur = 0;
230 q->state = scratch->fullState + info->fullStateOffset;
231 q->streamState = (char *)scratch->core_info.state + info->stateOffset;
232 q->offset = scratch->core_info.buf_offset;
233 q->buffer = scratch->core_info.buf;
234 q->length = scratch->core_info.len;
235 q->history = scratch->core_info.hbuf;
236 q->hlength = scratch->core_info.hlen;
237 q->cb = roseReportAdaptor;
238 q->context = scratch;
239 q->report_current = 0;
240
241 DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
242 "state=%u\n", qi, q->offset, info->fullStateOffset,
243 info->stateOffset, *(u32 *)q->state);
244}
245
246static never_inline
247void soleOutfixBlockExec(const struct RoseEngine *t,
248 struct hs_scratch *scratch) {
249 assert(t);
250 assert(scratch);
251
252 initSomState(t, scratch->core_info.state);
253 assert(t->outfixEndQueue == 1);
254 assert(!t->amatcherOffset);
255 assert(!t->ematcherOffset);
256 assert(!t->fmatcherOffset);
257
258 const struct NFA *nfa = getNfaByQueue(t, 0);
259
260 size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
261 scratch->core_info.len);
262 if (!len) {
263 return;
264 }
265
266 struct mq *q = scratch->queues;
267 initOutfixQueue(q, 0, t, scratch);
268 q->length = len; /* adjust for rev_accel */
269 nfaQueueInitState(nfa, q);
270 pushQueueAt(q, 0, MQE_START, 0);
271 pushQueueAt(q, 1, MQE_TOP, 0);
272 pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
273
274 char rv = nfaQueueExec(q->nfa, q, scratch->core_info.len);
275
276 if (rv && nfaAcceptsEod(nfa) && len == scratch->core_info.len) {
277 nfaCheckFinalState(nfa, q->state, q->streamState, q->length, q->cb,
278 scratch);
279 }
280}
281
282static rose_inline
283void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
284 struct hs_scratch *scratch) {
285 assert(smwr);
286 assert(scratch);
287
288 const u8 *buffer = scratch->core_info.buf;
289 size_t length = scratch->core_info.len;
290
291 DEBUG_PRINTF("USING SMALL WRITE\n");
292
293 if (length <= smwr->start_offset) {
294 DEBUG_PRINTF("too short\n");
295 return;
296 }
297
298 const struct NFA *nfa = getSmwrNfa(smwr);
299
300 size_t local_alen = length - smwr->start_offset;
301 const u8 *local_buffer = buffer + smwr->start_offset;
302
303 assert(isDfaType(nfa->type));
304 if (nfa->type == MCCLELLAN_NFA_8) {
305 nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer,
306 local_alen, roseReportAdaptor, scratch);
307 } else if (nfa->type == MCCLELLAN_NFA_16) {
308 nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer,
309 local_alen, roseReportAdaptor, scratch);
310 } else {
311 nfaExecSheng_B(nfa, smwr->start_offset, local_buffer,
312 local_alen, roseReportAdaptor, scratch);
313 }
314}
315
316HS_PUBLIC_API
317hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
318 unsigned length, unsigned flags,
319 hs_scratch_t *scratch, match_event_handler onEvent,
320 void *userCtx) {
321 if (unlikely(!scratch || !data)) {
322 return HS_INVALID;
323 }
324
325 hs_error_t err = validDatabase(db);
326 if (unlikely(err != HS_SUCCESS)) {
327 return err;
328 }
329
330 const struct RoseEngine *rose = hs_get_bytecode(db);
331 if (unlikely(!ISALIGNED_16(rose))) {
332 return HS_INVALID;
333 }
334
335 if (unlikely(rose->mode != HS_MODE_BLOCK)) {
336 return HS_DB_MODE_ERROR;
337 }
338
339 if (unlikely(!validScratch(rose, scratch))) {
340 return HS_INVALID;
341 }
342
343 if (unlikely(markScratchInUse(scratch))) {
344 return HS_SCRATCH_IN_USE;
345 }
346
347 if (rose->minWidth > length) {
348 DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
349 unmarkScratchInUse(scratch);
350 return HS_SUCCESS;
351 }
352
353 prefetch_data(data, length);
354
355 /* populate core info in scratch */
356 populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
357 length, NULL, 0, 0, 0, flags);
358
359 clearEvec(rose, scratch->core_info.exhaustionVector);
360 if (rose->ckeyCount) {
361 scratch->core_info.logicalVector = scratch->bstate +
362 rose->stateOffsets.logicalVec;
363 scratch->core_info.combVector = scratch->bstate +
364 rose->stateOffsets.combVec;
365 scratch->tctxt.lastCombMatchOffset = 0;
366 clearLvec(rose, scratch->core_info.logicalVector,
367 scratch->core_info.combVector);
368 }
369
370 if (!length) {
371 if (rose->boundary.reportZeroEodOffset) {
372 roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0,
373 scratch);
374 }
375 goto set_retval;
376 }
377
378 if (rose->boundary.reportZeroOffset) {
379 int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
380 0, scratch);
381 if (rv == MO_HALT_MATCHING) {
382 goto set_retval;
383 }
384 }
385
386 if (rose->minWidthExcludingBoundaries > length) {
387 DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
388 rose->minWidthExcludingBoundaries, length);
389 goto done_scan;
390 }
391
392 // Similarly, we may have a maximum width (for engines constructed entirely
393 // of bi-anchored patterns).
394 if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
395 && length > rose->maxBiAnchoredWidth) {
396 DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
397 rose->maxBiAnchoredWidth);
398 goto done_scan;
399 }
400
401 // Is this a small write case?
402 if (rose->smallWriteOffset) {
403 const struct SmallWriteEngine *smwr = getSmallWrite(rose);
404 assert(smwr);
405
406 // Apply the small write engine if and only if the block (buffer) is
407 // small enough. Otherwise, we allow rose &co to deal with it.
408 if (length < smwr->largestBuffer) {
409 DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
410 length);
411 runSmallWriteEngine(smwr, scratch);
412 goto done_scan;
413 }
414 }
415
416 switch (rose->runtimeImpl) {
417 default:
418 assert(0);
419 case ROSE_RUNTIME_FULL_ROSE:
420 rawBlockExec(rose, scratch);
421 break;
422 case ROSE_RUNTIME_PURE_LITERAL:
423 pureLiteralBlockExec(rose, scratch);
424 break;
425 case ROSE_RUNTIME_SINGLE_OUTFIX:
426 soleOutfixBlockExec(rose, scratch);
427 break;
428 }
429
430done_scan:
431 if (unlikely(internal_matching_error(scratch))) {
432 unmarkScratchInUse(scratch);
433 return HS_UNKNOWN_ERROR;
434 } else if (told_to_stop_matching(scratch)) {
435 unmarkScratchInUse(scratch);
436 return HS_SCAN_TERMINATED;
437 }
438
439 if (rose->hasSom) {
440 int halt = flushStoredSomMatches(scratch, ~0ULL);
441 if (halt) {
442 unmarkScratchInUse(scratch);
443 return HS_SCAN_TERMINATED;
444 }
445 }
446
447 if (rose->boundary.reportEodOffset) {
448 roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length,
449 scratch);
450 }
451
452set_retval:
453 if (unlikely(internal_matching_error(scratch))) {
454 unmarkScratchInUse(scratch);
455 return HS_UNKNOWN_ERROR;
456 }
457
458 if (rose->flushCombProgramOffset) {
459 if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
460 if (unlikely(internal_matching_error(scratch))) {
461 unmarkScratchInUse(scratch);
462 return HS_UNKNOWN_ERROR;
463 }
464 unmarkScratchInUse(scratch);
465 return HS_SCAN_TERMINATED;
466 }
467 }
468
469 DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
470 told_to_stop_matching(scratch));
471 hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED
472 : HS_SUCCESS;
473 unmarkScratchInUse(scratch);
474 return rv;
475}
476
477static really_inline
478void maintainHistoryBuffer(const struct RoseEngine *rose, char *state,
479 const char *buffer, size_t length) {
480 if (!rose->historyRequired) {
481 return;
482 }
483
484 // Hopefully few of our users are scanning no data.
485 if (unlikely(length == 0)) {
486 DEBUG_PRINTF("zero-byte scan\n");
487 return;
488 }
489
490 char *his_state = state + rose->stateOffsets.history;
491
492 if (length < rose->historyRequired) {
493 size_t shortfall = rose->historyRequired - length;
494 memmove(his_state, his_state + rose->historyRequired - shortfall,
495 shortfall);
496 }
497 size_t amount = MIN(rose->historyRequired, length);
498
499 memcpy(his_state + rose->historyRequired - amount, buffer + length - amount,
500 amount);
501#ifdef DEBUG_HISTORY
502 printf("History [%u] : ", rose->historyRequired);
503 for (size_t i = 0; i < rose->historyRequired; i++) {
504 printf(" %02hhx", his_state[i]);
505 }
506 printf("\n");
507#endif
508}
509
510static really_inline
511void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
512 char init_history) {
513 char *state = getMultiState(s);
514
515 if (init_history) {
516 // Make absolutely sure that the 16 bytes leading up to the end of the
517 // history buffer are initialised, as we rely on this (regardless of the
518 // actual values used) in FDR.
519 char *hist_end =
520 state + rose->stateOffsets.history + rose->historyRequired;
521 assert(hist_end - 16 >= (const char *)s);
522 memset(hist_end - 16, 0x5a, 16);
523 }
524
525 s->rose = rose;
526 s->offset = 0;
527
528 setStreamStatus(state, 0);
529 roseInitState(rose, state);
530
531 clearEvec(rose, state + rose->stateOffsets.exhausted);
532 if (rose->ckeyCount) {
533 clearLvec(rose, state + rose->stateOffsets.logicalVec,
534 state + rose->stateOffsets.combVec);
535 }
536
537 // SOM state multibit structures.
538 initSomState(rose, state);
539}
540
541HS_PUBLIC_API
542hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db,
543 UNUSED unsigned flags,
544 hs_stream_t **stream) {
545 if (unlikely(!stream)) {
546 return HS_INVALID;
547 }
548
549 *stream = NULL;
550
551 hs_error_t err = validDatabase(db);
552 if (unlikely(err != HS_SUCCESS)) {
553 return err;
554 }
555
556 const struct RoseEngine *rose = hs_get_bytecode(db);
557 if (unlikely(!ISALIGNED_16(rose))) {
558 return HS_INVALID;
559 }
560
561 if (unlikely(rose->mode != HS_MODE_STREAM)) {
562 return HS_DB_MODE_ERROR;
563 }
564
565 size_t stateSize = rose->stateOffsets.end;
566 struct hs_stream *s = hs_stream_alloc(sizeof(struct hs_stream) + stateSize);
567 if (unlikely(!s)) {
568 return HS_NOMEM;
569 }
570
571 init_stream(s, rose, 1);
572
573 *stream = s;
574 return HS_SUCCESS;
575}
576
577
578static really_inline
579void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
580 const struct RoseEngine *rose = id->rose;
581
582 if (can_stop_matching(scratch)) {
583 DEBUG_PRINTF("stream already broken\n");
584 return;
585 }
586
587 if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
588 DEBUG_PRINTF("stream exhausted\n");
589 return;
590 }
591
592 roseStreamEodExec(rose, id->offset, scratch);
593}
594
595static never_inline
596void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
597 const struct RoseEngine *t = id->rose;
598
599 if (can_stop_matching(scratch)) {
600 DEBUG_PRINTF("stream already broken\n");
601 return;
602 }
603
604 if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
605 DEBUG_PRINTF("stream exhausted\n");
606 return;
607 }
608
609 assert(t->outfixEndQueue == 1);
610 assert(!t->amatcherOffset);
611 assert(!t->ematcherOffset);
612 assert(!t->fmatcherOffset);
613
614 const struct NFA *nfa = getNfaByQueue(t, 0);
615
616 struct mq *q = scratch->queues;
617 initOutfixQueue(q, 0, t, scratch);
618 if (!scratch->core_info.buf_offset) {
619 DEBUG_PRINTF("buf_offset is zero\n");
620 return; /* no vacuous engines */
621 }
622
623 nfaExpandState(nfa, q->state, q->streamState, q->offset,
624 queue_prev_byte(q, 0));
625
626 assert(nfaAcceptsEod(nfa));
627 nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb,
628 scratch);
629}
630
631static really_inline
632void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
633 match_event_handler onEvent, void *context) {
634 DEBUG_PRINTF("--- report eod matches at offset %llu\n", id->offset);
635 assert(onEvent);
636
637 const struct RoseEngine *rose = id->rose;
638 char *state = getMultiState(id);
639 u8 status = getStreamStatus(state);
640
641 if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) {
642 DEBUG_PRINTF("stream is broken, just freeing storage\n");
643 return;
644 }
645
646 populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0,
647 getHistory(state, rose, id->offset),
648 getHistoryAmount(rose, id->offset), id->offset, status, 0);
649
650 if (rose->ckeyCount) {
651 scratch->core_info.logicalVector = state +
652 rose->stateOffsets.logicalVec;
653 scratch->core_info.combVector = state + rose->stateOffsets.combVec;
654 scratch->tctxt.lastCombMatchOffset = id->offset;
655 }
656
657 if (rose->somLocationCount) {
658 loadSomFromStream(scratch, id->offset);
659 }
660
661 if (!id->offset) {
662 if (rose->boundary.reportZeroEodOffset) {
663 int rv = roseRunBoundaryProgram(
664 rose, rose->boundary.reportZeroEodOffset, 0, scratch);
665 if (rv == MO_HALT_MATCHING) {
666 return;
667 }
668 }
669 } else {
670 if (rose->boundary.reportEodOffset) {
671 int rv = roseRunBoundaryProgram(
672 rose, rose->boundary.reportEodOffset, id->offset, scratch);
673 if (rv == MO_HALT_MATCHING) {
674 return;
675 }
676 }
677
678 if (rose->requiresEodCheck) {
679 switch (rose->runtimeImpl) {
680 default:
681 case ROSE_RUNTIME_PURE_LITERAL:
682 assert(0);
683 case ROSE_RUNTIME_FULL_ROSE:
684 rawEodExec(id, scratch);
685 break;
686 case ROSE_RUNTIME_SINGLE_OUTFIX:
687 soleOutfixEodExec(id, scratch);
688 break;
689 }
690 }
691 }
692
693 if (rose->hasSom && !told_to_stop_matching(scratch)) {
694 int halt = flushStoredSomMatches(scratch, ~0ULL);
695 if (halt) {
696 DEBUG_PRINTF("told to stop matching\n");
697 scratch->core_info.status |= STATUS_TERMINATED;
698 }
699 }
700
701 if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
702 if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
703 DEBUG_PRINTF("told to stop matching\n");
704 scratch->core_info.status |= STATUS_TERMINATED;
705 }
706 }
707}
708
709HS_PUBLIC_API
710hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
711 const hs_stream_t *from_id) {
712 if (!to_id) {
713 return HS_INVALID;
714 }
715
716 *to_id = NULL;
717
718 if (!from_id || !from_id->rose) {
719 return HS_INVALID;
720 }
721
722 const struct RoseEngine *rose = from_id->rose;
723 size_t stateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
724
725 struct hs_stream *s = hs_stream_alloc(stateSize);
726 if (!s) {
727 return HS_NOMEM;
728 }
729
730 memcpy(s, from_id, stateSize);
731
732 *to_id = s;
733
734 return HS_SUCCESS;
735}
736
737HS_PUBLIC_API
738hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
739 const hs_stream_t *from_id,
740 hs_scratch_t *scratch,
741 match_event_handler onEvent,
742 void *context) {
743 if (!from_id || !from_id->rose) {
744 return HS_INVALID;
745 }
746
747 if (!to_id || to_id->rose != from_id->rose) {
748 return HS_INVALID;
749 }
750
751 if (to_id == from_id) {
752 return HS_INVALID;
753 }
754
755 if (onEvent) {
756 if (!scratch || !validScratch(to_id->rose, scratch)) {
757 return HS_INVALID;
758 }
759 if (unlikely(markScratchInUse(scratch))) {
760 return HS_SCRATCH_IN_USE;
761 }
762 report_eod_matches(to_id, scratch, onEvent, context);
763 if (unlikely(internal_matching_error(scratch))) {
764 unmarkScratchInUse(scratch);
765 return HS_UNKNOWN_ERROR;
766 }
767 unmarkScratchInUse(scratch);
768 }
769
770 size_t stateSize
771 = sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
772
773 memcpy(to_id, from_id, stateSize);
774
775 return HS_SUCCESS;
776}
777
778static really_inline
779void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
780 assert(stream_state);
781 assert(scratch);
782 assert(!can_stop_matching(scratch));
783
784 DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
785 stream_state->offset, scratch->core_info.len);
786
787 const struct RoseEngine *rose = stream_state->rose;
788 assert(rose);
789 roseStreamExec(rose, scratch);
790
791 if (!told_to_stop_matching(scratch) &&
792 isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
793 DEBUG_PRINTF("stream exhausted\n");
794 scratch->core_info.status |= STATUS_EXHAUSTED;
795 }
796}
797
798static really_inline
799void pureLiteralStreamExec(struct hs_stream *stream_state,
800 struct hs_scratch *scratch) {
801 assert(stream_state);
802 assert(scratch);
803 assert(!can_stop_matching(scratch));
804
805 const struct RoseEngine *rose = stream_state->rose;
806 const struct HWLM *ftable = getFLiteralMatcher(rose);
807
808 size_t len2 = scratch->core_info.len;
809
810 DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
811 stream_state->offset, scratch->core_info.len);
812
813 pureLiteralInitScratch(scratch, stream_state->offset);
814 scratch->tctxt.groups = loadGroups(rose, scratch->core_info.state);
815
816 // Pure literal cases don't have floatingMinDistance set, so we always
817 // start the match region at zero.
818 const size_t start = 0;
819
820 hwlmExecStreaming(ftable, len2, start, roseCallback, scratch,
821 rose->initialGroups & rose->floating_group_mask);
822
823 if (!told_to_stop_matching(scratch) &&
824 isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
825 DEBUG_PRINTF("stream exhausted\n");
826 scratch->core_info.status |= STATUS_EXHAUSTED;
827 }
828}
829
830static never_inline
831void soleOutfixStreamExec(struct hs_stream *stream_state,
832 struct hs_scratch *scratch) {
833 assert(stream_state);
834 assert(scratch);
835 assert(!can_stop_matching(scratch));
836
837 const struct RoseEngine *t = stream_state->rose;
838 assert(t->outfixEndQueue == 1);
839 assert(!t->amatcherOffset);
840 assert(!t->ematcherOffset);
841 assert(!t->fmatcherOffset);
842
843 const struct NFA *nfa = getNfaByQueue(t, 0);
844
845 struct mq *q = scratch->queues;
846 initOutfixQueue(q, 0, t, scratch);
847 if (!scratch->core_info.buf_offset) {
848 nfaQueueInitState(nfa, q);
849 pushQueueAt(q, 0, MQE_START, 0);
850 pushQueueAt(q, 1, MQE_TOP, 0);
851 pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
852 } else {
853 nfaExpandState(nfa, q->state, q->streamState, q->offset,
854 queue_prev_byte(q, 0));
855 pushQueueAt(q, 0, MQE_START, 0);
856 pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
857 }
858
859 if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
860 nfaQueueCompressState(nfa, q, scratch->core_info.len);
861 } else if (!told_to_stop_matching(scratch)) {
862 scratch->core_info.status |= STATUS_EXHAUSTED;
863 }
864}
865
866static inline
867hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
868 unsigned length, UNUSED unsigned flags,
869 hs_scratch_t *scratch,
870 match_event_handler onEvent, void *context) {
871 assert(id);
872 assert(scratch);
873
874 if (unlikely(!data)) {
875 return HS_INVALID;
876 }
877
878 const struct RoseEngine *rose = id->rose;
879 char *state = getMultiState(id);
880
881 u8 status = getStreamStatus(state);
882 if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) {
883 DEBUG_PRINTF("stream is broken, halting scan\n");
884 if (status & STATUS_ERROR) {
885 return HS_UNKNOWN_ERROR;
886 } else if (status & STATUS_TERMINATED) {
887 return HS_SCAN_TERMINATED;
888 } else {
889 return HS_SUCCESS;
890 }
891 }
892
893 // We avoid doing any work if the user has given us zero bytes of data to
894 // scan. Arguably we should define some semantics for how we treat vacuous
895 // cases here.
896 if (unlikely(length == 0)) {
897 DEBUG_PRINTF("zero length block\n");
898 return HS_SUCCESS;
899 }
900
901 u32 historyAmount = getHistoryAmount(rose, id->offset);
902 populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
903 getHistory(state, rose, id->offset), historyAmount,
904 id->offset, status, flags);
905 if (rose->ckeyCount) {
906 scratch->core_info.logicalVector = state +
907 rose->stateOffsets.logicalVec;
908 scratch->core_info.combVector = state + rose->stateOffsets.combVec;
909 scratch->tctxt.lastCombMatchOffset = id->offset;
910 }
911 assert(scratch->core_info.hlen <= id->offset
912 && scratch->core_info.hlen <= rose->historyRequired);
913
914 prefetch_data(data, length);
915
916 if (rose->somLocationCount) {
917 loadSomFromStream(scratch, id->offset);
918 }
919
920 if (!id->offset && rose->boundary.reportZeroOffset) {
921 DEBUG_PRINTF("zero reports\n");
922 int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
923 0, scratch);
924 if (rv == MO_HALT_MATCHING) {
925 DEBUG_PRINTF("halting scan\n");
926 setStreamStatus(state, scratch->core_info.status);
927 if (told_to_stop_matching(scratch)) {
928 return HS_SCAN_TERMINATED;
929 } else {
930 assert(scratch->core_info.status & STATUS_EXHAUSTED);
931 return HS_SUCCESS;
932 }
933 }
934 }
935
936 switch (rose->runtimeImpl) {
937 default:
938 assert(0);
939 case ROSE_RUNTIME_FULL_ROSE:
940 rawStreamExec(id, scratch);
941 break;
942 case ROSE_RUNTIME_PURE_LITERAL:
943 pureLiteralStreamExec(id, scratch);
944 break;
945 case ROSE_RUNTIME_SINGLE_OUTFIX:
946 soleOutfixStreamExec(id, scratch);
947 }
948
949 if (rose->hasSom && !told_to_stop_matching(scratch)) {
950 int halt = flushStoredSomMatches(scratch, ~0ULL);
951 if (halt) {
952 scratch->core_info.status |= STATUS_TERMINATED;
953 }
954 }
955
956 setStreamStatus(state, scratch->core_info.status);
957
958 if (unlikely(internal_matching_error(scratch))) {
959 return HS_UNKNOWN_ERROR;
960 } else if (likely(!can_stop_matching(scratch))) {
961 maintainHistoryBuffer(rose, state, data, length);
962 id->offset += length; /* maintain offset */
963
964 if (rose->somLocationCount) {
965 storeSomToStream(scratch, id->offset);
966 }
967 } else if (told_to_stop_matching(scratch)) {
968 return HS_SCAN_TERMINATED;
969 }
970
971 return HS_SUCCESS;
972}
973
974HS_PUBLIC_API
975hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
976 unsigned length, unsigned flags,
977 hs_scratch_t *scratch,
978 match_event_handler onEvent, void *context) {
979 if (unlikely(!id || !scratch || !data ||
980 !validScratch(id->rose, scratch))) {
981 return HS_INVALID;
982 }
983
984 if (unlikely(markScratchInUse(scratch))) {
985 return HS_SCRATCH_IN_USE;
986 }
987 hs_error_t rv = hs_scan_stream_internal(id, data, length, flags, scratch,
988 onEvent, context);
989 unmarkScratchInUse(scratch);
990 return rv;
991}
992
993HS_PUBLIC_API
994hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
995 match_event_handler onEvent,
996 void *context) {
997 if (!id) {
998 return HS_INVALID;
999 }
1000
1001 if (onEvent) {
1002 if (!scratch || !validScratch(id->rose, scratch)) {
1003 return HS_INVALID;
1004 }
1005 if (unlikely(markScratchInUse(scratch))) {
1006 return HS_SCRATCH_IN_USE;
1007 }
1008 report_eod_matches(id, scratch, onEvent, context);
1009 if (unlikely(internal_matching_error(scratch))) {
1010 unmarkScratchInUse(scratch);
1011 return HS_UNKNOWN_ERROR;
1012 }
1013 unmarkScratchInUse(scratch);
1014 }
1015
1016 if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
1017 if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
1018 == MO_HALT_MATCHING) {
1019 scratch->core_info.status |= STATUS_TERMINATED;
1020 if (unlikely(internal_matching_error(scratch))) {
1021 unmarkScratchInUse(scratch);
1022 return HS_UNKNOWN_ERROR;
1023 }
1024 unmarkScratchInUse(scratch);
1025 }
1026 }
1027
1028 hs_stream_free(id);
1029
1030 return HS_SUCCESS;
1031}
1032
1033HS_PUBLIC_API
1034hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
1035 hs_scratch_t *scratch,
1036 match_event_handler onEvent,
1037 void *context) {
1038 if (!id) {
1039 return HS_INVALID;
1040 }
1041
1042 if (onEvent) {
1043 if (!scratch || !validScratch(id->rose, scratch)) {
1044 return HS_INVALID;
1045 }
1046 if (unlikely(markScratchInUse(scratch))) {
1047 return HS_SCRATCH_IN_USE;
1048 }
1049 report_eod_matches(id, scratch, onEvent, context);
1050 if (unlikely(internal_matching_error(scratch))) {
1051 unmarkScratchInUse(scratch);
1052 return HS_UNKNOWN_ERROR;
1053 }
1054 unmarkScratchInUse(scratch);
1055 }
1056
1057 if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
1058 if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL)
1059 == MO_HALT_MATCHING) {
1060 scratch->core_info.status |= STATUS_TERMINATED;
1061 if (unlikely(internal_matching_error(scratch))) {
1062 unmarkScratchInUse(scratch);
1063 return HS_UNKNOWN_ERROR;
1064 }
1065 unmarkScratchInUse(scratch);
1066 }
1067 }
1068
1069 // history already initialised
1070 init_stream(id, id->rose, 0);
1071
1072 return HS_SUCCESS;
1073}
1074
1075HS_PUBLIC_API
1076hs_error_t HS_CDECL hs_stream_size(const hs_database_t *db,
1077 size_t *stream_size) {
1078 if (!stream_size) {
1079 return HS_INVALID;
1080 }
1081
1082 hs_error_t ret = validDatabase(db);
1083 if (ret != HS_SUCCESS) {
1084 return ret;
1085 }
1086
1087 const struct RoseEngine *rose = hs_get_bytecode(db);
1088 if (!ISALIGNED_16(rose)) {
1089 return HS_INVALID;
1090 }
1091
1092 if (rose->mode != HS_MODE_STREAM) {
1093 return HS_DB_MODE_ERROR;
1094 }
1095
1096 u32 base_stream_size = rose->stateOffsets.end;
1097
1098 // stream state plus the hs_stream struct itself
1099 *stream_size = base_stream_size + sizeof(struct hs_stream);
1100
1101 return HS_SUCCESS;
1102}
1103
1104#if defined(DEBUG) || defined(DUMP_SUPPORT)
1105#include "util/compare.h"
1106// A debugging crutch: print a hex-escaped version of the match for our
1107// perusal.
1108static UNUSED
1109void dumpData(const char *data, size_t len) {
1110 DEBUG_PRINTF("BUFFER:");
1111 for (size_t i = 0; i < len; i++) {
1112 u8 c = data[i];
1113 if (ourisprint(c) && c != '\'') {
1114 printf("%c", c);
1115 } else {
1116 printf("\\x%02x", c);
1117 }
1118 }
1119 printf("\n");
1120}
1121#endif
1122
1123HS_PUBLIC_API
1124hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
1125 const char * const * data,
1126 const unsigned int *length,
1127 unsigned int count,
1128 UNUSED unsigned int flags,
1129 hs_scratch_t *scratch,
1130 match_event_handler onEvent, void *context) {
1131 if (unlikely(!scratch || !data || !length)) {
1132 return HS_INVALID;
1133 }
1134
1135 hs_error_t err = validDatabase(db);
1136 if (unlikely(err != HS_SUCCESS)) {
1137 return err;
1138 }
1139
1140 const struct RoseEngine *rose = hs_get_bytecode(db);
1141 if (unlikely(!ISALIGNED_16(rose))) {
1142 return HS_INVALID;
1143 }
1144
1145 if (unlikely(rose->mode != HS_MODE_VECTORED)) {
1146 return HS_DB_MODE_ERROR;
1147 }
1148
1149 if (unlikely(!validScratch(rose, scratch))) {
1150 return HS_INVALID;
1151 }
1152
1153 if (unlikely(markScratchInUse(scratch))) {
1154 return HS_SCRATCH_IN_USE;
1155 }
1156
1157 hs_stream_t *id = (hs_stream_t *)(scratch->bstate);
1158
1159 init_stream(id, rose, 1); /* open stream */
1160
1161 for (u32 i = 0; i < count; i++) {
1162 DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
1163 length[i]);
1164#ifdef DEBUG
1165 dumpData(data[i], length[i]);
1166#endif
1167 hs_error_t ret
1168 = hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
1169 onEvent, context);
1170 if (ret != HS_SUCCESS) {
1171 unmarkScratchInUse(scratch);
1172 return ret;
1173 }
1174 }
1175
1176 /* close stream */
1177 if (onEvent) {
1178 report_eod_matches(id, scratch, onEvent, context);
1179
1180 if (unlikely(internal_matching_error(scratch))) {
1181 unmarkScratchInUse(scratch);
1182 return HS_UNKNOWN_ERROR;
1183 } else if (told_to_stop_matching(scratch)) {
1184 unmarkScratchInUse(scratch);
1185 return HS_SCAN_TERMINATED;
1186 }
1187 }
1188
1189 unmarkScratchInUse(scratch);
1190
1191 return HS_SUCCESS;
1192}
1193
1194HS_PUBLIC_API
1195hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,
1196 size_t buf_space, size_t *used_space) {
1197 if (unlikely(!stream || !used_space)) {
1198 return HS_INVALID;
1199 }
1200
1201 if (unlikely(buf_space && !buf)) {
1202 return HS_INVALID;
1203 }
1204
1205 const struct RoseEngine *rose = stream->rose;
1206
1207 size_t stream_size = size_compress_stream(rose, stream);
1208
1209 DEBUG_PRINTF("require %zu [orig %zu]\n", stream_size,
1210 rose->stateOffsets.end + sizeof(struct hs_stream));
1211 *used_space = stream_size;
1212
1213 if (buf_space < stream_size) {
1214 return HS_INSUFFICIENT_SPACE;
1215 }
1216 compress_stream(buf, stream_size, rose, stream);
1217
1218 return HS_SUCCESS;
1219}
1220
1221HS_PUBLIC_API
1222hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db,
1223 hs_stream_t **stream,
1224 const char *buf, size_t buf_size) {
1225 if (unlikely(!stream || !buf)) {
1226 return HS_INVALID;
1227 }
1228
1229 *stream = NULL;
1230
1231 hs_error_t err = validDatabase(db);
1232 if (unlikely(err != HS_SUCCESS)) {
1233 return err;
1234 }
1235
1236 const struct RoseEngine *rose = hs_get_bytecode(db);
1237 if (unlikely(!ISALIGNED_16(rose))) {
1238 return HS_INVALID;
1239 }
1240
1241 if (unlikely(rose->mode != HS_MODE_STREAM)) {
1242 return HS_DB_MODE_ERROR;
1243 }
1244
1245 size_t stream_size = rose->stateOffsets.end + sizeof(struct hs_stream);
1246
1247 struct hs_stream *s = hs_stream_alloc(stream_size);
1248 if (unlikely(!s)) {
1249 return HS_NOMEM;
1250 }
1251
1252 if (!expand_stream(s, rose, buf, buf_size)) {
1253 hs_stream_free(s);
1254 return HS_INVALID;
1255 }
1256
1257 *stream = s;
1258 return HS_SUCCESS;
1259}
1260
1261HS_PUBLIC_API
1262hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream,
1263 const char *buf, size_t buf_size,
1264 hs_scratch_t *scratch,
1265 match_event_handler onEvent,
1266 void *context) {
1267 if (unlikely(!to_stream || !buf)) {
1268 return HS_INVALID;
1269 }
1270
1271 const struct RoseEngine *rose = to_stream->rose;
1272
1273 if (onEvent) {
1274 if (!scratch || !validScratch(to_stream->rose, scratch)) {
1275 return HS_INVALID;
1276 }
1277 if (unlikely(markScratchInUse(scratch))) {
1278 return HS_SCRATCH_IN_USE;
1279 }
1280 report_eod_matches(to_stream, scratch, onEvent, context);
1281 if (unlikely(internal_matching_error(scratch))) {
1282 unmarkScratchInUse(scratch);
1283 return HS_UNKNOWN_ERROR;
1284 }
1285 unmarkScratchInUse(scratch);
1286 }
1287
1288 if (expand_stream(to_stream, rose, buf, buf_size)) {
1289 return HS_SUCCESS;
1290 } else {
1291 return HS_INVALID;
1292 }
1293}
1294