1 | /* |
2 | * Copyright (c) 2015-2016, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Castle: multi-tenant repeat engine, runtime code. |
31 | */ |
32 | |
33 | #include "castle.h" |
34 | |
35 | #include "castle_internal.h" |
36 | #include "nfa_api.h" |
37 | #include "nfa_api_queue.h" |
38 | #include "nfa_internal.h" |
39 | #include "repeat.h" |
40 | #include "shufti.h" |
41 | #include "truffle.h" |
42 | #include "vermicelli.h" |
43 | #include "util/bitutils.h" |
44 | #include "util/multibit.h" |
45 | #include "util/partial_store.h" |
46 | #include "ue2common.h" |
47 | |
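// The SubCastle array is laid out immediately after the Castle header; each
// SubCastle locates its RepeatInfo, its full (scratch) RepeatControl block and
// its packed stream state via the offsets used by the accessors below.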
48 | static really_inline |
49 | const struct SubCastle *getSubCastle(const struct Castle *c, u32 num) { |
50 | assert(num < c->numRepeats); |
51 | const struct SubCastle *sub = |
52 | (const struct SubCastle *)((const char *)c + sizeof(struct Castle)); |
53 | assert(ISALIGNED(sub)); |
54 | return &sub[num]; |
55 | } |
56 | |
57 | static really_inline |
58 | const struct RepeatInfo *getRepeatInfo(const struct SubCastle *sub) { |
59 | const struct RepeatInfo *repeatInfo = |
60 | (const struct RepeatInfo *)((const char *)sub + sub->repeatInfoOffset); |
61 | return repeatInfo; |
62 | } |
63 | |
64 | static really_inline |
65 | union RepeatControl *getControl(char *full_state, const struct SubCastle *sub) { |
66 | union RepeatControl *rctrl = |
67 | (union RepeatControl *)(full_state + sub->fullStateOffset); |
68 | assert(ISALIGNED(rctrl)); |
69 | return rctrl; |
70 | } |
71 | |
72 | static really_inline |
73 | const union RepeatControl *getControlConst(const char *full_state, |
74 | const struct SubCastle *sub) { |
75 | const union RepeatControl *rctrl = |
76 | (const union RepeatControl *)(full_state + sub->fullStateOffset); |
77 | assert(ISALIGNED(rctrl)); |
78 | return rctrl; |
79 | } |
80 | |
81 | enum MatchMode { |
82 | CALLBACK_OUTPUT, |
83 | STOP_AT_MATCH, |
84 | }; |
85 | |
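// Fire the report for a single subcastle if its repeat has a match at the
// given offset; returns MO_HALT_MATCHING if the callback asks us to stop.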
86 | static really_inline |
87 | char subCastleReportCurrent(const struct Castle *c, struct mq *q, |
88 | const u64a offset, const u32 subIdx) { |
89 | const struct SubCastle *sub = getSubCastle(c, subIdx); |
90 | const struct RepeatInfo *info = getRepeatInfo(sub); |
91 | |
92 | union RepeatControl *rctrl = getControl(q->state, sub); |
93 | char *rstate = (char *)q->streamState + sub->streamStateOffset + |
94 | info->packedCtrlSize; |
95 | enum RepeatMatch match = |
96 | repeatHasMatch(info, rctrl, rstate, offset); |
    DEBUG_PRINTF("repeatHasMatch returned %d\n", match);
    if (match == REPEAT_MATCH) {
        DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset,
                     subIdx, sub->report);
101 | if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) { |
102 | return MO_HALT_MATCHING; |
103 | } |
104 | } |
105 | |
106 | return MO_CONTINUE_MATCHING; |
107 | } |
108 | |
109 | static really_inline |
110 | int castleReportCurrent(const struct Castle *c, struct mq *q) { |
111 | const u64a offset = q_cur_offset(q); |
112 | DEBUG_PRINTF("offset=%llu\n" , offset); |
113 | |
114 | if (c->exclusive) { |
115 | u8 *active = (u8 *)q->streamState; |
116 | u8 *groups = active + c->groupIterOffset; |
117 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
118 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
119 | u8 *cur = active + i * c->activeIdxSize; |
120 | const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); |
121 | DEBUG_PRINTF("subcastle %u\n" , activeIdx); |
122 | if (subCastleReportCurrent(c, q, |
123 | offset, activeIdx) == MO_HALT_MATCHING) { |
124 | return MO_HALT_MATCHING; |
125 | } |
126 | } |
127 | } |
128 | |
129 | if (c->exclusive != PURE_EXCLUSIVE) { |
130 | const u8 *active = (const u8 *)q->streamState + c->activeOffset; |
131 | for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); |
132 | i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { |
133 | DEBUG_PRINTF("subcastle %u\n" , i); |
134 | if (subCastleReportCurrent(c, q, offset, i) == MO_HALT_MATCHING) { |
135 | return MO_HALT_MATCHING; |
136 | } |
137 | } |
138 | } |
139 | |
140 | return MO_CONTINUE_MATCHING; |
141 | } |
142 | |
143 | static really_inline |
144 | char subCastleInAccept(const struct Castle *c, struct mq *q, |
145 | const ReportID report, const u64a offset, |
146 | const u32 subIdx) { |
147 | const struct SubCastle *sub = getSubCastle(c, subIdx); |
148 | |
149 | if (sub->report != report) { |
150 | return 0; |
151 | } |
152 | const struct RepeatInfo *info = getRepeatInfo(sub); |
153 | |
154 | union RepeatControl *rctrl = getControl(q->state, sub); |
155 | char *rstate = (char *)q->streamState + sub->streamStateOffset + |
156 | info->packedCtrlSize; |
157 | enum RepeatMatch match = |
158 | repeatHasMatch(info, rctrl, rstate, offset); |
159 | if (match == REPEAT_MATCH) { |
160 | DEBUG_PRINTF("in an accept\n" ); |
161 | return 1; |
162 | } |
163 | |
164 | return 0; |
165 | } |
166 | |
167 | static really_inline |
168 | char castleInAccept(const struct Castle *c, struct mq *q, |
169 | const ReportID report, const u64a offset) { |
170 | DEBUG_PRINTF("offset=%llu\n" , offset); |
171 | /* ignore when just catching up due to full queue */ |
172 | if (report == MO_INVALID_IDX) { |
173 | return 0; |
174 | } |
175 | |
176 | if (c->exclusive) { |
177 | u8 *active = (u8 *)q->streamState; |
178 | u8 *groups = active + c->groupIterOffset; |
179 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
180 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
181 | u8 *cur = active + i * c->activeIdxSize; |
182 | const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); |
183 | DEBUG_PRINTF("subcastle %u\n" , activeIdx); |
184 | if (subCastleInAccept(c, q, report, offset, activeIdx)) { |
185 | return 1; |
186 | } |
187 | } |
188 | } |
189 | |
190 | if (c->exclusive != PURE_EXCLUSIVE) { |
191 | const u8 *active = (const u8 *)q->streamState + c->activeOffset; |
192 | for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); |
193 | i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { |
194 | DEBUG_PRINTF("subcastle %u\n" , i); |
195 | if (subCastleInAccept(c, q, report, offset, i)) { |
196 | return 1; |
197 | } |
198 | } |
199 | } |
200 | |
201 | return 0; |
202 | } |
203 | |
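// Switch off a subcastle whose repeat has gone stale (it can no longer
// produce a match), clearing either its exclusive group's bit or its bit in
// the shared 'active' multibit.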
204 | static really_inline |
205 | void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset, |
206 | void *full_state, void *stream_state, |
207 | const u32 subIdx) { |
208 | const struct SubCastle *sub = getSubCastle(c, subIdx); |
209 | const struct RepeatInfo *info = getRepeatInfo(sub); |
210 | |
211 | union RepeatControl *rctrl = getControl(full_state, sub); |
212 | char *rstate = (char *)stream_state + sub->streamStateOffset + |
213 | info->packedCtrlSize; |
214 | |
215 | if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) { |
216 | DEBUG_PRINTF("sub %u is stale at offset %llu\n" , subIdx, offset); |
217 | if (sub->exclusiveId < c->numRepeats) { |
218 | u8 *active = (u8 *)stream_state; |
219 | u8 *groups = active + c->groupIterOffset; |
220 | mmbit_unset(groups, c->numGroups, sub->exclusiveId); |
221 | } else { |
222 | u8 *active = (u8 *)stream_state + c->activeOffset; |
223 | mmbit_unset(active, c->numRepeats, subIdx); |
224 | } |
225 | } |
226 | } |
227 | |
228 | static really_inline |
229 | void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, |
230 | void *full_state, void *stream_state) { |
    DEBUG_PRINTF("offset=%llu\n", offset);

    if (!c->staleIterOffset) {
        DEBUG_PRINTF("{no repeats can go stale}\n");
235 | return; /* no subcastle can ever go stale */ |
236 | } |
237 | |
238 | if (c->exclusive) { |
239 | u8 *active = (u8 *)stream_state; |
240 | u8 *groups = active + c->groupIterOffset; |
241 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
242 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
243 | u8 *cur = active + i * c->activeIdxSize; |
244 | const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); |
245 | DEBUG_PRINTF("subcastle %u\n" , activeIdx); |
246 | subCastleDeactivateStaleSubs(c, offset, full_state, |
247 | stream_state, activeIdx); |
248 | } |
249 | } |
250 | |
251 | if (c->exclusive != PURE_EXCLUSIVE) { |
252 | const u8 *active = (const u8 *)stream_state + c->activeOffset; |
253 | const struct mmbit_sparse_iter *it |
254 | = (const void *)((const char *)c + c->staleIterOffset); |
255 | |
256 | struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; |
257 | u32 numRepeats = c->numRepeats; |
258 | u32 idx = 0; |
259 | |
260 | u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state); |
        while (i != MMB_INVALID) {
            DEBUG_PRINTF("subcastle %u\n", i);
263 | subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i); |
264 | i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it, |
265 | si_state); |
266 | } |
267 | } |
268 | } |
269 | |
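// Handle a top for a single repeat: mark it as active (in its exclusive group
// slot or in the 'active' multibit), ignore duplicate tops at the same offset
// for an already-live repeat, and record the top via repeatStore().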
270 | static really_inline |
271 | void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, |
272 | void *full_state, void *stream_state, |
273 | UNUSED char stale_checked) { |
274 | assert(top < c->numRepeats); |
275 | |
276 | const struct SubCastle *sub = getSubCastle(c, top); |
277 | const struct RepeatInfo *info = getRepeatInfo(sub); |
278 | union RepeatControl *rctrl = getControl(full_state, sub); |
279 | char *rstate = (char *)stream_state + sub->streamStateOffset + |
280 | info->packedCtrlSize; |
281 | |
282 | char is_alive = 0; |
283 | u8 *active = (u8 *)stream_state; |
284 | if (sub->exclusiveId < c->numRepeats) { |
285 | u8 *groups = active + c->groupIterOffset; |
286 | active += sub->exclusiveId * c->activeIdxSize; |
287 | if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) { |
288 | const u32 activeIdx = partial_load_u32(active, c->activeIdxSize); |
289 | is_alive = (activeIdx == top); |
290 | } |
291 | |
292 | if (!is_alive) { |
293 | partial_store_u32(active, top, c->activeIdxSize); |
294 | } |
295 | } else { |
296 | active += c->activeOffset; |
297 | is_alive = mmbit_set(active, c->numRepeats, top); |
298 | } |
299 | |
300 | if (!is_alive) { |
        DEBUG_PRINTF("first top for inactive repeat %u\n", top);
    } else {
        DEBUG_PRINTF("repeat %u is already alive\n", top);
304 | // Caller should ensure we're not stale. |
305 | assert(!stale_checked |
306 | || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE); |
307 | |
308 | // Ignore duplicate top events. |
309 | u64a last = repeatLastTop(info, rctrl, rstate); |
310 | |
311 | assert(last <= offset); |
312 | if (last == offset) { |
313 | DEBUG_PRINTF("dupe top at %llu\n" , offset); |
314 | return; |
315 | } |
316 | } |
317 | |
318 | repeatStore(info, rctrl, rstate, offset, is_alive); |
319 | } |
320 | |
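// Look up the next match after 'begin' for a single subcastle: deactivate it
// if it has no further matches; otherwise, if its next match is at or before
// 'end', track the smallest match offset (relative to 'begin') in *mloc.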
321 | static really_inline |
322 | void subCastleFindMatch(const struct Castle *c, const u64a begin, |
323 | const u64a end, void *full_state, void *stream_state, |
324 | size_t *mloc, char *found, const u32 subIdx) { |
325 | const struct SubCastle *sub = getSubCastle(c, subIdx); |
326 | const struct RepeatInfo *info = getRepeatInfo(sub); |
327 | union RepeatControl *rctrl = getControl(full_state, sub); |
328 | char *rstate = (char *)stream_state + sub->streamStateOffset + |
329 | info->packedCtrlSize; |
330 | |
331 | u64a match = repeatNextMatch(info, rctrl, rstate, begin); |
332 | if (match == 0) { |
333 | DEBUG_PRINTF("no more matches for sub %u\n" , subIdx); |
334 | if (sub->exclusiveId < c->numRepeats) { |
335 | u8 *groups = (u8 *)stream_state + c->groupIterOffset; |
336 | mmbit_unset(groups, c->numGroups, sub->exclusiveId); |
337 | } else { |
338 | u8 *active = (u8 *)stream_state + c->activeOffset; |
339 | mmbit_unset(active, c->numRepeats, subIdx); |
340 | } |
341 | return; |
342 | } else if (match > end) { |
        DEBUG_PRINTF("next match for sub %u at %llu is > horizon\n", subIdx,
                     match);
345 | return; |
346 | } |
    DEBUG_PRINTF("sub %u earliest match at %llu\n", subIdx, match);
    size_t diff = match - begin;
    if (!(*found) || diff < *mloc) {
        *mloc = diff;
        DEBUG_PRINTF("mloc=%zu\n", *mloc);
352 | } |
353 | *found = 1; |
354 | } |
355 | |
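// Find the earliest next match in (begin, end] over all active subcastles,
// returning its offset relative to 'begin' in *mloc. Used on the
// STOP_AT_MATCH path.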
356 | static really_inline |
357 | char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, |
358 | void *full_state, void *stream_state, size_t *mloc) { |
    DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end);
    assert(begin <= end);

    if (begin == end) {
        DEBUG_PRINTF("no work to do\n");
364 | return 0; |
365 | } |
366 | |
367 | char found = 0; |
368 | *mloc = 0; |
369 | |
370 | if (c->exclusive) { |
371 | u8 *active = (u8 *)stream_state; |
372 | u8 *groups = active + c->groupIterOffset; |
373 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
374 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
375 | u8 *cur = active + i * c->activeIdxSize; |
376 | const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); |
377 | DEBUG_PRINTF("subcastle %u\n" , activeIdx); |
378 | subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, |
379 | &found, activeIdx); |
380 | } |
381 | } |
382 | |
383 | if (c->exclusive != PURE_EXCLUSIVE) { |
384 | u8 *active = (u8 *)stream_state + c->activeOffset; |
385 | for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); |
386 | i != MMB_INVALID; |
387 | i = mmbit_iterate(active, c->numRepeats, i)) { |
388 | DEBUG_PRINTF("subcastle %u\n" , i); |
389 | subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, |
390 | &found, i); |
391 | } |
392 | } |
393 | |
394 | return found; |
395 | } |
396 | |
397 | static really_inline |
398 | u64a subCastleNextMatch(const struct Castle *c, void *full_state, |
399 | void *stream_state, const u64a loc, |
400 | const u32 subIdx) { |
401 | DEBUG_PRINTF("subcastle %u\n" , subIdx); |
402 | const struct SubCastle *sub = getSubCastle(c, subIdx); |
403 | const struct RepeatInfo *info = getRepeatInfo(sub); |
404 | const union RepeatControl *rctrl = |
405 | getControlConst(full_state, sub); |
406 | const char *rstate = (const char *)stream_state + |
407 | sub->streamStateOffset + |
408 | info->packedCtrlSize; |
409 | |
410 | return repeatNextMatch(info, rctrl, rstate, loc); |
411 | } |
412 | |
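// Record the outcome of one repeat's next-match query: kill the repeat if it
// has no more matches, add it to the 'matching' multibit if it matches at the
// current minimum offset, or reset 'matching' if it matches earlier.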
413 | static really_inline |
414 | void set_matching(const struct Castle *c, const u64a match, u8 *active, |
415 | u8 *matching, const u32 active_size, const u32 active_id, |
416 | const u32 matching_id, u64a *offset, const u64a end) { |
417 | if (match == 0) { |
418 | DEBUG_PRINTF("no more matches\n" ); |
419 | mmbit_unset(active, active_size, active_id); |
420 | } else if (match > end) { |
421 | // If we had a local copy of the active mmbit, we could skip |
422 | // looking at this repeat again. But we don't, so we just move |
423 | // on. |
424 | } else if (match == *offset) { |
425 | mmbit_set(matching, c->numRepeats, matching_id); |
426 | } else if (match < *offset) { |
427 | // New minimum offset. |
428 | *offset = match; |
429 | mmbit_clear(matching, c->numRepeats); |
430 | mmbit_set(matching, c->numRepeats, matching_id); |
431 | } |
432 | } |
433 | |
434 | static really_inline |
435 | void subCastleMatchLoop(const struct Castle *c, void *full_state, |
436 | void *stream_state, const u64a end, |
437 | const u64a loc, u64a *offset) { |
438 | u8 *active = (u8 *)stream_state + c->activeOffset; |
439 | u8 *matching = full_state; |
440 | for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); |
441 | i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { |
442 | u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i); |
443 | set_matching(c, match, active, matching, c->numRepeats, i, |
444 | i, offset, end); |
445 | } |
446 | } |
447 | |
448 | static really_inline |
449 | char subCastleFireMatch(const struct Castle *c, const void *full_state, |
450 | UNUSED const void *stream_state, NfaCallback cb, |
451 | void *ctx, const u64a offset) { |
452 | const u8 *matching = full_state; |
453 | |
454 | // Fire all matching sub-castles at this offset. |
455 | for (u32 i = mmbit_iterate(matching, c->numRepeats, MMB_INVALID); |
456 | i != MMB_INVALID; |
457 | i = mmbit_iterate(matching, c->numRepeats, i)) { |
458 | const struct SubCastle *sub = getSubCastle(c, i); |
        DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i);
        if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) {
            DEBUG_PRINTF("caller told us to halt\n");
462 | return MO_HALT_MATCHING; |
463 | } |
464 | } |
465 | |
466 | return MO_CONTINUE_MATCHING; |
467 | } |
468 | |
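// CALLBACK_OUTPUT match loop: repeatedly find the minimum next-match offset
// over all active repeats, fire reports for every subcastle matching at that
// offset, then advance until we pass 'end' or run out of matches.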
469 | static really_inline |
470 | char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end, |
471 | void *full_state, void *stream_state, NfaCallback cb, |
472 | void *ctx) { |
473 | DEBUG_PRINTF("begin=%llu, end=%llu\n" , begin, end); |
474 | assert(begin <= end); |
475 | |
476 | u8 *matching = full_state; // temp multibit |
477 | |
478 | u64a loc = begin; |
479 | while (loc < end) { |
480 | |
481 | // Find minimum next offset for the next match(es) from amongst our |
482 | // active sub-castles, and store the indices of the sub-castles that |
483 | // match at that offset in the 'matching' mmbit, which is in the |
484 | // full_state (scratch). |
485 | |
486 | u64a offset = end; // min offset of next match |
487 | u32 activeIdx = 0; |
488 | mmbit_clear(matching, c->numRepeats); |
489 | if (c->exclusive) { |
490 | u8 *active = (u8 *)stream_state; |
491 | u8 *groups = active + c->groupIterOffset; |
492 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
493 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
494 | u8 *cur = active + i * c->activeIdxSize; |
495 | activeIdx = partial_load_u32(cur, c->activeIdxSize); |
496 | u64a match = subCastleNextMatch(c, full_state, stream_state, |
497 | loc, activeIdx); |
498 | set_matching(c, match, groups, matching, c->numGroups, i, |
499 | activeIdx, &offset, end); |
500 | } |
501 | } |
502 | |
503 | if (c->exclusive != PURE_EXCLUSIVE) { |
504 | subCastleMatchLoop(c, full_state, stream_state, |
505 | end, loc, &offset); |
506 | } |
        DEBUG_PRINTF("offset=%llu\n", offset);
        if (!mmbit_any(matching, c->numRepeats)) {
            DEBUG_PRINTF("no more matches\n");
510 | break; |
511 | } |
512 | |
513 | if (subCastleFireMatch(c, full_state, stream_state, |
514 | cb, ctx, offset) == MO_HALT_MATCHING) { |
515 | return MO_HALT_MATCHING; |
516 | } |
517 | loc = offset; |
518 | } |
519 | |
520 | return MO_CONTINUE_MATCHING; |
521 | } |
522 | |
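// Escape scanning: each castle describes the characters that can kill its
// repeats as a single character, a negated character, or a shufti/truffle
// class; the helpers below find the first escape in [begin, end) of 'buf'.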
523 | static really_inline |
524 | char castleScanVerm(const struct Castle *c, const u8 *buf, const size_t begin, |
525 | const size_t end, size_t *loc) { |
526 | const u8 *ptr = vermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); |
527 | if (ptr == buf + end) { |
528 | DEBUG_PRINTF("no escape found\n" ); |
529 | return 0; |
530 | } |
531 | |
532 | assert(loc); |
533 | assert(ptr >= buf && ptr < buf + end); |
534 | *loc = (size_t)(ptr - buf); |
535 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
536 | return 1; |
537 | } |
538 | |
539 | static really_inline |
540 | char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin, |
541 | const size_t end, size_t *loc) { |
542 | const u8 *ptr = nvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); |
543 | if (ptr == buf + end) { |
544 | DEBUG_PRINTF("no escape found\n" ); |
545 | return 0; |
546 | } |
547 | |
548 | assert(loc); |
549 | assert(ptr >= buf && ptr < buf + end); |
550 | *loc = (size_t)(ptr - buf); |
551 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
552 | return 1; |
553 | } |
554 | |
555 | static really_inline |
556 | char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, |
557 | const size_t end, size_t *loc) { |
558 | const m128 mask_lo = c->u.shuf.mask_lo; |
559 | const m128 mask_hi = c->u.shuf.mask_hi; |
560 | const u8 *ptr = shuftiExec(mask_lo, mask_hi, buf + begin, buf + end); |
561 | if (ptr == buf + end) { |
562 | DEBUG_PRINTF("no escape found\n" ); |
563 | return 0; |
564 | } |
565 | |
566 | assert(loc); |
567 | assert(ptr >= buf && ptr < buf + end); |
568 | *loc = (size_t)(ptr - buf); |
569 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
570 | return 1; |
571 | } |
572 | |
573 | static really_inline |
574 | char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin, |
575 | const size_t end, size_t *loc) { |
576 | const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, |
577 | buf + begin, buf + end); |
578 | if (ptr == buf + end) { |
579 | DEBUG_PRINTF("no escape found\n" ); |
580 | return 0; |
581 | } |
582 | |
583 | assert(loc); |
584 | assert(ptr >= buf && ptr < buf + end); |
585 | *loc = (size_t)(ptr - buf); |
586 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
587 | return 1; |
588 | } |
589 | |
590 | static really_inline |
591 | char castleScan(const struct Castle *c, const u8 *buf, const size_t begin, |
592 | const size_t end, size_t *loc) { |
593 | assert(begin <= end); |
594 | |
595 | if (begin == end) { |
596 | return 0; |
597 | } |
598 | |
599 | switch (c->type) { |
600 | case CASTLE_DOT: |
601 | // Nothing can stop a dot scan! |
602 | return 0; |
603 | case CASTLE_VERM: |
604 | return castleScanVerm(c, buf, begin, end, loc); |
605 | case CASTLE_NVERM: |
606 | return castleScanNVerm(c, buf, begin, end, loc); |
607 | case CASTLE_SHUFTI: |
608 | return castleScanShufti(c, buf, begin, end, loc); |
609 | case CASTLE_TRUFFLE: |
610 | return castleScanTruffle(c, buf, begin, end, loc); |
611 | default: |
612 | DEBUG_PRINTF("unknown scan type!\n" ); |
613 | assert(0); |
614 | return 0; |
615 | } |
616 | } |
617 | |
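// Reverse-scan variants of the helpers above: find the offset of the last
// escape character in the range. Used by castleLastKillLoc() when catching up
// lazily in nfaExecCastle_QR().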
618 | static really_inline |
619 | char castleRevScanVerm(const struct Castle *c, const u8 *buf, |
620 | const size_t begin, const size_t end, size_t *loc) { |
621 | const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); |
622 | if (ptr == buf + begin - 1) { |
623 | DEBUG_PRINTF("no escape found\n" ); |
624 | return 0; |
625 | } |
626 | |
627 | assert(loc); |
628 | assert(ptr >= buf && ptr < buf + end); |
629 | *loc = (size_t)(ptr - buf); |
630 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
631 | return 1; |
632 | } |
633 | |
634 | static really_inline |
635 | char castleRevScanNVerm(const struct Castle *c, const u8 *buf, |
636 | const size_t begin, const size_t end, size_t *loc) { |
637 | const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); |
638 | if (ptr == buf + begin - 1) { |
639 | DEBUG_PRINTF("no escape found\n" ); |
640 | return 0; |
641 | } |
642 | |
643 | assert(loc); |
644 | assert(ptr >= buf && ptr < buf + end); |
645 | *loc = (size_t)(ptr - buf); |
646 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
647 | return 1; |
648 | } |
649 | |
650 | static really_inline |
651 | char castleRevScanShufti(const struct Castle *c, const u8 *buf, |
652 | const size_t begin, const size_t end, size_t *loc) { |
653 | const m128 mask_lo = c->u.shuf.mask_lo; |
654 | const m128 mask_hi = c->u.shuf.mask_hi; |
655 | const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end); |
656 | if (ptr == buf + begin - 1) { |
657 | DEBUG_PRINTF("no escape found\n" ); |
658 | return 0; |
659 | } |
660 | |
661 | assert(loc); |
662 | assert(ptr >= buf && ptr < buf + end); |
663 | *loc = (size_t)(ptr - buf); |
664 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
665 | return 1; |
666 | } |
667 | |
668 | static really_inline |
669 | char castleRevScanTruffle(const struct Castle *c, const u8 *buf, |
670 | const size_t begin, const size_t end, size_t *loc) { |
671 | const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2, |
672 | buf + begin, buf + end); |
673 | if (ptr == buf + begin - 1) { |
674 | DEBUG_PRINTF("no escape found\n" ); |
675 | return 0; |
676 | } |
677 | |
678 | assert(loc); |
679 | assert(ptr >= buf && ptr < buf + end); |
680 | *loc = (size_t)(ptr - buf); |
681 | DEBUG_PRINTF("escape found at offset %zu\n" , *loc); |
682 | return 1; |
683 | } |
684 | |
685 | static really_inline |
686 | char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin, |
687 | const size_t end, size_t *loc) { |
688 | assert(begin <= end); |
689 | DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n" , begin, end); |
690 | if (begin == end) { |
691 | return 0; |
692 | } |
693 | |
694 | switch (c->type) { |
695 | case CASTLE_DOT: |
696 | // Nothing can stop a dot scan! |
697 | return 0; |
698 | case CASTLE_VERM: |
699 | return castleRevScanVerm(c, buf, begin, end, loc); |
700 | case CASTLE_NVERM: |
701 | return castleRevScanNVerm(c, buf, begin, end, loc); |
702 | case CASTLE_SHUFTI: |
703 | return castleRevScanShufti(c, buf, begin, end, loc); |
704 | case CASTLE_TRUFFLE: |
705 | return castleRevScanTruffle(c, buf, begin, end, loc); |
706 | default: |
707 | DEBUG_PRINTF("unknown scan type!\n" ); |
708 | assert(0); |
709 | return 0; |
710 | } |
711 | } |
712 | |
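// Translate a queue event into a top for the corresponding repeat; only
// numbered tops (MQE_TOP_FIRST + n) are expected here.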
713 | static really_inline |
714 | void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp, |
715 | char stale_checked) { |
716 | const u32 event = q->items[q->cur].type; |
717 | switch (event) { |
718 | case MQE_TOP: |
719 | assert(0); // should be a numbered top |
720 | break; |
721 | case MQE_START: |
722 | case MQE_END: |
723 | break; |
724 | default: |
725 | assert(event >= MQE_TOP_FIRST); |
726 | assert(event < MQE_INVALID); |
727 | u32 top = event - MQE_TOP_FIRST; |
728 | DEBUG_PRINTF("top %u at offset %llu\n" , top, sp); |
729 | castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked); |
730 | break; |
731 | } |
732 | } |
733 | |
734 | static really_inline |
735 | void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { |
736 | DEBUG_PRINTF("clearing active repeats due to escape\n" ); |
737 | if (c->exclusive) { |
738 | u8 *groups = (u8 *)q->streamState + c->groupIterOffset; |
739 | mmbit_clear(groups, c->numGroups); |
740 | } |
741 | |
742 | if (c->exclusive != PURE_EXCLUSIVE) { |
743 | mmbit_clear(active, c->numRepeats); |
744 | } |
745 | } |
746 | |
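// Main queue execution loop shared by the Q (CALLBACK_OUTPUT) and Q2
// (STOP_AT_MATCH) entry points: between consecutive queue events we scan the
// buffer for escapes, handle any repeat matches in that range, kill all
// repeats if an escape was seen, and then process the event itself.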
747 | static really_inline |
748 | char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, |
749 | enum MatchMode mode) { |
750 | assert(n && q); |
751 | assert(n->type == CASTLE_NFA); |
752 | |
753 | DEBUG_PRINTF("state=%p, streamState=%p\n" , q->state, q->streamState); |
754 | |
755 | const struct Castle *c = getImplNfa(n); |
756 | |
757 | if (q->report_current) { |
758 | int rv = castleReportCurrent(c, q); |
759 | q->report_current = 0; |
760 | if (rv == MO_HALT_MATCHING) { |
761 | return MO_HALT_MATCHING; |
762 | } |
763 | } |
764 | |
765 | if (q->cur == q->end) { |
766 | return 1; |
767 | } |
768 | |
    u8 *active = (u8 *)q->streamState + c->activeOffset; // active multibit
770 | |
771 | assert(q->cur + 1 < q->end); // require at least two items |
772 | assert(q_cur_type(q) == MQE_START); |
773 | u64a sp = q_cur_offset(q); |
774 | q->cur++; |
    DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);

    while (q->cur < q->end) {
        DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                     q_cur_offset(q));
780 | |
781 | char found = 0; |
782 | if (c->exclusive) { |
783 | u8 *groups = (u8 *)q->streamState + c->groupIterOffset; |
784 | found = mmbit_any(groups, c->numGroups); |
785 | } |
786 | |
787 | if (!found && !mmbit_any(active, c->numRepeats)) { |
788 | DEBUG_PRINTF("no repeats active, skipping scan\n" ); |
789 | goto scan_done; |
790 | } |
791 | |
792 | u64a ep = q_cur_offset(q); |
793 | ep = MIN(ep, q->offset + end); |
794 | if (sp < ep) { |
795 | size_t eloc = 0; |
796 | char escape_found = 0; |
797 | DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n" , sp, ep); |
798 | assert(sp >= q->offset && ep >= q->offset); |
799 | if (castleScan(c, q->buffer, sp - q->offset, ep - q->offset, |
800 | &eloc)) { |
801 | escape_found = 1; |
802 | ep = q->offset + eloc; |
803 | DEBUG_PRINTF("escape found at %llu\n" , ep); |
804 | assert(ep >= sp); |
805 | } |
806 | |
807 | assert(sp <= ep); |
808 | |
809 | if (mode == STOP_AT_MATCH) { |
810 | size_t mloc; |
811 | if (castleFindMatch(c, sp, ep, q->state, q->streamState, |
812 | &mloc)) { |
813 | DEBUG_PRINTF("storing match at %llu\n" , sp + mloc); |
814 | q->cur--; |
815 | assert(q->cur < MAX_MQE_LEN); |
816 | q->items[q->cur].type = MQE_START; |
817 | q->items[q->cur].location = (s64a)(sp - q->offset) + mloc; |
818 | return MO_MATCHES_PENDING; |
819 | } |
820 | } else { |
821 | assert(mode == CALLBACK_OUTPUT); |
822 | char rv = castleMatchLoop(c, sp, ep, q->state, q->streamState, |
823 | q->cb, q->context); |
824 | if (rv == MO_HALT_MATCHING) { |
825 | return MO_HALT_MATCHING; |
826 | } |
827 | assert(rv == MO_CONTINUE_MATCHING); |
828 | } |
829 | |
830 | if (escape_found) { |
831 | clear_repeats(c, q, active); |
832 | } |
833 | } |
834 | |
835 | scan_done: |
836 | if (q_cur_loc(q) > end) { |
837 | q->cur--; |
838 | assert(q->cur < MAX_MQE_LEN); |
839 | q->items[q->cur].type = MQE_START; |
840 | q->items[q->cur].location = end; |
841 | return MO_ALIVE; |
842 | } |
843 | |
844 | sp = q_cur_offset(q); |
845 | castleHandleEvent(c, q, sp, 1); |
846 | q->cur++; |
847 | } |
848 | |
849 | if (c->exclusive) { |
850 | u8 *groups = (u8 *)q->streamState + c->groupIterOffset; |
851 | if (mmbit_any_precise(groups, c->numGroups)) { |
852 | return 1; |
853 | } |
854 | } |
855 | |
856 | return mmbit_any_precise(active, c->numRepeats); |
857 | } |
858 | |
859 | char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) { |
860 | DEBUG_PRINTF("entry\n" ); |
861 | return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT); |
862 | } |
863 | |
864 | char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) { |
865 | DEBUG_PRINTF("entry\n" ); |
866 | return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH); |
867 | } |
868 | |
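// Find the location of the last escape character before the end of the queue,
// scanning the buffer and then the history; returns one before the queue's
// start location if the repeats are never killed.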
869 | static |
870 | s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { |
871 | assert(q_cur_type(q) == MQE_START); |
872 | assert(q_last_type(q) == MQE_END); |
873 | s64a sp = q_cur_loc(q); |
874 | s64a ep = q_last_loc(q); |
875 | |
876 | DEBUG_PRINTF("finding final squash in (%lld, %lld]\n" , sp, ep); |
877 | |
878 | size_t loc; |
879 | |
880 | if (ep > 0) { |
881 | if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) { |
882 | return (s64a)loc; |
883 | } |
884 | ep = 0; |
885 | } |
886 | |
887 | if (sp < 0) { |
888 | s64a hlen = q->hlength; |
889 | |
890 | if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) { |
891 | return (s64a)loc - hlen; |
892 | } |
893 | ep = 0; |
894 | } |
895 | |
896 | return sp - 1; /* the repeats are never killed */ |
897 | } |
898 | |
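// Lazy catch-up over a whole queue: if an escape occurs anywhere in the
// queue's range, clear the repeats and skip the events it makes moot; replay
// the remaining tops without reporting, drop stale repeats, and return
// whether the engine is dead, alive, or in an accept state for 'report'.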
899 | char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { |
900 | assert(n && q); |
901 | assert(n->type == CASTLE_NFA); |
902 | DEBUG_PRINTF("entry\n" ); |
903 | |
904 | if (q->cur == q->end) { |
905 | return 1; |
906 | } |
907 | |
908 | assert(q->cur + 1 < q->end); /* require at least two items */ |
909 | assert(q_cur_type(q) == MQE_START); |
910 | |
911 | const struct Castle *c = getImplNfa(n); |
912 | u8 *active = (u8 *)q->streamState + c->activeOffset; |
913 | |
914 | u64a end_offset = q_last_loc(q) + q->offset; |
915 | s64a last_kill_loc = castleLastKillLoc(c, q); |
    DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n",
                 last_kill_loc, q_cur_loc(q), q_last_loc(q));
918 | assert(last_kill_loc < q_last_loc(q)); |
919 | |
920 | if (last_kill_loc != q_cur_loc(q) - 1) { |
921 | clear_repeats(c, q, active); |
922 | } |
923 | |
924 | q->cur++; /* skip start event */ |
925 | |
926 | /* skip events prior to the repeats being squashed */ |
927 | while (q_cur_loc(q) <= last_kill_loc) { |
928 | DEBUG_PRINTF("skipping moot event at %lld\n" , q_cur_loc(q)); |
929 | q->cur++; |
930 | assert(q->cur < q->end); |
931 | } |
932 | |
933 | while (q->cur < q->end) { |
        DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
                     q_cur_offset(q));
936 | u64a sp = q_cur_offset(q); |
937 | castleHandleEvent(c, q, sp, 0); |
938 | q->cur++; |
939 | } |
940 | |
941 | castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState); |
942 | |
943 | char found = 0; |
944 | if (c->exclusive) { |
945 | u8 *groups = (u8 *)q->streamState + c->groupIterOffset; |
946 | found = mmbit_any_precise(groups, c->numGroups); |
948 | } |
949 | |
950 | if (!found && !mmbit_any_precise(active, c->numRepeats)) { |
951 | DEBUG_PRINTF("castle is dead\n" ); |
952 | return 0; |
953 | } |
954 | |
955 | if (castleInAccept(c, q, report, end_offset)) { |
956 | return MO_MATCHES_PENDING; |
957 | } |
958 | |
959 | return 1; |
960 | } |
961 | |
962 | char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) { |
963 | assert(n && q); |
964 | assert(n->type == CASTLE_NFA); |
965 | DEBUG_PRINTF("entry\n" ); |
966 | |
967 | const struct Castle *c = getImplNfa(n); |
968 | castleReportCurrent(c, q); |
969 | return 0; |
970 | } |
971 | |
972 | char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, |
973 | struct mq *q) { |
974 | assert(n && q); |
975 | assert(n->type == CASTLE_NFA); |
976 | DEBUG_PRINTF("entry\n" ); |
977 | |
978 | const struct Castle *c = getImplNfa(n); |
979 | return castleInAccept(c, q, report, q_cur_offset(q)); |
980 | } |
981 | |
982 | char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) { |
983 | assert(n && q); |
984 | assert(n->type == CASTLE_NFA); |
985 | DEBUG_PRINTF("entry\n" ); |
986 | |
987 | const struct Castle *c = getImplNfa(n); |
988 | const u64a offset = q_cur_offset(q); |
989 | DEBUG_PRINTF("offset=%llu\n" , offset); |
990 | |
991 | if (c->exclusive) { |
992 | u8 *active = (u8 *)q->streamState; |
993 | u8 *groups = active + c->groupIterOffset; |
994 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
995 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
996 | u8 *cur = active + i * c->activeIdxSize; |
997 | const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); |
998 | DEBUG_PRINTF("subcastle %u\n" , activeIdx); |
999 | const struct SubCastle *sub = getSubCastle(c, activeIdx); |
1000 | if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) { |
1001 | return 1; |
1002 | } |
1003 | } |
1004 | } |
1005 | |
1006 | if (c->exclusive != PURE_EXCLUSIVE) { |
1007 | const u8 *active = (const u8 *)q->streamState + c->activeOffset; |
1008 | for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); |
1009 | i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { |
1010 | DEBUG_PRINTF("subcastle %u\n" , i); |
1011 | const struct SubCastle *sub = getSubCastle(c, i); |
1012 | if (subCastleInAccept(c, q, sub->report, offset, i)) { |
1013 | return 1; |
1014 | } |
1015 | } |
1016 | } |
1017 | |
1018 | return 0; |
1019 | } |
1020 | |
1022 | char nfaExecCastle_queueInitState(UNUSED const struct NFA *n, struct mq *q) { |
1023 | assert(n && q); |
1024 | assert(n->type == CASTLE_NFA); |
1025 | DEBUG_PRINTF("entry\n" ); |
1026 | |
1027 | const struct Castle *c = getImplNfa(n); |
1028 | assert(q->streamState); |
1029 | if (c->exclusive) { |
1030 | u8 *groups = (u8 *)q->streamState + c->groupIterOffset; |
1031 | mmbit_clear(groups, c->numGroups); |
1032 | } |
1033 | |
1034 | if (c->exclusive != PURE_EXCLUSIVE) { |
1035 | u8 *active = (u8 *)q->streamState + c->activeOffset; |
1036 | mmbit_clear(active, c->numRepeats); |
1037 | } |
1038 | return 0; |
1039 | } |
1040 | |
1041 | char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset, |
1042 | void *state, UNUSED u8 key) { |
1043 | assert(n && state); |
1044 | assert(n->type == CASTLE_NFA); |
1045 | DEBUG_PRINTF("entry\n" ); |
1046 | |
1047 | const struct Castle *c = getImplNfa(n); |
1048 | if (c->exclusive) { |
1049 | u8 *groups = (u8 *)state + c->groupIterOffset; |
1050 | mmbit_clear(groups, c->numGroups); |
1051 | } |
1052 | |
1053 | if (c->exclusive != PURE_EXCLUSIVE) { |
1054 | u8 *active = (u8 *)state + c->activeOffset; |
1055 | mmbit_clear(active, c->numRepeats); |
1056 | } |
1057 | return 0; |
1058 | } |
1059 | |
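// Pack a single repeat's control block from full (scratch) state into its
// packed representation in stream state.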
1060 | static really_inline |
1061 | void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx, |
1062 | const struct mq *q, const u64a offset) { |
1063 | const struct SubCastle *sub = getSubCastle(c, subIdx); |
1064 | const struct RepeatInfo *info = getRepeatInfo(sub); |
1065 | union RepeatControl *rctrl = getControl(q->state, sub); |
1066 | char *packed = (char *)q->streamState + sub->streamStateOffset; |
    DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
                 repeatNextMatch(info, rctrl,
                                 packed + info->packedCtrlSize, offset));
1070 | repeatPack(packed, info, rctrl, offset); |
1071 | } |
1072 | |
1073 | char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, |
1074 | s64a loc) { |
1075 | assert(n && q); |
1076 | assert(n->type == CASTLE_NFA); |
1077 | DEBUG_PRINTF("entry, loc=%lld\n" , loc); |
1078 | |
1079 | const struct Castle *c = getImplNfa(n); |
1080 | |
1081 | // Pack state for all active repeats. |
1082 | const u64a offset = q->offset + loc; |
1083 | DEBUG_PRINTF("offset=%llu\n" , offset); |
1084 | if (c->exclusive) { |
1085 | u8 *active = (u8 *)q->streamState; |
1086 | u8 *groups = active + c->groupIterOffset; |
1087 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
1088 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
1089 | u8 *cur = active + i * c->activeIdxSize; |
1090 | const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); |
1091 | DEBUG_PRINTF("packing state for sub %u\n" , activeIdx); |
1092 | subCastleQueueCompressState(c, activeIdx, q, offset); |
1093 | } |
1094 | } |
1095 | |
1096 | if (c->exclusive != PURE_EXCLUSIVE) { |
1097 | const u8 *active = (const u8 *)q->streamState + c->activeOffset; |
1098 | for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); |
1099 | i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { |
1100 | DEBUG_PRINTF("packing state for sub %u\n" , i); |
1101 | subCastleQueueCompressState(c, i, q, offset); |
1102 | } |
1103 | } |
1104 | return 0; |
1105 | } |
1106 | |
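// Unpack a single repeat's control block from stream state back into the full
// (scratch) representation.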
1107 | static really_inline |
1108 | void subCastleExpandState(const struct Castle *c, const u32 subIdx, |
1109 | void *dest, const void *src, const u64a offset) { |
1110 | const struct SubCastle *sub = getSubCastle(c, subIdx); |
1111 | const struct RepeatInfo *info = getRepeatInfo(sub); |
    DEBUG_PRINTF("unpacking state for sub %u\n", subIdx);
    union RepeatControl *rctrl = getControl(dest, sub);
    const char *packed = (const char *)src + sub->streamStateOffset;
    repeatUnpack(packed, info, offset, rctrl);
    DEBUG_PRINTF("sub %u next match %llu\n", subIdx,
                 repeatNextMatch(info, rctrl,
                                 packed + info->packedCtrlSize, offset));
1119 | } |
1120 | |
1121 | char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src, |
1122 | u64a offset, UNUSED u8 key) { |
1123 | assert(n && dest && src); |
1124 | assert(n->type == CASTLE_NFA); |
1125 | DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n" , src, dest, offset); |
1126 | |
1127 | const struct Castle *c = getImplNfa(n); |
1128 | |
1129 | if (c->exclusive) { |
1130 | const u8 *active = (const u8 *)src; |
1131 | const u8 *groups = active + c->groupIterOffset; |
1132 | for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); |
1133 | i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { |
1134 | const u8 *cur = active + i * c->activeIdxSize; |
1135 | const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); |
1136 | subCastleExpandState(c, activeIdx, dest, src, offset); |
1137 | } |
1138 | } |
1139 | |
1140 | if (c->exclusive != PURE_EXCLUSIVE) { |
1141 | // Unpack state for all active repeats. |
1142 | const u8 *active = (const u8 *)src + c->activeOffset; |
1143 | for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); |
1144 | i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { |
1145 | subCastleExpandState(c, i, dest, src, offset); |
1146 | } |
1147 | } |
1148 | return 0; |
1149 | } |
1150 | |