/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief API for handling bounded repeats.
 *
 * This file provides an internal API for handling bounded repeats of
 * character classes. It is used by the Large Bounded Repeat (LBR) engine and
 * by the bounded repeat handling in the LimEx NFA engine.
 */
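
/*
 * Example (editor's sketch, not from the original sources): the typical
 * lifecycle of a bounded repeat. A caller unpacks the control block from
 * stream state, records tops as they arrive, and queries for matches at
 * later offsets. The RING variant is used purely for illustration; the same
 * pattern applies to the RANGE, BITMAP, TRAILER and SPARSE_OPTIMAL_P models.
 *
 * \code
 *     union RepeatControl ctrl;
 *     char stream_state[64]; // sized at compile time from the RepeatInfo
 *
 *     repeatStoreRing(info, &ctrl, stream_state, 100, 0); // first top
 *     repeatStoreRing(info, &ctrl, stream_state, 105, 1); // subsequent top
 *
 *     if (repeatHasMatchRing(info, &ctrl, stream_state, 108) == REPEAT_MATCH) {
 *         // a top lies within [offset - repeatMax, offset - repeatMin]
 *     }
 * \endcode
 */
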
#include "repeat.h"
#include "util/bitutils.h"
#include "util/multibit.h"
#include "util/pack_bits.h"
#include "util/partial_store.h"
#include "util/unaligned.h"

#include <stdint.h>
#include <string.h>

/** \brief Returns the total capacity of the ring.
 * Note that it's currently one greater than repeatMax so that we can handle
 * cases where the tug and pos triggers overlap. */
static
u32 ringCapacity(const struct RepeatInfo *info) {
    return info->repeatMax + 1;
}

/** \brief Returns the number of elements currently in the ring. Note that if
 * the first and last indices are equal, the ring is full. */
static
u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) {
    if (xs->last > xs->first) {
        return xs->last - xs->first;
    } else { // wrapped
        return ringSize - (xs->first - xs->last);
    }
}
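
/*
 * Worked example (editor's note): with ringSize == 8, first == 6 and
 * last == 2, the ring has wrapped, so occupancy is 8 - (6 - 2) == 4: the
 * stored tops occupy multibit indices 6, 7, 0 and 1.
 */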

/** \brief Returns the offset of the _last_ top stored in the ring. */
static
u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) {
    return xs->offset + ringOccupancy(xs, ringSize) - 1;
}

#if !defined(NDEBUG) || defined(DUMP_SUPPORT)
/** \brief For debugging: returns the total capacity of the range list. */
static UNUSED
u32 rangeListCapacity(const struct RepeatInfo *info) {
    u32 d = info->repeatMax - info->repeatMin;
    assert(d > 0); // a repeat with d == 0 should use the RING model instead
    return 2 * ((info->repeatMax / d) + 1);
}
#endif
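
/*
 * Worked example (editor's note): for a {2,5} repeat, d == 5 - 2 == 3, so the
 * range list holds at most 2 * ((5 / 3) + 1) == 4 entries; repeatStoreRange()
 * below merges tops whose windows overlap sufficiently to stay within this
 * bound.
 */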

#ifdef DEBUG
static
void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs,
              const u8 *ring) {
    const u32 ringSize = ringCapacity(info);
    DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize),
                 ringSize, xs->first, xs->last);

    u16 i = xs->first, n = 0;
    do {
        if (mmbit_isset(ring, ringSize, i)) {
            u64a ringOffset = xs->offset + n;
            printf("%llu ", ringOffset);
        }
        ++i, ++n;
        if (i == ringSize) {
            i = 0;
        }
    } while (i != xs->last);
    printf("\n");
}

static
void dumpRange(const struct RepeatInfo *info,
               const struct RepeatRangeControl *xs, const u16 *ring) {
    const u32 ringSize = rangeListCapacity(info);
    DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize);

    if (xs->num) {
        for (u32 i = 0; i < xs->num; i++) {
            printf("%llu ", xs->offset + unaligned_load_u16(ring + i));
        }
    } else {
        printf("empty");
    }
    printf("\n");
}

static
void dumpBitmap(const struct RepeatBitmapControl *xs) {
    DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset);
    u64a bitmap = xs->bitmap;
    while (bitmap) {
        printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap));
    }
    printf("\n");
}

static
void dumpTrailer(const struct RepeatInfo *info,
                 const struct RepeatTrailerControl *xs) {
    const u64a m_width = info->repeatMax - info->repeatMin;
    DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset,
                 xs->offset + m_width);
    u64a bitmap = xs->bitmap;
    if (bitmap) {
        printf(", also matches at: ");
        while (bitmap) {
            u32 idx = findAndClearMSB_64(&bitmap);
            printf("%llu ", xs->offset - idx - 1);
        }
    } else {
        printf(", no earlier matches");
    }
    printf("\n");
}

#endif // DEBUG

#ifndef NDEBUG
/** \brief For debugging: returns true if the range is ordered with no dupes. */
static UNUSED
int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) {
    for (u32 i = 1; i < xs->num; i++) {
        u16 a = unaligned_load_u16(ring + i - 1);
        u16 b = unaligned_load_u16(ring + i);
        if (a >= b) {
            return 0;
        }
    }
    return 1;
}
#endif

u64a repeatLastTopRing(const struct RepeatInfo *info,
                       const union RepeatControl *ctrl) {
    const u32 ringSize = ringCapacity(info);
    return ringLastTop(&ctrl->ring, ringSize);
}

u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) {
    const u16 *ring = (const u16 *)state;
    const struct RepeatRangeControl *xs = &ctrl->range;
    assert(xs->num);
    return xs->offset + unaligned_load_u16(ring + xs->num - 1);
}

u64a repeatLastTopBitmap(const union RepeatControl *ctrl) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;
    if (!xs->bitmap) {
        /* last top was too long ago */
        return 0;
    }
    return xs->offset + 63 - clz64(xs->bitmap);
}

u64a repeatLastTopTrailer(const struct RepeatInfo *info,
                          const union RepeatControl *ctrl) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;
    assert(xs->offset >= info->repeatMin);
    return xs->offset - info->repeatMin;
}

u64a repeatNextMatchRing(const struct RepeatInfo *info,
                         const union RepeatControl *ctrl, const void *state,
                         u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    const u8 *ring = (const u8 *)state;
    const u32 ringSize = ringCapacity(info);

    // We should have at least one top stored.
    assert(mmbit_any(ring, ringSize));
    assert(info->repeatMax < REPEAT_INF);

    // Increment offset, as we want the NEXT match.
    offset++;

    const u64a base_offset = xs->offset;
    DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset);

    u64a delta = offset - base_offset;
    if (offset < base_offset || delta < info->repeatMin) {
        DEBUG_PRINTF("before min repeat\n");
        return base_offset + info->repeatMin;
    }
    if (offset > ringLastTop(xs, ringSize) + info->repeatMax) {
        DEBUG_PRINTF("ring is stale\n");
        return 0; // no more matches
    }

    DEBUG_PRINTF("delta=%llu\n", delta);
    u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
    DEBUG_PRINTF("lower=%llu\n", lower);

    assert(lower < ringSize);

    // First scan, either to xs->last if there's no wrap-around or ringSize
    // (end of the underlying multibit) if we are wrapping.

    u32 begin = xs->first + lower;
    if (begin >= ringSize) {
        // This branch and sub tested a lot faster than using % (integer div).
        begin -= ringSize;
    }
    const u32 end = begin >= xs->last ? ringSize : xs->last;
    u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end);
    if (i != MMB_INVALID) {
        u32 j = i - begin + lower;
        return MAX(offset, base_offset + j + info->repeatMin);
    }

    // A second scan is necessary if we need to cope with wrap-around in the
    // ring buffer.

    if (begin >= xs->last) {
        i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last);
        if (i != MMB_INVALID) {
            u32 j = i + (ringSize - begin) + lower;
            return MAX(offset, base_offset + j + info->repeatMin);
        }
    }

    return 0;
}
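
/*
 * Worked example (editor's note): for a {3,10} repeat with tops stored at
 * offsets 100 and 104, asking for the next match after offset 101 bumps the
 * query to 102; its delta from the base (100) is 2, below repeatMin, so the
 * earliest possible match, 100 + 3 == 103, is returned. Asking for the next
 * match after 114 or later returns 0, since even 115 exceeds
 * lastTop + repeatMax == 114.
 */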

u64a repeatNextMatchRange(const struct RepeatInfo *info,
                          const union RepeatControl *ctrl, const void *state,
                          u64a offset) {
    const struct RepeatRangeControl *xs = &ctrl->range;
    const u16 *ring = (const u16 *)state;

    assert(xs->num > 0);
    assert(xs->num <= rangeListCapacity(info));
    assert(rangeListIsOrdered(xs, ring));
    assert(info->repeatMax < REPEAT_INF);

    for (u32 i = 0; i < xs->num; i++) {
        u64a base = xs->offset + unaligned_load_u16(ring + i);
        u64a first = base + info->repeatMin;
        if (offset < first) {
            return first;
        }
        if (offset < base + info->repeatMax) {
            return offset + 1;
        }
    }

    return 0;
}

u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
                           const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;
    const u64a base = xs->offset;
    u64a bitmap = xs->bitmap;

    // FIXME: quick exit if there is no match, based on last top in bitmap?

    while (bitmap) {
        u64a top = base + findAndClearLSB_64(&bitmap);
        if (offset < top + info->repeatMin) {
            return top + info->repeatMin;
        }
        if (offset < top + info->repeatMax) {
            return offset + 1;
        }
    }

    return 0; // No more matches.
}
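
/*
 * Worked example (editor's note): for a {3,10} repeat with base == 100 and
 * bitmap bits 0 and 5 set (tops at 100 and 105), a query at offset 104 first
 * considers top 100: 104 lies inside its window [103,110], so 105 is
 * returned. At offset 111 that window is exhausted, but top 105's window is
 * [108,115], so 112 is returned.
 */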

u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
                            const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;
    const u32 m_width = info->repeatMax - info->repeatMin;

    DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset);
    DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin,
                 info->repeatMax, m_width);

    assert(xs->offset >= info->repeatMin);

    if (offset >= xs->offset + m_width) {
        DEBUG_PRINTF("no more matches\n");
        return 0;
    }

    if (offset >= xs->offset) {
        DEBUG_PRINTF("inside most recent match window, next match %llu\n",
                     offset + 1);
        return offset + 1;
    }

    // Offset is before the match window, we need to consult the bitmap of
    // earlier match offsets.
    u64a bitmap = xs->bitmap;

    u64a diff = xs->offset - offset;
    DEBUG_PRINTF("diff=%llu\n", diff);
    if (diff <= 64) {
        assert(diff);
        bitmap &= (1ULL << (diff - 1)) - 1;
    }
    DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap);
    if (bitmap) {
        u32 idx = 63 - clz64(bitmap);
        DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx,
                     xs->offset - idx);
        DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1);
        u64a next_match = xs->offset - idx - 1;
        assert(next_match > offset);
        return next_match;
    }

    DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset);
    return xs->offset;
}
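
/*
 * Worked example (editor's note): each set bit idx in the trailer bitmap
 * records an earlier match at xs->offset - idx - 1. With xs->offset == 12 and
 * bitmap == 0b101 (matches at 11 and 9), a query at offset 10 has diff == 2,
 * so the bitmap is masked down to bits below diff - 1, leaving bit 0, and the
 * next match reported is 12 - 0 - 1 == 11.
 */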

/** \brief Store the first top in the ring buffer. */
static
void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring,
                         u64a offset, const u32 ringSize) {
    DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize);
    xs->offset = offset;
    mmbit_clear(ring, ringSize);
    mmbit_set(ring, ringSize, 0);
    xs->first = 0;
    xs->last = 1;
}

static really_inline
char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize,
                 const u64a offset) {
    u64a finalMatch = ringLastTop(xs, ringSize);
    if (offset - finalMatch >= ringSize) {
        DEBUG_PRINTF("all matches in ring are stale\n");
        return 1;
    }

    return 0;
}

void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl,
                     void *state, u64a offset, char is_alive) {
    struct RepeatRingControl *xs = &ctrl->ring;
    u8 *ring = (u8 *)state;
    const u32 ringSize = ringCapacity(info);
    assert(ringSize > 0);

    DEBUG_PRINTF("storing top for offset %llu in ring\n", offset);

    if (!is_alive || ringIsStale(xs, ringSize, offset)) {
        storeInitialRingTop(xs, ring, offset, ringSize);
    } else {
        assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order.
        u32 occ = ringOccupancy(xs, ringSize);
        u64a diff = offset - xs->offset;
        DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
        if (diff >= ringSize) {
            u32 push = diff - ringSize + 1;
            DEBUG_PRINTF("push ring %u\n", push);
            xs->first += push;
            if (xs->first >= ringSize) {
                xs->first -= ringSize;
            }
            xs->offset += push;
            diff -= push;
            occ -= push;
        }

        // There's now room in the ring for this top, so we write a run of
        // zeroes, then a one.
        DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
        assert(diff < ringSize);
        assert(diff >= occ);
        u32 n = diff - occ;

        u32 i = xs->last + n;

        mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize));
        if (i >= ringSize) {
            i -= ringSize;
            mmbit_unset_range(ring, ringSize, 0, i);
        }

        assert(i != xs->first);
        DEBUG_PRINTF("set bit %u\n", i);
        mmbit_set(ring, ringSize, i);
        xs->last = i + 1;
        if (xs->last == ringSize) {
            xs->last = 0;
        }
    }

    // Our ring indices shouldn't have spiraled off into uncharted space.
    assert(xs->first < ringSize);
    assert(xs->last < ringSize);

#ifdef DEBUG
    DEBUG_PRINTF("post-store ring state\n");
    dumpRing(info, xs, ring);
#endif

    // The final top stored in our ring should be the one we just wrote in.
    assert(ringLastTop(xs, ringSize) == offset);
}
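
/*
 * Usage sketch (editor's note): the is_alive flag tells the store routines
 * whether the repeat was already running. Passing 0 (or topping a stale ring)
 * resets the ring to hold just this top; passing 1 appends, and tops must
 * then arrive in strictly increasing offset order, as the assertion above
 * enforces.
 *
 * \code
 *     repeatStoreRing(info, &ctrl, state, 100, 0); // (re)initialize
 *     repeatStoreRing(info, &ctrl, state, 104, 1); // append a later top
 * \endcode
 */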

static really_inline
void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring,
                          u64a offset) {
    xs->offset = offset;
    xs->num = 1;
    unaligned_store_u16(ring, 0);
}

void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl,
                      void *state, u64a offset, char is_alive) {
    struct RepeatRangeControl *xs = &ctrl->range;
    u16 *ring = (u16 *)state;

    if (!is_alive) {
        DEBUG_PRINTF("storing initial top at %llu\n", offset);
        storeInitialRangeTop(xs, ring, offset);
        return;
    }

    DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n",
                 offset, xs->num, rangeListCapacity(info));

#ifdef DEBUG
    dumpRange(info, xs, ring);
#endif

    // Walk ring from front. Identify the number of stale elements, and shift
    // the whole ring to delete them.
    u32 i = 0;
    for (; i < xs->num; i++) {
        u64a this_offset = xs->offset + unaligned_load_u16(ring + i);
        DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset,
                     offset - this_offset);
        if (offset - this_offset <= info->repeatMax) {
            break;
        }
    }

    if (i == xs->num) {
        DEBUG_PRINTF("whole ring is stale\n");
        storeInitialRangeTop(xs, ring, offset);
        return;
    } else if (i > 0) {
        DEBUG_PRINTF("expiring %u stale tops\n", i);
        u16 first_offset = unaligned_load_u16(ring + i); // first live top
        for (u32 j = 0; j < xs->num - i; j++) {
            u16 val = unaligned_load_u16(ring + i + j);
            assert(val >= first_offset);
            unaligned_store_u16(ring + j, val - first_offset);
        }
        xs->offset += first_offset;
        xs->num -= i;
    }

#ifdef DEBUG
    DEBUG_PRINTF("post-expire:\n");
    dumpRange(info, xs, ring);
#endif

    if (xs->num == 1) {
        goto append;
    }

    // Let d = repeatMax - repeatMin
    // Examine penultimate entry x[-2].
    // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with
    // entry for offset.
    assert(xs->num >= 2);
    u32 d = info->repeatMax - info->repeatMin;
    u64a penultimate_offset =
        xs->offset + unaligned_load_u16(ring + xs->num - 2);
    if (offset - penultimate_offset <= d) {
        assert(offset - xs->offset <= (u16)-1);
        unaligned_store_u16(ring + xs->num - 1, offset - xs->offset);
        goto done;
    }

    // Otherwise, write a new entry for offset and return.

append:
    assert(offset - xs->offset <= (u16)-1);
    assert(xs->num < rangeListCapacity(info));
    unaligned_store_u16(ring + xs->num, offset - xs->offset);
    xs->num++;

done:
    assert(rangeListIsOrdered(xs, ring));
}
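
/*
 * Worked example (editor's note): for a {2,5} repeat (d == 3) with entries at
 * offsets 10 and 12, a new top at 13 is within d of the penultimate entry
 * (13 - 10 <= 3), so it overwrites the last entry and the list becomes
 * {10, 13}. Matches reachable from the discarded top at 12 remain covered by
 * the overlapping windows of 10 and 13, which is what keeps the list within
 * rangeListCapacity().
 */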

void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl,
                       u64a offset, char is_alive) {
    DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin,
                 info->repeatMax, offset);

    struct RepeatBitmapControl *xs = &ctrl->bitmap;
    if (!is_alive || !xs->bitmap) {
        DEBUG_PRINTF("storing initial top at %llu\n", offset);
        xs->offset = offset;
        xs->bitmap = 1U;
        return;
    }

#ifdef DEBUG
    DEBUG_PRINTF("pre-store:\n");
    dumpBitmap(xs);
#endif

    assert(offset >= xs->offset);

    u64a last_top = xs->offset + 63 - clz64(xs->bitmap);
    if (offset > last_top + info->repeatMax) {
        DEBUG_PRINTF("bitmap stale, storing initial top\n");
        xs->offset = offset;
        xs->bitmap = 1U;
        return;
    }

    u64a diff = offset - xs->offset;
    if (diff >= info->repeatMax + 1) {
        DEBUG_PRINTF("need expire, diff=%llu\n", diff);
        u64a push = diff - info->repeatMax;
        xs->offset += push;
        xs->bitmap = push >= 64 ? 0 : xs->bitmap >> push;
        DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset);
    }

    // Write a new entry.
    diff = offset - xs->offset;
    assert(diff < 64);
    xs->bitmap |= (1ULL << diff);

#ifdef DEBUG
    DEBUG_PRINTF("post-store:\n");
    dumpBitmap(xs);
#endif
}
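
/*
 * Worked example (editor's note): for a {3,10} repeat with xs->offset == 100
 * and bits 0 and 7 set (tops at 100 and 107), storing a top at 115 gives
 * diff == 15 >= repeatMax + 1, so the base is pushed forward by
 * 15 - 10 == 5 to 105: the bit for 100 is shifted out as stale, 107 becomes
 * bit 2, and the new top sets bit 115 - 105 == 10.
 */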

/** \brief Returns 1 if the ring has a match between (logical) index \a lower
 * and \a upper, excluding \a upper. */
static
int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring,
                 const u32 ringSize, u32 lower, u32 upper) {
    assert(lower < upper);
    assert(lower < ringSize);
    assert(upper <= ringSize);

    u32 i = xs->first + lower;
    if (i >= ringSize) {
        i -= ringSize;
    }

    // Performance tweak: if we're looking at a fixed repeat, we can just use
    // mmbit_isset.
    if (lower + 1 == upper) {
        return mmbit_isset(ring, ringSize, i);
    }

    u32 end = xs->first + upper;
    if (end >= ringSize) {
        end -= ringSize;
    }

    // First scan, either to end if there's no wrap-around or ringSize (end of
    // the underlying multibit) if we are wrapping.

    u32 scan_end = i < end ? end : ringSize;
    u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end);
    if (m != MMB_INVALID) {
        return 1;
    }

    // A second scan is necessary if we need to cope with wrap-around in the
    // ring buffer.

    if (i >= end) {
        m = mmbit_iterate_bounded(ring, ringSize, 0, end);
        return m != MMB_INVALID;
    }

    return 0;
}

/** Return a mask of ones in bit positions [0..v]. */
static really_inline
u64a mask_ones_to(u32 v) {
    if (v < 63) {
        return (1ULL << (v + 1)) - 1;
    } else {
        return ~(0ULL);
    }
}
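
/*
 * Example (editor's note): mask_ones_to(3) == 0xF (bits 0..3 set) and
 * mask_ones_to(63) == ~0ULL; the v == 63 case is handled separately because
 * evaluating 1ULL << 64 would be undefined behaviour.
 */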

void repeatStoreTrailer(const struct RepeatInfo *info,
                        union RepeatControl *ctrl, u64a offset, char is_alive) {
    DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin,
                 info->repeatMax, offset);

    struct RepeatTrailerControl *xs = &ctrl->trailer;

    /* The TRAILER repeat model stores the following data in its control block:
     *
     * 1. offset, which is the min extent of the most recent match window
     *    (i.e. corresponding to the most recent top)
     * 2. bitmap, which is a bitmap of up to repeatMin matches before
     *    the min extent offset.
     */

    const u64a next_extent = offset + info->repeatMin;

    if (!is_alive) {
        xs->offset = next_extent;
        xs->bitmap = 0;
        DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent);
        return;
    }

#ifdef DEBUG
    DEBUG_PRINTF("pre-store:\n");
    dumpTrailer(info, xs);
#endif

    const u32 m_width = info->repeatMax - info->repeatMin;
    DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset,
                 xs->offset + m_width);

    assert(next_extent > xs->offset);
    u64a diff = next_extent - xs->offset;
    DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width);

    assert(diff);
    xs->bitmap = diff < 64 ? xs->bitmap << diff : 0;

    // Switch on bits in the bitmask corresponding to matches in the previous
    // match window.
    if (diff <= m_width) {
        u64a m = mask_ones_to(diff - 1);
        xs->bitmap |= m;
    } else {
        u64a shift = diff - m_width - 1;
        if (shift < 64) {
            u64a m = mask_ones_to(m_width);
            m <<= shift;
            xs->bitmap |= m;
        }
    }

    DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap);

    // Update the min extent of the match window.
    xs->offset = next_extent;

    // Trim stale history: we only need repeatMin bytes of history.
    if (info->repeatMin < 63) {
        u64a mask = (1ULL << (info->repeatMin + 1)) - 1;
        xs->bitmap &= mask;
    }

#ifdef DEBUG
    DEBUG_PRINTF("post-store:\n");
    dumpTrailer(info, xs);
#endif
}
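
/*
 * Worked example (editor's note): for a {2,6} repeat (m_width == 4), a top at
 * 10 sets the extent to 12, giving the window [12,16]. A later top at 15
 * yields next_extent == 17 and diff == 5 > m_width, so bits 0..4 are switched
 * on to record the previous window [12,16] as history behind the new window;
 * the final trim then keeps only the most recent repeatMin + 1 history bits.
 */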

enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
                                    const union RepeatControl *ctrl,
                                    const void *state, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    const u8 *ring = (const u8 *)state;
    const u32 ringSize = ringCapacity(info);

    assert(mmbit_any(ring, ringSize));
    assert(offset >= xs->offset);

    DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset,
                 info->repeatMin, info->repeatMax);
#ifdef DEBUG
    DEBUG_PRINTF("ring state\n");
    dumpRing(info, xs, ring);
#endif

    if (offset - xs->offset < info->repeatMin) {
        DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
        return REPEAT_NOMATCH;
    }

    if (offset - ringLastTop(xs, ringSize) >= ringSize) {
        DEBUG_PRINTF("ring is stale\n");
        return REPEAT_STALE;
    }

    // If we're not stale, delta fits in the range [repeatMin, lastTop +
    // repeatMax], which fits in a u32.
    assert(offset - xs->offset < UINT32_MAX);
    u32 delta = (u32)(offset - xs->offset);
    DEBUG_PRINTF("delta=%u\n", delta);

    // Find the bounds on possible matches in the ring buffer.
    u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
    u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize));

    if (lower >= upper) {
        DEBUG_PRINTF("no matches to check\n");
        return REPEAT_NOMATCH;
    }

    DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper);
    if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
        return REPEAT_MATCH;
    }

    return REPEAT_NOMATCH;
}

enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
                                     const union RepeatControl *ctrl,
                                     const void *state, u64a offset) {
    const struct RepeatRangeControl *xs = &ctrl->range;
    const u16 *ring = (const u16 *)state;

    assert(xs->num > 0);
    assert(xs->num <= rangeListCapacity(info));
    assert(rangeListIsOrdered(xs, ring));

    // Walk the ring. For each entry x:
    // if (offset - x) falls inside repeat bounds, return success.

    // It may be worth doing tests on first and last elements first to bail
    // early if the whole ring is too young or stale.

    DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n",
                 xs->num, rangeListCapacity(info), offset,
                 info->repeatMin, info->repeatMax);
#ifdef DEBUG
    dumpRange(info, xs, ring);
#endif

    // Quick pre-check for minimum.
    assert(offset >= xs->offset);
    if (offset - xs->offset < info->repeatMin) {
        DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
        return REPEAT_NOMATCH;
    }

    // We check the most recent offset first, as we can establish staleness.
    u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1);
    assert(offset >= match);
    u64a diff = offset - match;
    if (diff > info->repeatMax) {
        DEBUG_PRINTF("range list is stale\n");
        return REPEAT_STALE;
    } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
        return REPEAT_MATCH;
    }

    // Check the other offsets in the list.
    u32 count = xs->num - 1;
    for (u32 i = 0; i < count; i++) {
        match = xs->offset + unaligned_load_u16(ring + i);
        assert(offset >= match);
        diff = offset - match;
        if (diff >= info->repeatMin && diff <= info->repeatMax) {
            return REPEAT_MATCH;
        }
    }

    return REPEAT_NOMATCH;
}

enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
                                      const union RepeatControl *ctrl,
                                      u64a offset) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;

    DEBUG_PRINTF("checking if offset=%llu is a match\n", offset);

#ifdef DEBUG
    dumpBitmap(xs);
#endif

    u64a bitmap = xs->bitmap;
    if (!bitmap) {
        DEBUG_PRINTF("no tops; stale\n");
        return REPEAT_STALE;
    }

    // Quick pre-check for minimum.
    const u64a base = xs->offset;
    assert(offset >= base);
    if (offset - base < info->repeatMin) {
        DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
        return REPEAT_NOMATCH;
    }

    // We check the most recent offset first, as we can establish staleness.
    u64a match = base + findAndClearMSB_64(&bitmap);
    DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
    assert(offset >= match);
    u64a diff = offset - match;
    if (diff > info->repeatMax) {
        DEBUG_PRINTF("stale\n");
        return REPEAT_STALE;
    } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
        return REPEAT_MATCH;
    }

    while (bitmap) {
        match = base + findAndClearLSB_64(&bitmap);
        DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
        assert(offset >= match);
        diff = offset - match;
        if (diff >= info->repeatMin && diff <= info->repeatMax) {
            return REPEAT_MATCH;
        }
    }

    return REPEAT_NOMATCH;
}

enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
                                       const union RepeatControl *ctrl,
                                       u64a offset) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;
    const u32 m_width = info->repeatMax - info->repeatMin;

    DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset,
                 xs->offset, xs->bitmap);

    if (offset > xs->offset + m_width) {
        DEBUG_PRINTF("stale\n");
        return REPEAT_STALE;
    }

    if (offset >= xs->offset) {
        DEBUG_PRINTF("in match window\n");
        return REPEAT_MATCH;
    }

    if (offset >= xs->offset - info->repeatMin) {
        u32 idx = xs->offset - offset - 1;
        DEBUG_PRINTF("check bitmap idx %u\n", idx);
        assert(idx < 64);
        if (xs->bitmap & (1ULL << idx)) {
            DEBUG_PRINTF("match in bitmap\n");
            return REPEAT_MATCH;
        }
    }

    DEBUG_PRINTF("no match\n");
    return REPEAT_NOMATCH;
}

/** \brief True if the given value can be packed into len bytes. */
static really_inline
int fits_in_len_bytes(u64a val, u32 len) {
    if (len >= 8) {
        return 1;
    }
    return val < (1ULL << (len * 8)); // len bytes can hold values < 2^(8*len)
}

static really_inline
void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) {
    assert(val <= offset);
    assert(fits_in_len_bytes(max, len));
    u64a delta = offset - val;
    if (delta >= max) {
        delta = max;
    }
    DEBUG_PRINTF("delta %llu\n", delta);
    assert(fits_in_len_bytes(delta, len));
    partial_store_u64a(dest, delta, len);
}
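
/*
 * Worked example (editor's note): packing is relative to the current stream
 * offset, so with offset == 1000, val == 990 and len == 2 the delta 10 is
 * stored, and loadPackedRelative() below recovers 1000 - 10 == 990 at unpack
 * time. Deltas saturate at max (the horizon), beyond which the exact value no
 * longer affects the repeat's behaviour.
 */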

static
void repeatPackRing(char *dest, const struct RepeatInfo *info,
                    const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;

    // Write out packed relative base offset.
    assert(info->packedCtrlSize > ring_indices_len);
    storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);

    // Write out ring indices.
    if (ring_indices_len == 4) {
        unaligned_store_u16(dest + offset_len, xs->first);
        unaligned_store_u16(dest + offset_len + 2, xs->last);
    } else {
        assert(xs->first < 256 && xs->last < 256);
        u8 *indices = (u8 *)dest + offset_len;
        indices[0] = xs->first;
        indices[1] = xs->last;
    }
}

static
void repeatPackOffset(char *dest, const struct RepeatInfo *info,
                      const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatOffsetControl *xs = &ctrl->offset;
    DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
    if (!info->packedCtrlSize) {
        assert(info->type == REPEAT_ALWAYS);
        DEBUG_PRINTF("externally guarded .*\n");
        return;
    }
    storePackedRelative(dest, xs->offset, offset, info->horizon,
                        info->packedCtrlSize);
}

static
void repeatPackRange(char *dest, const struct RepeatInfo *info,
                     const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatRangeControl *xs = &ctrl->range;

    // Write out packed relative base offset.
    assert(info->packedCtrlSize > 1);
    storePackedRelative(dest, xs->offset, offset, info->horizon,
                        info->packedCtrlSize - 1);

    // Write out range number of elements.
    dest[info->packedCtrlSize - 1] = xs->num;
}

static
void repeatPackBitmap(char *dest, const struct RepeatInfo *info,
                      const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatBitmapControl *xs = &ctrl->bitmap;
    const u32 bound = info->repeatMax;

    assert(offset >= xs->offset);
    u64a new_base = offset > bound ? offset - bound : 0;

    // Shift bitmap to begin at new_base rather than xs->offset.
    u64a bitmap = xs->bitmap;
    if (new_base >= xs->offset) {
        u64a shift = new_base - xs->offset;
        bitmap = shift < 64 ? bitmap >> shift : 0;
    } else {
        u64a shift = xs->offset - new_base;
        bitmap = shift < 64 ? bitmap << shift : 0;
    }

    DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize);

    // Write out packed bitmap.
    assert(fits_in_len_bytes(bitmap, info->packedCtrlSize));
    partial_store_u64a(dest, bitmap, info->packedCtrlSize);
}

static
void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info,
                              const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;
    // set ring index pointer according to patch count
    const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;

    // Write out packed relative base offset.
    assert(info->packedCtrlSize > ring_indices_len);
    storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);

    // Write out ring indices.
    if (ring_indices_len == 4) {
        unaligned_store_u16(dest + offset_len, xs->first);
        unaligned_store_u16(dest + offset_len + 2, xs->last);
    } else {
        assert(xs->first < 256 && xs->last < 256);
        u8 *indices = (u8 *)dest + offset_len;
        indices[0] = xs->first;
        indices[1] = xs->last;
    }
}

static
void repeatPackTrailer(char *dest, const struct RepeatInfo *info,
                       const union RepeatControl *ctrl, u64a offset) {
    const struct RepeatTrailerControl *xs = &ctrl->trailer;

    DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n",
                 offset, xs->offset, xs->bitmap);

    // XXX: xs->offset may be zero in the NFA path (effectively uninitialized).
    u64a top;
    if (xs->offset) {
        assert(xs->offset >= info->repeatMin);
        top = xs->offset - info->repeatMin;
    } else {
        top = 0;
    }

    top = offset - top; // Pack top relative to offset.

    u64a v[2];
    v[0] = MIN(top, info->horizon);
    v[1] = xs->bitmap;

    pack_bits_64(dest, v, info->packedFieldSizes, 2);
}

void repeatPack(char *dest, const struct RepeatInfo *info,
                const union RepeatControl *ctrl, u64a offset) {
    assert(dest && info && ctrl);

    switch ((enum RepeatType)info->type) {
    case REPEAT_RING:
        repeatPackRing(dest, info, ctrl, offset);
        break;
    case REPEAT_FIRST:
    case REPEAT_LAST:
        repeatPackOffset(dest, info, ctrl, offset);
        break;
    case REPEAT_RANGE:
        repeatPackRange(dest, info, ctrl, offset);
        break;
    case REPEAT_BITMAP:
        repeatPackBitmap(dest, info, ctrl, offset);
        break;
    case REPEAT_SPARSE_OPTIMAL_P:
        repeatPackSparseOptimalP(dest, info, ctrl, offset);
        break;
    case REPEAT_TRAILER:
        repeatPackTrailer(dest, info, ctrl, offset);
        break;
    case REPEAT_ALWAYS:
        /* nothing to do - no state */
        break;
    }
}

static really_inline
u64a loadPackedRelative(const char *src, u64a offset, u32 len) {
    u64a delta = partial_load_u64a(src, len);
    DEBUG_PRINTF("delta %llu\n", delta);
    assert(offset >= delta);
    return offset - delta;
}

static
void repeatUnpackRing(const char *src, const struct RepeatInfo *info,
                      u64a offset, union RepeatControl *ctrl) {
    struct RepeatRingControl *xs = &ctrl->ring;
    const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;
    xs->offset = loadPackedRelative(src, offset, offset_len);
    if (ring_indices_len == 4) {
        xs->first = unaligned_load_u16(src + offset_len);
        xs->last = unaligned_load_u16(src + offset_len + 2);
    } else {
        const u8 *indices = (const u8 *)src + offset_len;
        xs->first = indices[0];
        xs->last = indices[1];
    }
}

static
void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
                        u64a offset, union RepeatControl *ctrl) {
    struct RepeatOffsetControl *xs = &ctrl->offset;
    if (!info->packedCtrlSize) {
        assert(info->type == REPEAT_ALWAYS);
        DEBUG_PRINTF("externally guarded .*\n");
        xs->offset = 0;
    } else {
        xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
    }
    DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
                 info->horizon);
}

static
void repeatUnpackRange(const char *src, const struct RepeatInfo *info,
                       u64a offset, union RepeatControl *ctrl) {
    struct RepeatRangeControl *xs = &ctrl->range;
    xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize - 1);
    xs->num = src[info->packedCtrlSize - 1];
}

static
void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info,
                        u64a offset, union RepeatControl *ctrl) {
    struct RepeatBitmapControl *xs = &ctrl->bitmap;
    xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0;
    xs->bitmap = partial_load_u64a(src, info->packedCtrlSize);
}

static
void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info,
                                u64a offset, union RepeatControl *ctrl) {
    struct RepeatRingControl *xs = &ctrl->ring;
    const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
    const u32 offset_len = info->packedCtrlSize - ring_indices_len;
    xs->offset = loadPackedRelative(src, offset, offset_len);
    if (ring_indices_len == 4) {
        xs->first = unaligned_load_u16(src + offset_len);
        xs->last = unaligned_load_u16(src + offset_len + 2);
    } else {
        const u8 *indices = (const u8 *)src + offset_len;
        xs->first = indices[0];
        xs->last = indices[1];
    }
}

static
void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info,
                         u64a offset, union RepeatControl *ctrl) {
    struct RepeatTrailerControl *xs = &ctrl->trailer;

    u64a v[2];
    unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2);

    xs->offset = offset - v[0] + info->repeatMin;
    xs->bitmap = v[1];

    DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset,
                 xs->bitmap);
}

void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
                  union RepeatControl *ctrl) {
    assert(src && info && ctrl);

    switch ((enum RepeatType)info->type) {
    case REPEAT_RING:
        repeatUnpackRing(src, info, offset, ctrl);
        break;
    case REPEAT_FIRST:
    case REPEAT_LAST:
        repeatUnpackOffset(src, info, offset, ctrl);
        break;
    case REPEAT_RANGE:
        repeatUnpackRange(src, info, offset, ctrl);
        break;
    case REPEAT_BITMAP:
        repeatUnpackBitmap(src, info, offset, ctrl);
        break;
    case REPEAT_SPARSE_OPTIMAL_P:
        repeatUnpackSparseOptimalP(src, info, offset, ctrl);
        break;
    case REPEAT_TRAILER:
        repeatUnpackTrailer(src, info, offset, ctrl);
        break;
    case REPEAT_ALWAYS:
        /* nothing to do - no state */
        break;
    }
}

static really_inline
const u64a *getImplTable(const struct RepeatInfo *info) {
    const u64a *table = ((const u64a *)(ROUNDUP_PTR(
                                        ((const char *)(info) +
                                         sizeof(*info)),
                                        alignof(u64a))));
    return table;
}
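
/*
 * Layout note (editor's sketch): the compile-time repeat count table is laid
 * out immediately after the RepeatInfo structure, rounded up to u64a
 * alignment, so entry i is simply getImplTable(info)[i].
 */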

static
void storeInitialRingTopPatch(const struct RepeatInfo *info,
                              struct RepeatRingControl *xs,
                              u8 *state, u64a offset) {
    DEBUG_PRINTF("set the first patch, offset=%llu\n", offset);
    xs->offset = offset;

    u8 *active = state;
    u32 patch_count = info->patchCount;
    mmbit_clear(active, patch_count);
    mmbit_set(active, patch_count, 0);

    u8 *ring = active + info->patchesOffset;
    u32 encoding_size = info->encodingSize;
    partial_store_u64a(ring, 1ull, encoding_size);
    xs->first = 0;
    xs->last = 1;
}

static
u32 getSparseOptimalTargetValue(const struct RepeatInfo *info,
                                const u32 tval, u64a *val) {
    u32 patch_size = info->patchSize;
    const u64a *repeatTable = getImplTable(info);
    u32 loc = 0;
    DEBUG_PRINTF("val:%llu\n", *val);
    for (u32 i = 1; i <= patch_size - tval; i++) {
        u64a tmp = repeatTable[patch_size - i];
        if (*val >= tmp) {
            *val -= tmp;
            loc = i;
            i += (info->minPeriod - 1);
        }
    }

    return loc;
}

static
u64a sparseLastTop(const struct RepeatInfo *info,
                   const struct RepeatRingControl *xs, const u8 *state) {
    DEBUG_PRINTF("looking for last top\n");
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 encoding_size = info->encodingSize;

    u32 occ = ringOccupancy(xs, patch_count);
    u32 patch = xs->first + occ - 1;
    if (patch >= patch_count) {
        patch -= patch_count;
    }

    DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
    const u8 *ring = state + info->patchesOffset;
    u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);

    DEBUG_PRINTF("val:%llu\n", val);
    const u64a *repeatTable = getImplTable(info);
    for (s32 i = patch_size - 1; i >= 0; i--) {
        if (val >= repeatTable[i]) {
            DEBUG_PRINTF("xs->offset%llu v%u p%llu\n",
                         xs->offset, i, repeatTable[i]);
            return xs->offset + i + (occ - 1) * patch_size;
        }
    }

    assert(0);
    return 0;
}

u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
                                 const union RepeatControl *ctrl,
                                 const void *state) {
    return sparseLastTop(info, &ctrl->ring, state);
}

u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
                                   const union RepeatControl *ctrl,
                                   const void *state, u64a offset) {
    const struct RepeatRingControl *xs = &ctrl->ring;

    DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n",
                 info->repeatMin, info->repeatMax, offset);

    assert(offset >= xs->offset);

    u64a nextOffset = offset + 1;

    u32 patch_size = info->patchSize;
    u32 patch;
    u32 tval;
    if (nextOffset <= xs->offset + info->repeatMin) {
        patch = xs->first;
        tval = 0;
    } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) {
        DEBUG_PRINTF("ring is stale\n");
        return 0;
    } else {
        assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale
        u32 delta = (u32)(nextOffset - xs->offset);
        u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
        patch = lower / patch_size;
        tval = lower - patch * patch_size;
    }

    DEBUG_PRINTF("patch %u\n", patch);
    u32 patch_count = info->patchCount;
    if (patch >= patch_count) {
        return 0;
    }

    DEBUG_PRINTF("initial test for %u\n", tval);

    u32 begin = xs->first + patch;
    if (begin >= patch_count) {
        begin -= patch_count;
    }

    const u8 *active = (const u8 *)state;
    const u8 *ring = active + info->patchesOffset;
    u32 encoding_size = info->encodingSize;
    const u32 end = begin >= xs->last ? patch_count : xs->last;
    u32 low = tval;
    u64a diff = 0, loc = 0;
    DEBUG_PRINTF("begin %u end %u\n", begin, end);
    for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end);
         p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
                                                     p + 1, end)) {
        if (p != begin) {
            low = 0;
        }

        u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size);
        u32 p1 = 0;
        if (p >= xs->first) {
            p1 = p - xs->first;
        } else {
            p1 = p + patch_count - xs->first;
        }

        if (val) {
            loc = getSparseOptimalTargetValue(info, low, &val);
            diff = (p1 + 1) * patch_size - loc;
        }
        if (loc) {
            u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff);
            DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv);
            return rv;
        }
        low = 0;
    }

    low = 0;
    if (begin >= xs->last) {
        for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last);
             p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
                                                         p + 1, xs->last)) {

            u64a val = partial_load_u64a(ring + encoding_size * p,
                                         encoding_size);
            if (val) {
                loc = getSparseOptimalTargetValue(info, low, &val);
                diff = (p + 1) * patch_size - loc;
            }
            if (loc) {
                u64a rv = MAX(nextOffset, xs->offset + info->repeatMin +
                                          diff + (end - xs->first) * patch_size);
                DEBUG_PRINTF("next match at %llu\n", rv);
                return rv;
            }
        }
    }

    DEBUG_PRINTF("no more matches\n");
    return 0;
}

void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
                               union RepeatControl *ctrl, void *state,
                               u64a offset, char is_alive) {
    struct RepeatRingControl *xs = &ctrl->ring;
    u8 *active = (u8 *)state;

    DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset,
                 info->encodingSize);

    // If (a) this is the first top, or (b) the ring is stale, initialize the
    // ring and write this offset in as the first top.
    if (!is_alive ||
        offset > sparseLastTop(info, xs, state) + info->repeatMax) {
        storeInitialRingTopPatch(info, xs, active, offset);
        return;
    }

    // Tops should arrive in order, with no duplicates.
    assert(offset > sparseLastTop(info, xs, state));

    // As the ring is not stale, our delta should fit within a u32.
    assert(offset - xs->offset <= UINT32_MAX);
    u32 delta = (u32)(offset - xs->offset);
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 encoding_size = info->encodingSize;
    u32 patch = delta / patch_size;

    DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size,
                 patch);

    u8 *ring = active + info->patchesOffset;
    u32 occ = ringOccupancy(xs, patch_count);
    u64a val = 0;
    u32 idx;

    DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n",
                 patch, patch_count, occ);
    if (patch >= patch_count) {
        u32 patch_shift_count = patch - patch_count + 1;
        assert(patch >= patch_shift_count);
        DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
        xs->offset += patch_size * patch_shift_count;
        xs->first += patch_shift_count;
        if (xs->first >= patch_count) {
            xs->first -= patch_count;
        }
        idx = xs->last + patch - occ;
        mmbit_unset_range(active, patch_count, xs->last,
                          MIN(idx, patch_count));
        if (idx >= patch_count) {
            idx -= patch_count;
            mmbit_unset_range(active, patch_count, 0, idx + 1);
        }
        xs->last = idx + 1;
        if (xs->last == patch_count) {
            xs->last = 0;
        }
    } else if (patch < occ) {
        assert(patch == occ - 1);
        idx = xs->last == 0 ? patch_count - 1 : (u32)xs->last - 1;
        val = partial_load_u64a(ring + encoding_size * idx, encoding_size);
    } else {
        idx = xs->last + patch - occ;
        mmbit_unset_range(active, patch_count, xs->last,
                          MIN(idx, patch_count));
        if (idx >= patch_count) {
            idx -= patch_count;
            mmbit_unset_range(active, patch_count, 0, idx + 1);
        }
        xs->last = idx + 1;
        if (xs->last == patch_count) {
            xs->last = 0;
        }
    }

    assert((u64a)patch * patch_size <= delta);
    u32 diff = delta - patch * patch_size;
    const u64a *repeatTable = getImplTable(info);
    val += repeatTable[diff];

    DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ);
    DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n",
                 xs->first, xs->last, patch);
    DEBUG_PRINTF("value:%llu\n", val);
    assert(fits_in_len_bytes(val, encoding_size));
    partial_store_u64a(ring + encoding_size * idx, val, encoding_size);
    mmbit_set(active, patch_count, idx);
}
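
/*
 * Encoding note (editor's sketch): each patch stores a single integer that is
 * the sum of repeatTable[diff] over the tops in that patch, where diff is the
 * top's position within the patch. Given minPeriod, the table grows quickly
 * enough that sparseLastTop() and getSparseOptimalTargetValue() can decode
 * top positions greedily, from the largest table entry downwards.
 */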

static
char sparseHasMatch(const struct RepeatInfo *info, const u8 *state,
                    u32 lower, u32 upper) {
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 encoding_size = info->encodingSize;
    u32 patch_lower = lower / patch_size;
    u32 patch_upper = upper / patch_size;
    u32 diff = lower - patch_lower * patch_size;

    DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
    const u64a *repeatTable = getImplTable(info);

    const u8 *ring = state + info->patchesOffset;
    const u8 *active = state;
    u64a val;
    // test the first patch
    if (mmbit_isset(active, patch_count, patch_lower)) {
        val = partial_load_u64a(ring + encoding_size * patch_lower,
                                encoding_size);
        DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n",
                     patch_size, diff, repeatTable[diff]);
        DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n",
                     patch_lower, patch_upper);
        if (patch_upper == patch_lower) {
            u32 limit = upper - patch_lower * patch_size;
            getSparseOptimalTargetValue(info, limit + 1, &val);
        }
        if (val >= repeatTable[diff]) {
            return 1;
        }
    }

    if (patch_lower == patch_upper) {
        return 0;
    }

    // test the patches between first and last
    u32 m = mmbit_iterate_bounded(active, patch_count,
                                  patch_lower + 1, patch_upper);
    if (m != MMB_INVALID) {
        return 1;
    }

    if (patch_upper == patch_count) {
        return 0;
    }

    // test the last patch
    if (!mmbit_isset(active, patch_count, patch_upper)) {
        return 0;
    }
    diff = (patch_upper + 1) * patch_size - upper;
    DEBUG_PRINTF("diff=%u\n", diff);
    val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size);
    getSparseOptimalTargetValue(info, patch_size - diff + 1, &val);
    if (val) {
        DEBUG_PRINTF("last patch: val=%llu\n", val);
        return 1;
    }

    return 0;
}

enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
                                              const union RepeatControl *ctrl,
                                              const void *state, u64a offset) {
    DEBUG_PRINTF("check for match at %llu corresponding to trigger "
                 "at [%llu, %llu]\n", offset, offset - info->repeatMax,
                 offset - info->repeatMin);

    const struct RepeatRingControl *xs = &ctrl->ring;
    const u8 *ring = (const u8 *)state;

    assert(offset >= xs->offset);

    if (offset < xs->offset + info->repeatMin) {
        DEBUG_PRINTF("too soon\n");
        return REPEAT_NOMATCH;
    } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) {
        DEBUG_PRINTF("stale\n");
        return REPEAT_STALE;
    }

    // Our delta between the base offset of the ring and the current offset
    // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This
    // range fits comfortably within a u32.
    assert(offset - xs->offset <= UINT32_MAX);

    u32 delta = (u32)(offset - xs->offset);
    u32 patch_size = info->patchSize;
    u32 patch_count = info->patchCount;
    u32 occ = ringOccupancy(xs, patch_count);

    u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
    u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1);

    DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
    u32 patch_lower = lower / patch_size;
    u32 patch_upper = upper / patch_size;

    if (patch_lower >= occ) {
        DEBUG_PRINTF("too late\n");
        return REPEAT_NOMATCH;
    }

    u32 remaining_lower = lower - patch_lower * patch_size;
    u32 remaining_upper = upper - patch_upper * patch_size;
    patch_lower += xs->first;
    patch_upper += xs->first;
    if (patch_lower >= patch_count) {
        patch_lower -= patch_count;
        patch_upper -= patch_count;
    } else if (patch_upper >= patch_count) {
        patch_upper -= patch_count;
    }

    DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n",
                 xs->first, xs->last, patch_lower, patch_upper);

    u32 scan_end;
    const char is_not_wrapped = (patch_lower <= patch_upper);
    if (is_not_wrapped) {
        scan_end = patch_upper * patch_size + remaining_upper;
    } else {
        scan_end = patch_count * patch_size;
    }

    lower = patch_lower * patch_size + remaining_lower;
    if (sparseHasMatch(info, ring, lower, scan_end)) {
        return REPEAT_MATCH;
    }

    if (!is_not_wrapped) {
        upper -= (patch_count - xs->first) * patch_size;
        if (sparseHasMatch(info, ring, 0, upper)) {
            return REPEAT_MATCH;
        }
    }

    return REPEAT_NOMATCH;
}