/*
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "mpv.h"

#include "mpv_internal.h"
#include "nfa_api.h"
#include "nfa_api_queue.h"
#include "nfa_internal.h"
#include "shufti.h"
#include "truffle.h"
#include "ue2common.h"
#include "vermicelli.h"
#include "vermicelli_run.h"
#include "util/multibit.h"
#include "util/partial_store.h"
#include "util/simd_utils.h"
#include "util/unaligned.h"

#include <string.h>

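/* Minimum repeat count for which find_last_bad() uses its skip-ahead
 * strategy; below this it falls back to a plain (n)vermicelli run search. */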
#define MIN_SKIP_REPEAT 32

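/* Priority queue of pending kilopuff events, keyed on trigger location so
 * that the earliest pending trigger is at the top (see util/pqueue.h, which
 * consumes these definitions). */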
typedef struct mpv_pq_item PQ_T;
#define PQ_COMP(pqc_items, a, b) \
    ((pqc_items)[a].trigger_loc < (pqc_items)[b].trigger_loc)
#define PQ_COMP_B(pqc_items, a, b_fixed) \
    ((pqc_items)[a].trigger_loc < (b_fixed).trigger_loc)

#include "util/pqueue.h"

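/* Returns a pointer to the nth counter within the decompressed state. */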
static really_inline
u64a *get_counter_n(struct mpv_decomp_state *s,
                    const struct mpv *m, u32 n) {
    return (u64a *)((char *)s + get_counter_info(m)[n].counter_offset);
}

static really_inline
u64a *get_counter_for_kilo(struct mpv_decomp_state *s,
                           const struct mpv_kilopuff *kp) {
    return (u64a *)((char *)s + kp->counter_offset);
}

static really_inline
u64a get_counter_value_for_kilo(struct mpv_decomp_state *s,
                                const struct mpv_kilopuff *kp) {
    return *get_counter_for_kilo(s, kp) + s->counter_adj;
}

static really_inline
const u64a *get_counter_for_kilo_c(const struct mpv_decomp_state *s,
                                   const struct mpv_kilopuff *kp) {
    return (const u64a *)((const char *)s + kp->counter_offset);
}

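/* Folds the lazily accumulated counter_adj delta into every counter so that
 * the stored counter values are absolute again. */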
static never_inline
void normalize_counters(struct mpv_decomp_state *dstate, const struct mpv *m) {
    u64a adj = dstate->counter_adj;
    u64a *counters = get_counter_n(dstate, m, 0);

    if (!adj) {
        return;
    }

    for (u32 i = 0; i < m->counter_count; i++) {
        /* update all counters - alive or dead */
        counters[i] += adj;
        DEBUG_PRINTF("counter %u: %llu\n", i, counters[i]);
    }

    dstate->counter_adj = 0;
}

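/* Fires reports for every kilopuff marked in the reporters multibit at the
 * given offset. Unbounded reports which are not simple-exhaustible are also
 * appended to the report list rl so that the caller can replay them at
 * subsequent offsets. Returns MO_HALT_MATCHING if the callback asks us to
 * stop. */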
static really_inline
char processReports(const struct mpv *m, u8 *reporters,
                    const struct mpv_decomp_state *dstate, u64a counter_adj,
                    u64a report_offset, NfaCallback cb, void *ctxt,
                    ReportID *rl, u32 *rl_count_out) {
    DEBUG_PRINTF("reporting at offset %llu\n", report_offset);
    const struct mpv_kilopuff *kp = (const void *)(m + 1);
    u32 rl_count = 0;

    for (u32 i = mmbit_iterate(reporters, m->kilo_count, MMB_INVALID);
         i != MMB_INVALID; i = mmbit_iterate(reporters, m->kilo_count, i)) {
        const struct mpv_puffette *curr = dstate->active[i].curr;
        u64a curr_counter_val = *get_counter_for_kilo_c(dstate, &kp[i])
                              + counter_adj;
        DEBUG_PRINTF("kilo %u, underlying counter: %llu current: %llu\n", i,
                     *get_counter_for_kilo_c(dstate, &kp[i]), curr_counter_val);
        assert(curr_counter_val != MPV_DEAD_VALUE); /* counter_adj should take
                                                     * care of the case where
                                                     * the underlying value
                                                     * is -1 */
        char did_stuff = 0;

        while (curr->report != INVALID_REPORT) {
            assert(curr_counter_val >= curr->repeats);
            if (curr->unbounded || curr_counter_val == curr->repeats) {
                DEBUG_PRINTF("report %u at %llu\n", curr->report,
                             report_offset);

                if (curr->unbounded && !curr->simple_exhaust) {
                    assert(rl_count < m->puffette_count);
                    *rl = curr->report;
                    ++rl;
                    rl_count++;
                }

                if (cb(0, report_offset, curr->report, ctxt) ==
                    MO_HALT_MATCHING) {
                    DEBUG_PRINTF("bailing\n");
                    return MO_HALT_MATCHING;
                }
                did_stuff = 1;
            }

            curr--;
        }

        if (!did_stuff) {
            mmbit_unset(reporters, m->kilo_count, i);
        }
    }

    *rl_count_out = rl_count;
    return MO_CONTINUE_MATCHING;
}

static
ReportID *get_report_list(const struct mpv *m, struct mpv_decomp_state *s) {
    return (ReportID *)((char *)s + m->report_list_offset);
}

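/* Reports for a range of offsets: full report processing is done at the
 * first offset, then the unbounded reports collected there are replayed at
 * each subsequent offset in the range. */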
static really_inline
char processReportsForRange(const struct mpv *m, u8 *reporters,
                            struct mpv_decomp_state *dstate, u64a first_offset,
                            size_t length, NfaCallback cb, void *ctxt) {
    if (!length) {
        return MO_CONTINUE_MATCHING;
    }

    u64a counter_adj = dstate->counter_adj;
    u32 rl_count = 0;
    ReportID *rl = get_report_list(m, dstate);
    char rv = processReports(m, reporters, dstate, 1 + counter_adj,
                             first_offset + 1, cb, ctxt, rl, &rl_count);
    if (rv != MO_CONTINUE_MATCHING) {
        DEBUG_PRINTF("bailing\n");
        return rv;
    }
    if (!rl_count) {
        return MO_CONTINUE_MATCHING;
    }

    DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count);

    for (size_t i = 2; i <= length; i++) {
        for (u32 j = 0; j < rl_count; j++) {
            if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) {
                DEBUG_PRINTF("bailing\n");
                return MO_HALT_MATCHING;
            }
        }
    }

    return MO_CONTINUE_MATCHING;
}

/* returns last puffette that we have satisfied */
static
const struct mpv_puffette *get_curr_puff(const struct mpv *m,
                                         const struct mpv_kilopuff *kp,
                                         struct mpv_decomp_state *dstate) {
    u64a counter = *get_counter_for_kilo(dstate, kp);
    assert(counter != MPV_DEAD_VALUE);

    const struct mpv_puffette *p = get_puff_array(m, kp);
    DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter);
    DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report);
    while (counter + 1 >= p->repeats && p->report != INVALID_REPORT) {
        DEBUG_PRINTF("advancing\n");
        ++p;
        DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report);
    }

    return p - 1;
}

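/* Returns the initial puffette for a freshly started kilopuff: the entry
 * preceding the first puffette that requires more than a single repeat. */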
static
const struct mpv_puffette *get_init_puff(const struct mpv *m,
                                         const struct mpv_kilopuff *kp) {
    const struct mpv_puffette *p = get_puff_array(m, kp);
    while (p->repeats == 1) {
        ++p;
    }
    return p - 1;
}

/* returns the last puffette whose repeats have been satisfied */
static really_inline
const struct mpv_puffette *update_curr_puff(const struct mpv *m, u8 *reporters,
                                            u64a counter,
                                            const struct mpv_puffette *in,
                                            u32 kilo_index) {
    assert(counter != MPV_DEAD_VALUE);

    const struct mpv_puffette *p = in;
    DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter);
    DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report);
    while (counter + 1 >= p[1].repeats && p[1].report != INVALID_REPORT) {
        DEBUG_PRINTF("advancing\n");
        ++p;
        DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report);
    }

    if (p != in) {
        mmbit_set(reporters, m->kilo_count, kilo_index);
    }

    return p;
}

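/* Returns the length of the prefix of buf that lies within the kilopuff's
 * reach, using the accelerated scanner appropriate for its type. */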
static really_inline
size_t limitByReach(const struct mpv_kilopuff *kp, const u8 *buf,
                    size_t length) {
    if (kp->type == MPV_VERM) {
        return vermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf;
    } else if (kp->type == MPV_SHUFTI) {
        m128 mask_lo = kp->u.shuf.mask_lo;
        m128 mask_hi = kp->u.shuf.mask_hi;
        return shuftiExec(mask_lo, mask_hi, buf, buf + length) - buf;
    } else if (kp->type == MPV_TRUFFLE) {
        return truffleExec(kp->u.truffle.mask1, kp->u.truffle.mask2,
                           buf, buf + length) - buf;
    } else if (kp->type == MPV_NVERM) {
        return nvermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf;
    }

    assert(kp->type == MPV_DOT);
    return length;
}

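/* Primes the decompressed state for a new buffer: for each active kilopuff,
 * finds its current puffette, computes how far it can run (limited by reach,
 * dead point and auto-restart handling) and queues its next pending trigger
 * on the pq. */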
static never_inline
void fillLimits(const struct mpv *m, u8 *active, u8 *reporters,
                struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
                const u8 *buf, size_t length) {
    DEBUG_PRINTF("filling limits %zu\n", length);
    assert(!dstate->pq_size);

    if (!length) {
        DEBUG_PRINTF("0 length\n");
        return;
    }

    const struct mpv_kilopuff *kp = (const void *)(m + 1);

    for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID);
         i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) {
        dstate->active[i].curr = get_curr_puff(m, &kp[i], dstate);
        if (dstate->active[i].curr->report != INVALID_REPORT) {
            /* this kilo puff may fire reports */
            mmbit_set(reporters, m->kilo_count, i);
        }

        u64a lim = limitByReach(&kp[i], buf, length);
        DEBUG_PRINTF("lim %llu/%zu\n", lim, length);

        if (kp[i].dead_point != MPV_DEAD_VALUE) {
            assert(!kp[i].auto_restart);
            u64a counter = get_counter_value_for_kilo(dstate, &kp[i]);
            u64a dp_trigger = kp[i].dead_point - counter;
            if (dp_trigger < lim) {
                DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger);
                lim = dp_trigger;
            }
        }

        if (kp[i].auto_restart && !lim) {
            *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE;
            mmbit_unset(reporters, m->kilo_count, i);
            /* the counter value will cause the next_trigger calculation below
             * to adjust correctly */
            if (length == 1) {
                dstate->active[i].limit = 0;
                continue;
            }

            lim = limitByReach(&kp[i], buf + 1, length - 1) + 1;

            /* restart active counters */
            dstate->active[i].curr = get_init_puff(m, &kp[i]);
            assert(dstate->active[i].curr[0].report == INVALID_REPORT);

            DEBUG_PRINTF("lim now %llu/%zu\n", lim, length);
        }

        dstate->active[i].limit = lim;
        if (!lim) {
            mmbit_unset(active, m->kilo_count, i);
            mmbit_unset(reporters, m->kilo_count, i);
            continue;
        }
        if (dstate->active[i].curr[1].report != INVALID_REPORT) {
            u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL
                             - *get_counter_for_kilo(dstate, &kp[i]);
            DEBUG_PRINTF("next trigger %u\n", next_trigger);
            lim = MIN(lim, next_trigger);
        }

        if (lim != length) {
            struct mpv_pq_item temp = {
                .trigger_loc = lim,
                .kilo = i
            };

            DEBUG_PRINTF("push for %u at %llu\n", i, lim);
            pq_insert(pq, dstate->pq_size, temp);
            ++dstate->pq_size;
        }

        assert(lim || kp[i].auto_restart);
    }

    DEBUG_PRINTF("filled\n");
    dstate->filled = 1;
}

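/* Switches on kilopuff i in response to an MQE_TOP_N event at loc: resets its
 * counter, establishes its limit and initial puffette, and queues its next
 * trigger on the pq. */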
static never_inline
void handleTopN(const struct mpv *m, s64a loc, u8 *active, u8 *reporters,
                struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
                const u8 *buf, size_t length, u32 i) {
    assert(i < m->kilo_count);
    DEBUG_PRINTF("MQE_TOP + %u @%lld\n", i, loc);
    if (mmbit_set(active, m->kilo_count, i)) {
        DEBUG_PRINTF("kilo is already alive and kicking\n");
        return;
    }

    const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1);

    assert(!kp[i].auto_restart); /* handle later/never */

    /* we need to ensure that the counters are up to date */
    normalize_counters(dstate, m);

    /* reset counter */
    *get_counter_for_kilo(dstate, &kp[i]) = 0;

    if ((size_t)loc == length) {
        /* end of buffer, just make sure it is active */
        dstate->active[i].limit = loc;
        dstate->active[i].curr = get_init_puff(m, &kp[i]);
        return;
    }

    /* find the limit */
    u64a lim = limitByReach(&kp[i], buf + loc, length - loc) + loc;

    /* no need to worry about dead_point triggers here as kilopuff must first
     * update chain (to fire a report) before it goes dead. */

    if (lim == (u64a)loc) {
        DEBUG_PRINTF("dead on arrival\n");
        mmbit_unset(active, m->kilo_count, i);
        return;
    }
    dstate->active[i].limit = lim;

    /* setup puffette, find next trigger */
    dstate->active[i].curr = get_init_puff(m, &kp[i]);
    if (dstate->active[i].curr[1].report != INVALID_REPORT) {
        u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL + loc;
        lim = MIN(lim, next_trigger);
    }

    assert(dstate->active[i].curr[0].repeats == 1
           || dstate->active[i].curr[0].report == INVALID_REPORT);
    if (dstate->active[i].curr[0].repeats == 1) {
        DEBUG_PRINTF("yippee\n");
        mmbit_set(reporters, m->kilo_count, i);
    }

    assert(lim > (u64a)loc);

    /* add to pq */
    if (lim != length) {
        struct mpv_pq_item temp = {
            .trigger_loc = lim,
            .kilo = i
        };

        DEBUG_PRINTF("push for %u at %llu\n", i, lim);
        pq_insert(pq, dstate->pq_size, temp);
        ++dstate->pq_size;
    }
}

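/* Switches off a kilopuff that has reached its limit and pops its pq
 * entry. */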
static really_inline
void killKilo(const struct mpv *m, u8 *active, u8 *reporters,
              struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, u32 i) {
    DEBUG_PRINTF("squashing kilo %u (progress %llu, limit %llu)\n",
                 i, pq_top(pq)->trigger_loc, dstate->active[i].limit);
    mmbit_unset(active, m->kilo_count, i);
    mmbit_unset(reporters, m->kilo_count, i);

    pq_pop(pq, dstate->pq_size);
    dstate->pq_size--;
}

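/* Advances kilopuff i's current puffette at curr_loc and replaces its top pq
 * entry with the next trigger location: the next repeat boundary, the dead
 * point, or the existing limit, whichever comes first. */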
static really_inline
void updateKiloChains(const struct mpv *m, u8 *reporters,
                      struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
                      u64a curr_loc, size_t buf_length, u32 i) {
    const struct mpv_kilopuff *kp = (const void *)(m + 1);
    u64a counter = get_counter_value_for_kilo(dstate, &kp[i]);

    DEBUG_PRINTF("updating active puff for kilo %u\n", i);
    dstate->active[i].curr = update_curr_puff(m, reporters, counter,
                                              dstate->active[i].curr, i);

    u64a next_trigger = dstate->active[i].limit;

    if (dstate->active[i].curr[1].report != INVALID_REPORT) {
        u64a next_rep_trigger = dstate->active[i].curr[1].repeats - 1 - counter
                              + curr_loc;

        next_trigger = MIN(next_trigger, next_rep_trigger);
    } else if (kp[i].dead_point != MPV_DEAD_VALUE) {
        u64a dp_trigger = kp[i].dead_point - counter + curr_loc;
        DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger);
        if (dp_trigger < dstate->active[i].limit) {
            dstate->active[i].limit = dp_trigger;
            next_trigger = dp_trigger;
        }
    }

    DEBUG_PRINTF("next trigger location is %llu\n", next_trigger);

    if (next_trigger < buf_length) {
        assert(dstate->pq_size <= m->kilo_count);
        assert(next_trigger > pq_top(pq)->trigger_loc);
        struct mpv_pq_item temp = {
            .trigger_loc = next_trigger,
            .kilo = i
        };

        DEBUG_PRINTF("(replace) push for %u at %llu\n", i, next_trigger);
        pq_replace_top(pq, dstate->pq_size, temp);
    } else {
        pq_pop(pq, dstate->pq_size);
        dstate->pq_size--;
        DEBUG_PRINTF("PQ_POP\n");
    }
    DEBUG_PRINTF("pq size now %u next top %llu\n", dstate->pq_size,
                 pq_top(pq)->trigger_loc);
}

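/* Single-byte shufti membership test: non-zero iff c is in the character
 * class described by the lo/hi nibble masks. */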
static really_inline
u8 do_single_shufti(const m128 l, const m128 h, u8 c) {
    const u8 *lo = (const u8 *)&l;
    const u8 *hi = (const u8 *)&h;
    return lo[c & 0xf] & hi[c >> 4];
}

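/* Starting from a reach-failing byte at curr, returns the last reach-failing
 * location that can be reached without ever skipping over a run of min_rep
 * matching bytes; used to fast-forward an auto-restarting kilopuff over a
 * region where it cannot accumulate enough repeats to report. */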
static really_inline
size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf,
                     size_t length, size_t curr, u32 min_rep) {
    assert(kp->type != MPV_DOT);

    DEBUG_PRINTF("repeats = %u\n", min_rep);
    /* TODO: this should be replaced by some sort of SIMD stuff */

    if (kp->type == MPV_VERM) {
        if (min_rep < MIN_SKIP_REPEAT) {
            return find_nverm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr,
                                  buf + length) - buf - 1;
        }

    verm_restart:;
        assert(buf[curr] == kp->u.verm.c);
        size_t test = curr;
        if (curr + min_rep < length) {
            test = curr + min_rep;
        } else {
            test = length - 1;
        }

        while (test > curr) {
            if (buf[test] == kp->u.verm.c) {
                curr = test;
                if (curr == length - 1) {
                    return curr;
                }
                goto verm_restart;
            }
            --test;
        }
    } else if (kp->type == MPV_SHUFTI) {
        m128 lo = kp->u.shuf.mask_lo;
        m128 hi = kp->u.shuf.mask_hi;
    shuf_restart:
        assert(do_single_shufti(lo, hi, buf[curr]));
        size_t test = curr;
        if (curr + min_rep < length) {
            test = curr + min_rep;
        } else {
            test = length - 1;
        }

        while (test > curr) {
            if (do_single_shufti(lo, hi, buf[test])) {
                DEBUG_PRINTF("updating curr from %zu to %zu\n", curr, test);
                curr = test;
                if (curr == length - 1) {
                    return curr;
                }
                goto shuf_restart;
            }
            --test;
        }
    } else if (kp->type == MPV_TRUFFLE) {
        const m128 mask1 = kp->u.truffle.mask1;
        const m128 mask2 = kp->u.truffle.mask2;
    truffle_restart:;
        size_t test = curr;
        if (curr + min_rep < length) {
            test = curr + min_rep;
        } else {
            test = length - 1;
        }

        while (test > curr) {
            const u8 *rv = truffleExec(mask1, mask2, buf + test,
                                       buf + test + 1);
            if (rv == buf + test) {
                curr = test;
                if (curr == length - 1) {
                    return curr;
                }
                goto truffle_restart;
            }
            --test;
        }
    } else if (kp->type == MPV_NVERM) {
        if (min_rep < MIN_SKIP_REPEAT) {
            return find_verm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr,
                                 buf + length) - buf - 1;
        }

    nverm_restart:;
        assert(buf[curr] != kp->u.verm.c);
        size_t test = curr;
        if (curr + min_rep < length) {
            test = curr + min_rep;
        } else {
            test = length - 1;
        }

        while (test > curr) {
            if (buf[test] != kp->u.verm.c) {
                curr = test;
                if (curr == length - 1) {
                    return curr;
                }
                goto nverm_restart;
            }
            --test;
        }
    } else {
        assert(0);
    }

    return curr;
}

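/* Restarts an auto-restarting kilopuff whose reach failed at prev_limit:
 * resets its counter and puffettes, fast-forwards over any region where it
 * cannot yet report, then computes its new limit and requeues it. */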
static really_inline
void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters,
                 struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
                 const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) {
    const struct mpv_kilopuff *kp = (const void *)(m + 1);
    assert(kp[i].auto_restart);
    assert(mmbit_isset(active, m->kilo_count, i));

    DEBUG_PRINTF("we got to %llu,%llu\n", prev_limit, dstate->active[i].limit);
    assert(prev_limit == dstate->active[i].limit);

    DEBUG_PRINTF("resetting counter\n");

    /* we need to ensure that the counters are up to date */
    normalize_counters(dstate, m);

    /* current byte is dead, will wrap to 0 after processing this byte */
    assert(MPV_DEAD_VALUE + 1 == 0);
    *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE;

    DEBUG_PRINTF("resetting puffettes\n");
    dstate->active[i].curr = get_init_puff(m, &kp[i]);

    assert(dstate->active[i].curr[0].report == INVALID_REPORT);
    /* TODO: handle restart .{1,}s */

    mmbit_unset(reporters, m->kilo_count, i);

    if (prev_limit != buf_length - 1) {
        size_t last_bad = find_last_bad(&kp[i], buf, buf_length, prev_limit,
                                        dstate->active[i].curr[1].repeats);
        assert(last_bad >= prev_limit && last_bad < buf_length);
        if (last_bad != prev_limit) {
            /* there is no point in getting restarted at this location */
            dstate->active[i].limit = last_bad;
            assert(dstate->pq_size <= m->kilo_count);
            struct mpv_pq_item temp = {
                .trigger_loc = last_bad,
                .kilo = i
            };

            pq_replace_top(pq, dstate->pq_size, temp);
            return;
        }
    }

    /* TODO: skipping would really come in handy about now */
    u64a lim;
    if (buf_length > prev_limit + 1) {
        lim = limitByReach(&kp[i], buf + prev_limit + 1,
                           buf_length - (prev_limit + 1)) +
              prev_limit + 1;
    } else {
        assert(buf_length == prev_limit + 1);
        lim = buf_length;
    }
    DEBUG_PRINTF("next limit is %llu\n", lim);

    assert(lim > prev_limit);

    dstate->active[i].limit = lim;

    if (dstate->active[i].curr[1].report != INVALID_REPORT) {
        u32 next_trigger = dstate->active[i].curr[1].repeats + prev_limit;
        lim = MIN(lim, next_trigger);
    }

    DEBUG_PRINTF("next trigger for kilo at %llu\n", lim);

    if (lim < buf_length) {
        assert(dstate->pq_size <= m->kilo_count);
        assert(lim >= prev_limit);
        struct mpv_pq_item temp = {
            .trigger_loc = lim,
            .kilo = i
        };

        pq_replace_top(pq, dstate->pq_size, temp);
    } else {
        pq_pop(pq, dstate->pq_size);
        dstate->pq_size--;
    }
}

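/* Services every pq event that is due at loc: kilopuffs whose limit has been
 * reached are killed (or restarted if auto-restarting); the rest have their
 * puffette chains advanced and their pq entries replaced. */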
static really_inline
void handle_events(const struct mpv *m, u8 *active, u8 *reporters,
                   struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
                   u64a loc, const u8 *buf, size_t buf_length) {
    const struct mpv_kilopuff *kp = (const void *)(m + 1);

    while (dstate->pq_size && pq_top(pq)->trigger_loc <= loc) {
        assert(pq_top(pq)->trigger_loc == loc);

        u32 kilo = pq_top(pq)->kilo;

        DEBUG_PRINTF("pop for kilo %u at %llu\n", kilo,
                     pq_top(pq)->trigger_loc);

        if (dstate->active[kilo].limit <= loc) {
            if (!kp[kilo].auto_restart) {
                killKilo(m, active, reporters, dstate, pq, kilo);
            } else {
                restartKilo(m, active, reporters, dstate, pq, buf, loc,
                            buf_length, kilo);
            }
        } else {
            updateKiloChains(m, reporters, dstate, pq, loc, buf_length, kilo);
        }
    }
}

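/* Handles any events due at prev_limit and returns the next location (at
 * most ep) at which the scan must stop to service the pq again. */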
static really_inline
u64a find_next_limit(const struct mpv *m, u8 *active, u8 *reporters,
                     struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
                     const u8 *buf, u64a prev_limit, u64a ep,
                     size_t buf_length) {
    u64a limit = ep;

    DEBUG_PRINTF("length %llu (prev %llu), pq %u\n", limit, prev_limit,
                 dstate->pq_size);

    handle_events(m, active, reporters, dstate, pq, prev_limit, buf,
                  buf_length);

    if (dstate->pq_size) {
        limit = MIN(pq_top(pq)->trigger_loc, limit);
        assert(limit > prev_limit);
    }

    DEBUG_PRINTF("limit now %llu\n", limit);
    return limit;
}

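/* Core scan loop: repeatedly advances from the current position to the next
 * pq trigger (or the end of the region), reporting matches for the bytes in
 * between. Counters are updated lazily via counter_adj. */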
static really_inline
char mpvExec(const struct mpv *m, u8 *active, u8 *reporters,
             struct mpv_decomp_state *dstate, struct mpv_pq_item *pq,
             const u8 *buf, s64a start, size_t length, size_t buf_length,
             u64a offsetAdj, NfaCallback cb, void *ctxt) {
    DEBUG_PRINTF("running mpv (s %llu, l %zu, o %llu)\n",
                 *get_counter_n(dstate, m, 0) + dstate->counter_adj, length,
                 offsetAdj);

    u64a progress = start; /* progress is relative to buffer offsets */

    while (progress < length) {
        DEBUG_PRINTF("progress %llu\n", progress);

        /* find next limit and update chains */
        u64a limit = find_next_limit(m, active, reporters, dstate, pq, buf,
                                     progress, length, buf_length);
        assert(limit != progress);
        u64a incr = limit - progress;
        DEBUG_PRINTF("incr = %llu\n", incr);

        /* report matches up to next limit */
        char rv = processReportsForRange(m, reporters, dstate,
                                         offsetAdj + progress,
                                         limit - progress, cb, ctxt);

        if (rv != MO_CONTINUE_MATCHING) {
            DEBUG_PRINTF("mpvExec done %llu/%zu\n", progress, length);
            return rv;
        }

        dstate->counter_adj += incr;
        progress = limit;
    }

    assert(progress == length);

    DEBUG_PRINTF("mpvExec done\n");
    return MO_CONTINUE_MATCHING;
}

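/* Expands compressed stream state: unpacks each counter from its
 * partial-width storage into the decompressed scratch state. */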
static really_inline
void mpvLoadState(struct mpv_decomp_state *out, const struct NFA *n,
                  const char *state) {
    assert(16 >= sizeof(struct mpv_decomp_kilo));
    assert(sizeof(*out) <= n->scratchStateSize);
    assert(ISALIGNED(out));

    const struct mpv *m = getImplNfa(n);
    const struct mpv_counter_info *counter_info = get_counter_info(m);
    u64a *counters = get_counter_n(out, m, 0);
    const char *comp_counter = state;
    for (u32 i = 0; i < m->counter_count; i++) {
        u32 counter_size = counter_info[i].counter_size;
        counters[i] = partial_load_u64a(comp_counter, counter_size);
        DEBUG_PRINTF("loaded %llu counter %u\n", counters[i], i);
        comp_counter += counter_size;
    }

    out->filled = 0; /* _Q_i will fill limits, curr puffettes, and populate pq
                      * on first call */
    out->counter_adj = 0;
    out->pq_size = 0;

    u8 *reporters = (u8 *)out + m->reporter_offset;

    mmbit_clear(reporters, m->kilo_count);
}

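/* Compresses scratch state back into stream state, clamping each counter to
 * its maximum so that it fits into its reduced-width storage. */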
static really_inline
void mpvStoreState(const struct NFA *n, char *state,
                   const struct mpv_decomp_state *in) {
    assert(ISALIGNED(in));
    const struct mpv *m = getImplNfa(n);
    const struct mpv_counter_info *counter_info = get_counter_info(m);

    const u64a *counters = (const u64a *)((const char *)in
                                + get_counter_info(m)[0].counter_offset);
    u64a adj = in->counter_adj;
    char *comp_counter = state;
    for (u32 i = 0; i < m->counter_count; i++) {
        /* clamp counter to allow storage in smaller ints */
        u64a curr_counter = MIN(counters[i] + adj, counter_info[i].max_counter);

        u32 counter_size = counter_info[i].counter_size;
        partial_store_u64a(comp_counter, curr_counter, counter_size);
        DEBUG_PRINTF("stored %llu counter %u (orig %llu)\n", curr_counter, i,
                     counters[i]);
        /* assert(counters[i] != MPV_DEAD_VALUE); /\* should have processed 1 */
        /*  * byte since a clear *\/ */
        comp_counter += counter_size;
    }
}

char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q,
                                   UNUSED s64a loc) {
    void *dest = q->streamState;
    const void *src = q->state;
    mpvStoreState(nfa, dest, src);
    return 0;
}

char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src,
                            UNUSED u64a offset, UNUSED u8 key) {
    mpvLoadState(dest, nfa, src);
    return 0;
}

char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) {
    const struct mpv *m = getImplNfa(n);
    u64a offset = q_cur_offset(q);
    struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;

    DEBUG_PRINTF("report current: offset %llu\n", offset);

    u8 *active = (u8 *)q->streamState + m->active_offset;
    u32 rl_count = 0;
    ReportID *rl = get_report_list(m, s);

    processReports(m, active, s, s->counter_adj, offset, q->cb, q->context, rl,
                   &rl_count);
    return 0;
}

char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) {
    struct mpv_decomp_state *out = (void *)q->state;
    const struct mpv *m = getImplNfa(n);
    assert(sizeof(*out) <= n->scratchStateSize);

    DEBUG_PRINTF("queue init state\n");

    u64a *counters = get_counter_n(out, m, 0);
    for (u32 i = 0; i < m->counter_count; i++) {
        counters[i] = MPV_DEAD_VALUE;
    }

    out->filled = 0;
    out->counter_adj = 0;
    out->pq_size = 0;
    out->active[0].curr = NULL;

    assert(q->streamState);
    u8 *active_kpuff = (u8 *)q->streamState + m->active_offset;
    u8 *reporters = (u8 *)q->state + m->reporter_offset;
    mmbit_clear(active_kpuff, m->kilo_count);
    mmbit_clear(reporters, m->kilo_count);
    return 0;
}

char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset,
                                    void *state, UNUSED u8 key) {
    const struct mpv *m = getImplNfa(n);
    memset(state, 0, m->active_offset); /* active_offset marks end of comp
                                         * counters */
    u8 *active_kpuff = (u8 *)state + m->active_offset;
    if (!offset) {
        mmbit_init_range(active_kpuff, m->kilo_count, m->top_kilo_begin,
                         m->top_kilo_end);
        return 1;
    } else {
        return 0;
    }
}

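/* Main queue-driven execution loop: scans between queue events up to 'end',
 * applying tops as they are encountered, and returns whether any kilopuff is
 * still alive. */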
static really_inline
char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) {
    u64a offset = q->offset;
    const u8 *buffer = q->buffer;
    size_t length = q->length;
    NfaCallback cb = q->cb;
    void *context = q->context;
    s64a sp;
    const struct mpv *m = getImplNfa(n);
    struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
    u8 *active = (u8 *)q->streamState + m->active_offset;
    u8 *reporters = (u8 *)q->state + m->reporter_offset;
    struct mpv_pq_item *pq = (struct mpv_pq_item *)(q->state + m->pq_offset);

    if (!s->filled) {
        fillLimits(m, active, reporters, s, pq, q->buffer, q->length);
    }

    assert(!q->report_current);

    if (q->cur == q->end) {
        return 1;
    }

    assert(q->cur + 1 < q->end); /* require at least two items */

    assert(q_cur_type(q) == MQE_START);
    assert(q_cur_loc(q) >= 0);
    sp = q->items[q->cur].location;
    q->cur++;

    if (q->items[q->cur - 1].location > end) {
        /* this is as far as we go */
        q->cur--;
        q->items[q->cur].type = MQE_START;
        q->items[q->cur].location = end;
        return MO_ALIVE;
    }

    while (q->cur < q->end) {
        s64a ep = q->items[q->cur].location;

        ep = MIN(ep, end);

        assert(ep >= sp);

        assert(sp >= 0); /* mpv should be an outfix; outfixes are not lazy */

        if (sp >= ep) {
            goto scan_done;
        }

        /* do main buffer region */
        assert((u64a)ep <= length);
        char rv = mpvExec(m, active, reporters, s, pq, buffer, sp, ep, length,
                          offset, cb, context);
        if (rv == MO_HALT_MATCHING) {
            q->cur = q->end;
            return 0;
        }

    scan_done:
        if (q->items[q->cur].location > end) {
            /* this is as far as we go */
            q->cur--;
            q->items[q->cur].type = MQE_START;
            q->items[q->cur].location = end;
            return MO_ALIVE;
        }

        sp = ep;

        switch (q->items[q->cur].type) {
        case MQE_TOP:
            DEBUG_PRINTF("top %u %u\n", m->top_kilo_begin, m->top_kilo_end);
            /* MQE_TOP initialises all counters to 0; activates all kilos */
            {
                u64a *counters = get_counter_n(s, m, 0);
                assert(counters[0] == MPV_DEAD_VALUE);
                assert(!s->counter_adj);
                for (u32 i = 0; i < m->counter_count; i++) {
                    counters[i] = 0;
                }
                mmbit_init_range(active, m->kilo_count, m->top_kilo_begin,
                                 m->top_kilo_end);
                fillLimits(m, active, reporters, s, pq, buffer, length);
            }
            break;
        case MQE_START:
        case MQE_END:
            break;
        default:
            /* MQE_TOP_N --> switch on kilo puff N */
            assert(q->items[q->cur].type >= MQE_TOP_FIRST);
            assert(q->items[q->cur].type < MQE_INVALID);
            u32 i = q->items[q->cur].type - MQE_TOP_FIRST;
            handleTopN(m, sp, active, reporters, s, pq, buffer, length, i);
            break;
        }

        q->cur++;
    }

    char alive = 0;
    assert(q->items[q->cur - 1].type == MQE_END);
    if (q->items[q->cur - 1].location == (s64a)q->length) {
        normalize_counters(s, m);

        const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1);
        for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID);
             i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) {
            if (*get_counter_for_kilo(s, &kp[i]) >= kp[i].dead_point) {
                mmbit_unset(active, m->kilo_count, i);
            } else {
                alive = 1;
            }
        }
    } else {
        alive
            = mmbit_iterate(active, m->kilo_count, MMB_INVALID) != MMB_INVALID;
    }

    DEBUG_PRINTF("finished %d\n", (int)alive);
    return alive;
}

char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) {
    DEBUG_PRINTF("_Q %lld\n", end);
    return nfaExecMpv_Q_i(n, q, end);
}

s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) {
    DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end);
#ifdef DEBUG
    debugQueue(q);
#endif

    assert(nfa->type == MPV_NFA);
    assert(q && q->context && q->state);
    assert(end >= 0);
    assert(q->cur < q->end);
    assert(q->end <= MAX_MQE_LEN);
    assert(ISALIGNED_16(nfa) && ISALIGNED_16(getImplNfa(nfa)));
    assert(end < q->items[q->end - 1].location
           || q->items[q->end - 1].type == MQE_END);

    if (q->items[q->cur].location > end) {
        return 1;
    }

    char q_trimmed = 0;

    assert(end <= (s64a)q->length || !q->hlength);
    /* due to reverse accel in block mode some queues may work on a truncated
     * buffer */
    if (end > (s64a)q->length) {
        end = q->length;
        q_trimmed = 1;
    }

    /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff
     * is filled in */

    char rv = nfaExecMpv_Q_i(nfa, q, end);

    assert(!q->report_current);
    DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed);
    if (q_trimmed || !rv) {
        return 0;
    } else {
        const struct mpv *m = getImplNfa(nfa);
        u8 *reporters = (u8 *)q->state + m->reporter_offset;

        if (mmbit_any_precise(reporters, m->kilo_count)) {
            DEBUG_PRINTF("next byte\n");
            return 1; /* need to match at next byte */
        } else {
            s64a next_event = q->length;
            s64a next_pq = q->length;

            if (q->cur < q->end) {
                next_event = q->items[q->cur].location;
            }

            struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state;
            struct mpv_pq_item *pq
                = (struct mpv_pq_item *)(q->state + m->pq_offset);
            if (s->pq_size) {
                next_pq = pq_top(pq)->trigger_loc;
            }

            assert(next_event);
            assert(next_pq);

            DEBUG_PRINTF("next pq %lld event %lld\n", next_pq, next_event);
            return MIN(next_pq, next_event);
        }
    }
}