1/*
2 * Copyright (c) 2015-2016, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief SOM runtime code.
31 *
32 *
33 * Runtime code for SOM handling called by the Rose callback adaptors.
34 *
35 * Note:
36 * Races between escapes making a som loc writeable and attempts to write to it
37 * at the same to_offset are always resolved as if the escape arrived first
38 * and then the request to write to that location.
39 */
40
41#include "hs_internal.h"
42#include "som_operation.h"
43#include "som_runtime.h"
44#include "scratch.h"
45#include "ue2common.h"
46#include "rose/rose_internal.h"
47#include "nfa/nfa_api.h"
48#include "nfa/nfa_internal.h"
49#include "util/fatbit.h"
50#include "util/multibit.h"
51
52static really_inline
53void setSomLoc(struct fatbit *som_set_now, u64a *som_store, u32 som_store_count,
54 const struct som_operation *ri, u64a to_offset) {
55 /* validity handled by callers */
56 assert(to_offset >= ri->aux.somDistance);
57 u64a start_offset = to_offset - ri->aux.somDistance;
58 u32 som_loc = ri->onmatch;
59
60 /* resolve any races for matches at this point in favour of the earliest som
61 */
62 if (!fatbit_set(som_set_now, som_store_count, som_loc)) {
63 som_store[som_loc] = start_offset;
64 } else {
65 LIMIT_TO_AT_MOST(&som_store[som_loc], start_offset);
66 }
67
68 DEBUG_PRINTF("som_store[%u] set to %llu\n", som_loc, som_store[som_loc]);
69}
70
71static really_inline
72char ok_and_mark_if_write(u8 *som_store_valid, struct fatbit *som_set_now,
73 u8 *som_store_writable, u32 som_store_count,
74 u32 loc) {
75 return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
76 || fatbit_isset(som_set_now, som_store_count, loc) /* write here, need
77 * to resolve race */
78 || mmbit_isset(som_store_writable, som_store_count, loc); /* writable */
79}
80
81static really_inline
82char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now,
83 u32 som_store_count, u32 loc) {
84 return !mmbit_set(som_store_valid, som_store_count, loc) /* unwritten */
85 || fatbit_isset(som_set_now, som_store_count, loc); /* write here, need
86 * to resolve race */
87}
88
89static
90int somRevCallback(UNUSED u64a start, u64a end, ReportID id, void *ctx) {
91 DEBUG_PRINTF("offset=%llu, id=%u\n", end, id);
92
93 // We use the id to store the offset adjustment (for assertions like a
94 // leading \b or multiline mode).
95 assert(id <= 1);
96 u64a *from_offset = ctx;
97 LIMIT_TO_AT_MOST(from_offset, end + id);
98 return 1; // continue matching.
99}
100
101static really_inline
102const struct NFA *getSomRevNFA(const struct RoseEngine *t, u32 i) {
103 assert(t->somRevOffsetOffset);
104 const u32 *rev_offsets
105 = (const u32 *)((const u8 *)t + t->somRevOffsetOffset);
106 u32 nfa_offset = rev_offsets[i];
107 assert(nfa_offset && nfa_offset < t->size);
108 const struct NFA *n = (const struct NFA *)(((const u8 *)t + nfa_offset));
109 assert(ISALIGNED(n));
110
111 return n;
112}
113
114static
115void runRevNfa(struct hs_scratch *scratch, const struct som_operation *ri,
116 const u64a to_offset, u64a *from_offset) {
117 struct core_info *ci = &scratch->core_info;
118
119 DEBUG_PRINTF("buf has %zu bytes total, history has %zu\n",
120 ci->len, ci->hlen);
121
122 u32 nfa_idx = ri->aux.revNfaIndex;
123 DEBUG_PRINTF("run rev nfa %u from to_offset=%llu\n", nfa_idx, to_offset);
124 const struct NFA *nfa = getSomRevNFA(ci->rose, nfa_idx);
125
126 assert(nfa->maxWidth); // No inf width rev NFAs.
127
128 size_t buf_bytes = to_offset - ci->buf_offset;
129 size_t history_bytes = ci->hlen;
130
131 DEBUG_PRINTF("nfa min/max widths [%u,%u], %zu in buffer, %zu in history\n",
132 nfa->minWidth, nfa->maxWidth, buf_bytes, history_bytes);
133 assert(nfa->minWidth <= buf_bytes + history_bytes);
134
135 const u8 *buf = ci->buf;
136 const u8 *hbuf = ci->hbuf;
137
138 // Work out if we need to scan any history as well.
139 if (history_bytes && buf_bytes < nfa->maxWidth) {
140 assert(hbuf);
141 size_t remainder = nfa->maxWidth - buf_bytes;
142 if (remainder < history_bytes) {
143 hbuf += history_bytes - remainder;
144 history_bytes = remainder;
145 }
146 }
147
148 DEBUG_PRINTF("scanning %zu from buffer and %zu from history\n", buf_bytes,
149 history_bytes);
150
151 *from_offset = to_offset;
152
153 nfaBlockExecReverse(nfa, to_offset, buf, buf_bytes, hbuf, history_bytes,
154 somRevCallback, from_offset);
155
156 assert(*from_offset <= to_offset);
157}
158
159static really_inline
160void setSomLocRevNfa(struct hs_scratch *scratch, struct fatbit *som_set_now,
161 u64a *som_store, u32 som_store_count,
162 const struct som_operation *ri, u64a to_offset) {
163 /* validity handled by callers */
164 u64a from_offset = 0;
165 runRevNfa(scratch, ri, to_offset, &from_offset);
166
167 u32 som_loc = ri->onmatch;
168
169 /* resolve any races for matches at this point in favour of the earliest som
170 */
171 if (!fatbit_set(som_set_now, som_store_count, som_loc)) {
172 som_store[som_loc] = from_offset;
173 } else {
174 LIMIT_TO_AT_MOST(&som_store[som_loc], from_offset);
175 }
176
177 DEBUG_PRINTF("som_store[%u] set to %llu\n", som_loc, som_store[som_loc]);
178}
179
180void handleSomInternal(struct hs_scratch *scratch,
181 const struct som_operation *ri, const u64a to_offset) {
182 assert(scratch);
183 assert(ri);
184 DEBUG_PRINTF("-->som action required at %llu\n", to_offset);
185
186 // SOM handling at scan time operates on data held in scratch. In
187 // streaming mode, this data is read from / written out to stream state at
188 // stream write boundaries.
189
190 struct core_info *ci = &scratch->core_info;
191 const struct RoseEngine *rose = ci->rose;
192 assert(rose->hasSom);
193
194 const u32 som_store_count = rose->somLocationCount;
195 u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
196 u8 *som_store_writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
197 struct fatbit *som_set_now = scratch->som_set_now;
198 struct fatbit *som_attempted_set = scratch->som_attempted_set;
199 u64a *som_store = scratch->som_store;
200 u64a *som_failed_store = scratch->som_attempted_store;
201
202 if (to_offset != scratch->som_set_now_offset) {
203 assert(scratch->som_set_now_offset == ~0ULL
204 || to_offset > scratch->som_set_now_offset);
205 DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
206 fatbit_clear(som_set_now);
207 fatbit_clear(som_attempted_set);
208 scratch->som_set_now_offset = to_offset;
209 }
210
211 switch (ri->type) {
212 case SOM_INTERNAL_LOC_SET:
213 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET\n");
214 mmbit_set(som_store_valid, som_store_count, ri->onmatch);
215 setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
216 return;
217 case SOM_INTERNAL_LOC_SET_IF_UNSET:
218 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_UNSET\n");
219 if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
220 ri->onmatch)) {
221 setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
222 }
223 return;
224 case SOM_INTERNAL_LOC_SET_IF_WRITABLE: {
225 u32 slot = ri->onmatch;
226 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_WRITABLE\n");
227 if (ok_and_mark_if_write(som_store_valid, som_set_now,
228 som_store_writable, som_store_count, slot)) {
229 setSomLoc(som_set_now, som_store, som_store_count, ri, to_offset);
230 mmbit_unset(som_store_writable, som_store_count, slot);
231 } else {
232 /* not writable, stash as an attempted write in case we are
233 * racing our escape. */
234 DEBUG_PRINTF("not writable, stashing attempt\n");
235 assert(to_offset >= ri->aux.somDistance);
236 u64a start_offset = to_offset - ri->aux.somDistance;
237
238 if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
239 som_failed_store[slot] = start_offset;
240 } else {
241 LIMIT_TO_AT_MOST(&som_failed_store[slot], start_offset);
242 }
243 DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
244 som_failed_store[slot]);
245 }
246 return;
247 }
248 case SOM_INTERNAL_LOC_SET_REV_NFA:
249 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_REV_NFA\n");
250 mmbit_set(som_store_valid, som_store_count, ri->onmatch);
251 setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count, ri,
252 to_offset);
253 return;
254 case SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET:
255 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET\n");
256 if (ok_and_mark_if_unset(som_store_valid, som_set_now, som_store_count,
257 ri->onmatch)) {
258 setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
259 ri, to_offset);
260 }
261 return;
262 case SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE: {
263 u32 slot = ri->onmatch;
264 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_IF_WRITABLE\n");
265 if (ok_and_mark_if_write(som_store_valid, som_set_now,
266 som_store_writable, som_store_count, slot)) {
267 setSomLocRevNfa(scratch, som_set_now, som_store, som_store_count,
268 ri, to_offset);
269 mmbit_unset(som_store_writable, som_store_count, slot);
270 } else {
271 /* not writable, stash as an attempted write in case we are
272 * racing our escape. */
273 DEBUG_PRINTF("not writable, stashing attempt\n");
274
275 u64a from_offset = 0;
276 runRevNfa(scratch, ri, to_offset, &from_offset);
277
278 if (!fatbit_set(som_attempted_set, som_store_count, slot)) {
279 som_failed_store[slot] = from_offset;
280 } else {
281 LIMIT_TO_AT_MOST(&som_failed_store[slot], from_offset);
282 }
283 DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot,
284 som_failed_store[slot]);
285 }
286 return;
287 }
288 case SOM_INTERNAL_LOC_COPY: {
289 u32 slot_in = ri->aux.somDistance;
290 u32 slot_out = ri->onmatch;
291 DEBUG_PRINTF("SOM_INTERNAL_LOC_COPY S[%u] = S[%u]\n", slot_out,
292 slot_in);
293 assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
294 mmbit_set(som_store_valid, som_store_count, slot_out);
295 fatbit_set(som_set_now, som_store_count, slot_out);
296 som_store[slot_out] = som_store[slot_in];
297
298 return;
299 }
300 case SOM_INTERNAL_LOC_COPY_IF_WRITABLE: {
301 u32 slot_in = ri->aux.somDistance;
302 u32 slot_out = ri->onmatch;
303 DEBUG_PRINTF("SOM_INTERNAL_LOC_COPY_IF_WRITABLE S[%u] = S[%u]\n",
304 slot_out, slot_in);
305 assert(mmbit_isset(som_store_valid, som_store_count, slot_in));
306 if (ok_and_mark_if_write(som_store_valid, som_set_now,
307 som_store_writable, som_store_count,
308 slot_out)) {
309 DEBUG_PRINTF("copy, set som_store[%u]=%llu\n", slot_out,
310 som_store[slot_in]);
311 som_store[slot_out] = som_store[slot_in];
312 fatbit_set(som_set_now, som_store_count, slot_out);
313 mmbit_unset(som_store_writable, som_store_count, slot_out);
314 } else {
315 /* not writable, stash as an attempted write in case we are
316 * racing our escape */
317 DEBUG_PRINTF("not writable, stashing attempt\n");
318 fatbit_set(som_attempted_set, som_store_count, slot_out);
319 som_failed_store[slot_out] = som_store[slot_in];
320 DEBUG_PRINTF("som_failed_store[%u] = %llu\n", slot_out,
321 som_failed_store[slot_out]);
322 }
323 return;
324 }
325 case SOM_INTERNAL_LOC_MAKE_WRITABLE: {
326 u32 slot = ri->onmatch;
327 DEBUG_PRINTF("SOM_INTERNAL_LOC_MAKE_WRITABLE\n");
328 /* if just written to the loc, ignore the racing escape */
329 if (fatbit_isset(som_set_now, som_store_count, slot)) {
330 DEBUG_PRINTF("just written\n");
331 return;
332 }
333 if (fatbit_isset(som_attempted_set, som_store_count, slot)) {
334 /* writes were waiting for an escape to arrive */
335 DEBUG_PRINTF("setting som_store[%u] = %llu from "
336 "som_failed_store[%u]\n", slot, som_failed_store[slot],
337 slot);
338 som_store[slot] = som_failed_store[slot];
339 fatbit_set(som_set_now, som_store_count, slot);
340 return;
341 }
342 mmbit_set(som_store_writable, som_store_count, slot);
343 return;
344 }
345 default:
346 DEBUG_PRINTF("unknown report type!\n");
347 break;
348 }
349
350 // All valid som_operation types should be handled and returned above.
351 assert(0);
352 return;
353}
354
355// Returns the SOM offset.
356u64a handleSomExternal(struct hs_scratch *scratch,
357 const struct som_operation *ri,
358 const u64a to_offset) {
359 assert(scratch);
360 assert(ri);
361
362 // SOM handling at scan time operates on data held in scratch. In
363 // streaming mode, this data is read from / written out to stream state at
364 // stream write boundaries.
365
366 struct core_info *ci = &scratch->core_info;
367 const struct RoseEngine *rose = ci->rose;
368 assert(rose->hasSom);
369
370 switch (ri->type) {
371 case SOM_EXTERNAL_CALLBACK_REL:
372 DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_REL: som is %llu chars back\n",
373 ri->aux.somDistance);
374 assert(to_offset >= ri->aux.somDistance);
375 return to_offset - ri->aux.somDistance;
376 case SOM_EXTERNAL_CALLBACK_ABS:
377 DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_ABS: som is at %llu\n",
378 ri->aux.somDistance);
379 assert(to_offset >= ri->aux.somDistance);
380 return ri->aux.somDistance;
381 case SOM_EXTERNAL_CALLBACK_STORED: {
382 const u64a *som_store = scratch->som_store;
383 u32 slot = ri->aux.somDistance;
384 DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_STORED: <- som_store[%u]=%llu\n",
385 slot, som_store[slot]);
386
387 UNUSED const u32 som_store_count = rose->somLocationCount;
388 UNUSED const u8 *som_store_valid = (u8 *)ci->state
389 + rose->stateOffsets.somValid;
390
391 assert(mmbit_isset(som_store_valid, som_store_count, slot));
392 return som_store[slot];
393 }
394 case SOM_EXTERNAL_CALLBACK_REV_NFA: {
395 DEBUG_PRINTF("SOM_EXTERNAL_CALLBACK_REV_NFA\n");
396 u64a from_offset = 0;
397 runRevNfa(scratch, ri, to_offset, &from_offset);
398 return from_offset;
399 }
400 default:
401 DEBUG_PRINTF("unknown report type!\n");
402 break;
403 }
404
405 // All valid som_operation types should be handled and returned above.
406 assert(0);
407 return 0;
408}
409
410void setSomFromSomAware(struct hs_scratch *scratch,
411 const struct som_operation *ri, u64a from_offset,
412 u64a to_offset) {
413 assert(scratch);
414 assert(ri);
415 assert(to_offset);
416 assert(ri->type == SOM_INTERNAL_LOC_SET_FROM
417 || ri->type == SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE);
418
419 struct core_info *ci = &scratch->core_info;
420 const struct RoseEngine *rose = ci->rose;
421 assert(rose->hasSom);
422
423 const u32 som_store_count = rose->somLocationCount;
424 u8 *som_store_valid = (u8 *)ci->state + rose->stateOffsets.somValid;
425 u8 *som_store_writable = (u8 *)ci->state + rose->stateOffsets.somWritable;
426 struct fatbit *som_set_now = scratch->som_set_now;
427 struct fatbit *som_attempted_set = scratch->som_attempted_set;
428 u64a *som_store = scratch->som_store;
429 u64a *som_failed_store = scratch->som_attempted_store;
430
431 if (to_offset != scratch->som_set_now_offset) {
432 DEBUG_PRINTF("setting som_set_now_offset=%llu\n", to_offset);
433 fatbit_clear(som_set_now);
434 fatbit_clear(som_attempted_set);
435 scratch->som_set_now_offset = to_offset;
436 }
437
438 if (ri->type == SOM_INTERNAL_LOC_SET_FROM) {
439 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_FROM\n");
440 mmbit_set(som_store_valid, som_store_count, ri->onmatch);
441 setSomLoc(som_set_now, som_store, som_store_count, ri, from_offset);
442 } else {
443 DEBUG_PRINTF("SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE\n");
444 if (ok_and_mark_if_write(som_store_valid, som_set_now,
445 som_store_writable, som_store_count,
446 ri->onmatch)) {
447 setSomLoc(som_set_now, som_store, som_store_count, ri, from_offset);
448 mmbit_unset(som_store_writable, som_store_count, ri->onmatch);
449 } else {
450 /* not writable, stash as an attempted write in case we are
451 * racing our escape. */
452 DEBUG_PRINTF("not writable, stashing attempt\n");
453 assert(to_offset >= ri->aux.somDistance);
454 u32 som_loc = ri->onmatch;
455
456 if (!fatbit_set(som_attempted_set, som_store_count, ri->onmatch)) {
457 som_failed_store[som_loc] = from_offset;
458 } else {
459 LIMIT_TO_AT_MOST(&som_failed_store[som_loc], from_offset);
460 }
461 DEBUG_PRINTF("som_failed_store[%u] = %llu\n", som_loc,
462 som_failed_store[som_loc]);
463 }
464 }
465}
466
467static really_inline
468int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log,
469 const u64a *starts) {
470 DEBUG_PRINTF("at %llu\n", offset);
471 struct core_info *ci = &scratch->core_info;
472 const struct RoseEngine *rose = ci->rose;
473 const u32 dkeyCount = rose->dkeyCount;
474 const u32 *dkey_to_report = (const u32 *)
475 ((const char *)rose + rose->invDkeyOffset);
476 u32 flags = 0;
477#ifndef RELEASE_BUILD
478 if (scratch->deduper.current_report_offset != offset) {
479 flags |= HS_MATCH_FLAG_ADJUSTED;
480 }
481#endif
482
483 for (u32 it = fatbit_iterate(log, dkeyCount, MMB_INVALID);
484 it != MMB_INVALID; it = fatbit_iterate(log, dkeyCount, it)) {
485 u64a from_offset = starts[it];
486 u32 onmatch = dkey_to_report[it];
487 int halt = ci->userCallback(onmatch, from_offset, offset, flags,
488 ci->userContext);
489 if (halt) {
490 ci->status |= STATUS_TERMINATED;
491 return 1;
492 }
493 }
494 fatbit_clear(log);
495 return 0;
496}
497
498int flushStoredSomMatches_i(struct hs_scratch *scratch, u64a offset) {
499 DEBUG_PRINTF("flush som matches\n");
500 int halt = 0;
501
502 assert(!told_to_stop_matching(scratch));
503
504 if (scratch->deduper.current_report_offset == ~0ULL) {
505 /* no matches recorded yet; just need to clear the logs */
506 fatbit_clear(scratch->deduper.som_log[0]);
507 fatbit_clear(scratch->deduper.som_log[1]);
508 scratch->deduper.som_log_dirty = 0;
509 return 0;
510 }
511
512 /* fire any reports from the logs and clear them */
513 if (offset == scratch->deduper.current_report_offset + 1) {
514 struct fatbit *done_log = scratch->deduper.som_log[offset % 2];
515 u64a *done_starts = scratch->deduper.som_start_log[offset % 2];
516
517 halt = clearSomLog(scratch, scratch->deduper.current_report_offset - 1,
518 done_log, done_starts);
519 scratch->deduper.som_log_dirty >>= 1;
520 } else {
521 /* need to report both logs */
522 u64a f_offset = scratch->deduper.current_report_offset - 1;
523 u64a s_offset = scratch->deduper.current_report_offset;
524 struct fatbit *first_log = scratch->deduper.som_log[f_offset % 2];
525 u64a *first_starts = scratch->deduper.som_start_log[f_offset % 2];
526 struct fatbit *second_log = scratch->deduper.som_log[s_offset % 2];
527 u64a *second_starts = scratch->deduper.som_start_log[s_offset % 2];
528
529 halt = clearSomLog(scratch, f_offset, first_log, first_starts) ||
530 clearSomLog(scratch, s_offset, second_log, second_starts);
531 scratch->deduper.som_log_dirty = 0;
532 }
533
534 return halt;
535}
536