1/*
2 * Copyright (c) 2015-2016, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros.
31 */
32
33#include "util/join.h"
34
35#define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME)
36#define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME)
37#define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME)
38#define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME)
39
40char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa,
41 const struct mq *q, s64a loc) {
42 assert(nfa && q);
43 assert(isLbrType(nfa->type));
44 DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc);
45
46 const struct lbr_common *l = getImplNfa(nfa);
47 const struct lbr_state *lstate = (const struct lbr_state *)q->state;
48
49 u64a offset = q->offset + loc;
50 lbrCompressState(l, offset, lstate, q->streamState);
51 return 0;
52}
53
54char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest,
55 const void *src, u64a offset,
56 UNUSED u8 key) {
57 assert(nfa);
58 assert(isLbrType(nfa->type));
59 DEBUG_PRINTF("entry, offset=%llu\n", offset);
60
61 const struct lbr_common *l = getImplNfa(nfa);
62 struct lbr_state *lstate = (struct lbr_state *)dest;
63 lbrExpandState(l, offset, src, lstate);
64 return 0;
65}
66
67char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
68 struct mq *q) {
69 assert(nfa && q);
70 assert(isLbrType(nfa->type));
71
72 const struct lbr_common *l = getImplNfa(nfa);
73 u64a offset = q_cur_offset(q);
74 DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
75 q->cb(0, offset, l->report, q->context);
76 return 0;
77}
78
79char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
80 ReportID report, struct mq *q) {
81 assert(nfa && q);
82 assert(isLbrType(nfa->type));
83 DEBUG_PRINTF("entry\n");
84
85 const struct lbr_common *l = getImplNfa(nfa);
86 const struct RepeatInfo *info = getRepeatInfo(l);
87 const struct lbr_state *lstate = (const struct lbr_state *)q->state;
88 if (repeatIsDead(info, lstate)) {
89 DEBUG_PRINTF("repeat is dead\n");
90 return 0;
91 }
92
93 u64a offset = q->offset + q_last_loc(q);
94 return lbrInAccept(l, lstate, q->streamState, offset, report);
95}
96
97char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
98 assert(nfa && q);
99 assert(isLbrType(nfa->type));
100 DEBUG_PRINTF("entry\n");
101
102 const struct lbr_common *l = getImplNfa(nfa);
103 return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
104}
105
106char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
107 struct mq *q) {
108 assert(nfa && q);
109 assert(isLbrType(nfa->type));
110 DEBUG_PRINTF("entry\n");
111
112 const struct lbr_common *l = getImplNfa(nfa);
113 const struct RepeatInfo *info = getRepeatInfo(l);
114
115 assert(q->state);
116 struct lbr_state *lstate = (struct lbr_state *)q->state;
117 assert(ISALIGNED(lstate));
118
119 lstate->lastEscape = 0;
120 clearRepeat(info, lstate);
121
122 return 0;
123}
124
125char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa,
126 u64a offset,
127 void *state, UNUSED u8 key) {
128 assert(nfa && state);
129 assert(isLbrType(nfa->type));
130 DEBUG_PRINTF("entry\n");
131
132 const struct lbr_common *l = getImplNfa(nfa);
133 const struct RepeatInfo *info = getRepeatInfo(l);
134 struct lbr_state lstate; // temp control block on stack.
135 clearRepeat(info, &lstate);
136 lbrTop(l, &lstate, state, offset);
137 lbrCompressState(l, offset, &lstate, state);
138
139 return 1; // LBR is alive
140}
141
142// FIXME: this function could be much simpler for a Dot LBR, as all it needs to
143// do is find the next top.
144static really_inline
145char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
146 s64a end) {
147 const struct lbr_common *l = getImplNfa(nfa);
148 const struct RepeatInfo *info = getRepeatInfo(l);
149
150 const u64a offset = q->offset;
151 struct lbr_state *lstate = (struct lbr_state *)q->state;
152 assert(ISALIGNED(lstate));
153
154 assert(repeatIsDead(info, lstate));
155 assert(q->cur < q->end);
156
157 DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end,
158 offset, lstate->lastEscape);
159
160 while (1) {
161 // Find the next top with location >= the last escape we saw.
162 for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
163 u32 event = q_cur_type(q);
164 if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
165 q_cur_offset(q) >= lstate->lastEscape) {
166 goto found_top;
167 }
168 DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
169 }
170
171 // No more tops, we're done.
172 break;
173
174found_top:;
175 assert(q->cur < q->end);
176
177 u64a sp = q_cur_offset(q);
178 u64a first_match = sp + info->repeatMin;
179 DEBUG_PRINTF("first possible match is at %llu\n", first_match);
180
181 u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match);
182 if (ep > sp && sp >= offset) {
183 size_t eloc;
184 DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset,
185 q->length);
186 assert(ep - offset <= q->length);
187 if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) {
188 DEBUG_PRINTF("escape found at %llu\n", offset + eloc);
189 lstate->lastEscape = eloc;
190 q->cur++;
191 continue;
192 }
193 }
194
195 lbrTop(l, lstate, q->streamState, sp);
196 return 1;
197 }
198
199 DEBUG_PRINTF("exhausted queue\n");
200 return 0;
201}
202
203static really_inline
204char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
205 s64a end, enum MatchMode mode) {
206 assert(nfa && q);
207 assert(isLbrType(nfa->type));
208
209 const struct lbr_common *l = getImplNfa(nfa);
210 const struct RepeatInfo *info = getRepeatInfo(l);
211
212 struct lbr_state *lstate = (struct lbr_state *)q->state;
213 assert(ISALIGNED(lstate));
214
215
216 if (q->report_current) {
217 DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
218 int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
219 q->report_current = 0;
220 if (rv == MO_HALT_MATCHING) {
221 return MO_HALT_MATCHING;
222 }
223 }
224
225 if (q->cur == q->end) {
226 return 1;
227 }
228
229 assert(q->cur + 1 < q->end); /* require at least two items */
230 assert(q_cur_type(q) == MQE_START);
231 u64a sp = q_cur_offset(q);
232 q->cur++;
233 DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);
234
235 while (q->cur < q->end) {
236 DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
237 q_cur_offset(q));
238
239 assert(sp >= q->offset); // not in history
240
241 if (repeatIsDead(info, lstate)) {
242 DEBUG_PRINTF("repeat is currently dead, skipping scan\n");
243 goto scan_done;
244 }
245
246 u64a ep = q_cur_offset(q);
247 ep = MIN(ep, q->offset + end);
248 if (sp < ep) {
249 size_t eloc = 0;
250 char escape_found = 0;
251 DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
252 assert(sp >= q->offset && ep >= q->offset);
253 if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) {
254 escape_found = 1;
255 ep = q->offset + eloc;
256 DEBUG_PRINTF("escape found at %llu\n", ep);
257 assert(ep >= sp);
258 }
259
260 assert(sp <= ep);
261
262 if (mode == STOP_AT_MATCH) {
263 size_t mloc;
264 if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) {
265 DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
266 q->cur--;
267 assert(q->cur < MAX_MQE_LEN);
268 q->items[q->cur].type = MQE_START;
269 q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
270 return MO_MATCHES_PENDING;
271 }
272 } else {
273 assert(mode == CALLBACK_OUTPUT);
274 char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb,
275 q->context);
276 if (rv == MO_HALT_MATCHING) {
277 return MO_HALT_MATCHING;
278 }
279 assert(rv == MO_CONTINUE_MATCHING);
280 }
281
282 if (escape_found) {
283 DEBUG_PRINTF("clearing repeat due to escape\n");
284 clearRepeat(info, lstate);
285 }
286 }
287
288 scan_done:
289 if (q_cur_loc(q) > end) {
290 q->cur--;
291 assert(q->cur < MAX_MQE_LEN);
292 q->items[q->cur].type = MQE_START;
293 q->items[q->cur].location = end;
294 return MO_ALIVE;
295 }
296
297 if (repeatIsDead(info, lstate)) {
298 if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) {
299 assert(repeatIsDead(info, lstate));
300 if (q->cur < q->end && q_cur_loc(q) > end) {
301 q->cur--;
302 assert(q->cur < MAX_MQE_LEN);
303 q->items[q->cur].type = MQE_START;
304 q->items[q->cur].location = end;
305 return MO_ALIVE;
306 }
307 return 0;
308 }
309 DEBUG_PRINTF("cur offset = %llu\n", q_cur_offset(q));
310 } else {
311 switch (q_cur_type(q)) {
312 case MQE_TOP:
313 case MQE_TOP_FIRST:
314 lbrTop(l, lstate, q->streamState, q_cur_offset(q));
315 break;
316 case MQE_START:
317 case MQE_END:
318 break;
319 default:
320 DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
321 assert(0);
322 break;
323 }
324 }
325
326 sp = q_cur_offset(q);
327 q->cur++;
328 }
329
330 return lbrIsAlive(l, lstate, q->streamState, sp);
331}
332
333char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) {
334 DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
335 return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT);
336}
337
338char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) {
339 DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
340 return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH);
341}
342
343static really_inline
344void JOIN(ENGINE_EXEC_NAME, _StreamSilent)(const struct NFA *nfa, struct mq *q,
345 const u8 *buf, size_t length) {
346 const struct lbr_common *l = getImplNfa(nfa);
347 const struct RepeatInfo *info = getRepeatInfo(l);
348 struct lbr_state *lstate = (struct lbr_state *)q->state;
349 assert(ISALIGNED(lstate));
350
351 assert(!repeatIsDead(info, lstate));
352
353 // This call doesn't produce matches, so we elide the lbrMatchLoop call
354 // entirely and just do escape scans to maintain the repeat.
355
356 size_t eloc = 0;
357 char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc);
358 if (escaped) {
359 assert(eloc < length);
360 DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc);
361 clearRepeat(info, lstate);
362 }
363}
364
365// Rose infix path.
366char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q,
367 ReportID report) {
368 assert(nfa && q);
369 assert(isLbrType(nfa->type));
370
371 if (q->cur == q->end) {
372 return 1;
373 }
374
375 assert(q->cur + 1 < q->end); /* require at least two items */
376 assert(q_cur_type(q) == MQE_START);
377 u64a sp = q_cur_offset(q);
378 q->cur++;
379 DEBUG_PRINTF("sp=%llu\n", sp);
380
381 const struct lbr_common *l = getImplNfa(nfa);
382 const struct RepeatInfo *info = getRepeatInfo(l);
383 struct lbr_state *lstate = (struct lbr_state *)q->state;
384 assert(ISALIGNED(lstate));
385 const s64a lastLoc = q_last_loc(q);
386
387 while (q->cur < q->end) {
388 DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
389 q_cur_offset(q));
390
391 if (repeatIsDead(info, lstate)) {
392 DEBUG_PRINTF("repeat is dead\n");
393 goto scan_done;
394 }
395
396 u64a ep = q_cur_offset(q);
397
398 if (sp < q->offset) {
399 DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
400 assert(q->offset - sp <= q->hlength);
401 u64a local_ep = MIN(q->offset, ep);
402 const u8 *ptr = q->history + q->hlength + sp - q->offset;
403 JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp);
404 sp = local_ep;
405 }
406
407 if (repeatIsDead(info, lstate)) {
408 DEBUG_PRINTF("repeat is dead\n");
409 goto scan_done;
410 }
411
412 if (sp < ep) {
413 DEBUG_PRINTF("MAIN BUFFER SCAN\n");
414 assert(ep - q->offset <= q->length);
415 const u8 *ptr = q->buffer + sp - q->offset;
416 JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp);
417 }
418
419 if (repeatIsDead(info, lstate)) {
420scan_done:
421 if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) {
422 assert(repeatIsDead(info, lstate));
423 assert(q->cur == q->end);
424 return 0;
425 }
426 } else {
427 switch (q_cur_type(q)) {
428 case MQE_TOP:
429 case MQE_TOP_FIRST:
430 lbrTop(l, lstate, q->streamState, q_cur_offset(q));
431 break;
432 case MQE_START:
433 case MQE_END:
434 break;
435 default:
436 DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
437 assert(0);
438 break;
439 }
440 }
441
442 sp = q_cur_offset(q);
443 q->cur++;
444 }
445
446 if (repeatIsDead(info, lstate)) {
447 DEBUG_PRINTF("repeat is dead\n");
448 return 0;
449 }
450
451 if (lbrInAccept(l, lstate, q->streamState, sp, report)) {
452 return MO_MATCHES_PENDING;
453 }
454
455 return lbrIsActive(l, lstate, q->streamState, sp);
456}
457
458#undef ENGINE_EXEC_NAME
459#undef EXEC_FN
460#undef FWDSCAN_FN
461#undef REVSCAN_FN
462#undef ENGINE_ROOT_NAME
463