1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Puff construction from NGHolder.
31 */
32#include "ng_puff.h"
33
34#include "grey.h"
35#include "ng_depth.h"
36#include "ng_holder.h"
37#include "ng_prune.h"
38#include "ng_repeat.h"
39#include "ng_reports.h"
40#include "ng_util.h"
41#include "ue2common.h"
42#include "nfa/nfa_api_queue.h"
43#include "nfa/mpvcompile.h"
44#include "rose/rose_build.h"
45#include "util/compile_context.h"
46#include "util/graph_range.h"
47#include "util/report_manager.h"
48
49#include <vector>
50
51using namespace std;
52
53namespace ue2 {
54
55static const unsigned MIN_PUFF_LENGTH = 16;
56static const unsigned HEAD_BACKOFF = 16;
57
58static
59size_t countChain(const NGHolder &g, NFAVertex v) {
60 size_t count = 0;
61 while (v) {
62 DEBUG_PRINTF("counting vertex %zu\n", g[v].index);
63 if (is_special(v, g)) {
64 break;
65 }
66
67 count++;
68 v = getSoleDestVertex(g, v);
69 }
70 DEBUG_PRINTF("done %zu\n", count);
71 return count;
72}
73
74static
75void wireNewAccepts(NGHolder &g, NFAVertex head,
76 const flat_set<ReportID> &chain_reports) {
77 for (auto u : inv_adjacent_vertices_range(head, g)) {
78 if (is_special(u, g)) {
79 continue;
80 }
81
82 DEBUG_PRINTF("adding edge: %zu -> accept\n", g[u].index);
83 assert(!edge(u, g.accept, g).second);
84 assert(!edge(u, g.acceptEod, g).second);
85 add_edge(u, g.accept, g);
86
87 // Replace reports with our chain reports.
88 auto &u_reports = g[u].reports;
89 u_reports.clear();
90 u_reports.insert(chain_reports.begin(), chain_reports.end());
91 }
92}
93
94static
95bool isFixedDepth(const NGHolder &g, NFAVertex v) {
96 // If the vertex is reachable from startDs, it can't be fixed depth.
97 auto depthFromStartDs = calcDepthsFrom(g, g.startDs);
98
99 u32 idx = g[v].index;
100 const DepthMinMax &ds = depthFromStartDs.at(idx);
101 if (ds.min.is_reachable()) {
102 DEBUG_PRINTF("vertex reachable from startDs\n");
103 return false;
104 }
105
106 auto depthFromStart = calcDepthsFrom(g, g.start);
107
108 /* we can still consider the head of a puff chain as at fixed depth if
109 * it has a self-loop: so we look at all the preds of v (other than v
110 * itself) */
111
112 assert(v && !is_special(v, g));
113
114 u32 count = 0;
115 for (auto u : inv_adjacent_vertices_range(v, g)) {
116 if (u == v) {
117 continue; // self-loop
118 }
119 count++;
120
121 idx = g[u].index;
122 const DepthMinMax &d = depthFromStart.at(idx);
123 if (d.min != d.max) {
124 return false;
125 }
126 }
127
128 return count != 0; // at least one fixed-depth pred
129}
130
131static
132bool singleStart(const NGHolder &g) {
133 set<NFAVertex> seen;
134
135 for (auto v : adjacent_vertices_range(g.start, g)) {
136 if (!is_special(v, g)) {
137 DEBUG_PRINTF("saw %zu\n", g[v].index);
138 seen.insert(v);
139 }
140 }
141 for (auto v : adjacent_vertices_range(g.startDs, g)) {
142 if (!is_special(v, g)) {
143 DEBUG_PRINTF("saw %zu\n", g[v].index);
144 seen.insert(v);
145 }
146 }
147
148 DEBUG_PRINTF("comp has %zu starts\n", seen.size());
149
150 return seen.size() == 1;
151}
152
153static
154bool triggerResetsPuff(const NGHolder &g, NFAVertex head) {
155 const CharReach puff_escapes = ~g[head].char_reach;
156
157 for (auto u : inv_adjacent_vertices_range(head, g)) {
158 if (!g[u].char_reach.isSubsetOf(puff_escapes)) {
159 DEBUG_PRINTF("no reset on trigger %zu %zu\n", g[u].index,
160 g[head].index);
161 return false;
162 }
163 }
164
165 DEBUG_PRINTF("reset on trigger\n");
166 return true;
167}
168
169/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform)
170 * */
171static
172bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) {
173 DEBUG_PRINTF("head = %zu\n", g[head].index);
174
175 const CharReach &puff_cr = g[head].char_reach;
176
177 /* we can use the pred of the head as the base of our check if it the cr
178 * matches as if
179 * head cr subsetof pred cr: if head is being pushed on then puff must
180 * still being pushed on
181 * pred cr subsetof head cr: if the puff matches then head must be also
182 * always be on if the is connected to a wide enough cyclic
183 */
184 if (proper_in_degree(head, g) == 1
185 && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) {
186 head = getSoleSourceVertex(g, head);
187 DEBUG_PRINTF("temp new head = %zu\n", g[head].index);
188 }
189
190 for (auto s : inv_adjacent_vertices_range(head, g)) {
191 DEBUG_PRINTF("s = %zu\n", g[s].index);
192 if (!puff_cr.isSubsetOf(g[s].char_reach)) {
193 DEBUG_PRINTF("no flood on trigger %zu %zu\n", g[s].index,
194 g[head].index);
195 return false;
196 }
197
198 if (!hasSelfLoop(s, g) && s != g.start) {
199 DEBUG_PRINTF("no self loop\n");
200 return false;
201 }
202
203 if (s == g.start && !edge(g.startDs, head, g).second) {
204 DEBUG_PRINTF("not float\n");
205 return false;
206 }
207 }
208
209 DEBUG_PRINTF("reset on trigger\n");
210 return true;
211}
212
213static
214u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
215 NFAVertex pv, bool prefilter) {
216 CharReach accept_cr;
217 DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first());
218
219 if (prefilter) {
220 /* a later prefilter stage make weaken the lead up so we can't be sure
221 * that all the triggers will be squashing the puffette. */
222 return 0;
223 }
224
225 /* TODO: inspect further back in the pattern */
226 for (auto u : inv_adjacent_vertices_range(pv, g)) {
227 accept_cr |= g[u].char_reach;
228 }
229
230 DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
231
232 if ((accept_cr & cr).any()) {
233 return 0; /* the accept byte doesn't always kill the puffette. TODO:
234 * maybe if we look further back we could find something that
235 * would kill the puffette... */
236 }
237 DEBUG_PRINTF("returning squash distance of %u\n", min_width);
238 return min_width;
239}
240
241/** Gives a stronger puff trigger when the trigger is connected to a wide
242 * cyclic state (aside from sds) */
243static
244void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
245 DEBUG_PRINTF("attempting to improve puff trigger\n");
246 assert(!nodes->empty());
247 const CharReach &puff_cr = g[nodes->back()].char_reach;
248 if (puff_cr.all()) {
249 return; /* we can't really do much with this one */
250 }
251
252 /* add the runway */
253 DEBUG_PRINTF("backing off - allowing a decent header\n");
254 assert(nodes->size() > HEAD_BACKOFF);
255 for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) {
256 nodes->pop_back();
257 }
258 *a = nodes->back();
259 nodes->pop_back();
260}
261
262static
263void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv,
264 const CharReach &cr, const ReportID report, u32 width,
265 bool fixed_depth, bool unbounded, bool auto_restart,
266 RoseBuild &rose, ReportManager &rm,
267 flat_set<ReportID> &chain_reports, bool prefilter) {
268 DEBUG_PRINTF("constructing Puff for report %u\n", report);
269 DEBUG_PRINTF("a = %zu\n", g[a].index);
270
271 const Report &puff_report = rm.getReport(report);
272 const bool simple_exhaust = isSimpleExhaustible(puff_report);
273
274 const bool pureAnchored = a == g.start && singleStart(g);
275 if (!pureAnchored) {
276 if (a == g.startDs || a == g.start) {
277 DEBUG_PRINTF("add outfix ar(false)\n");
278
279 raw_puff rp(width, unbounded, report, cr, auto_restart,
280 simple_exhaust);
281 rose.addOutfix(rp);
282 return;
283 }
284
285 DEBUG_PRINTF("add chain tail\n");
286 u32 qi = ~0U;
287 u32 event = MQE_TOP;
288 raw_puff rp(width, unbounded, report, cr);
289 rose.addChainTail(rp, &qi, &event);
290 assert(qi != ~0U);
291 u32 squashDistance = allowedSquashDistance(cr, width, g, puffv,
292 prefilter);
293
294 Report ir = makeMpvTrigger(event, squashDistance);
295 /* only need to trigger once if floatingUnboundedDot */
296 bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth;
297 if (floatingUnboundedDot) {
298 ir.ekey = rm.getUnassociatedExhaustibleKey();
299 }
300 ReportID id = rm.getInternalId(ir);
301 chain_reports.insert(id);
302 } else {
303 DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart);
304 assert(!auto_restart || unbounded);
305 raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust);
306 rose.addOutfix(rp);
307 }
308}
309
310static
311bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
312 set<NFAVertex> &dead, const CompileContext &cc,
313 bool prefilter) {
314 DEBUG_PRINTF("hello\n");
315 vector<NFAVertex> nodes;
316 const CharReach &cr = g[a].char_reach;
317 bool isDot = cr.all();
318 bool unbounded = false;
319 bool exhaustible = can_exhaust(g, rm);
320
321 while (true) {
322 if (is_special(a, g)) {
323 DEBUG_PRINTF("stopped puffing due to special vertex\n");
324 break;
325 }
326
327 if (g[a].char_reach != cr) {
328 DEBUG_PRINTF("stopped puffing due to change in character "
329 "reachability\n");
330 break;
331 }
332
333 if (proper_in_degree(a, g) != 1) {
334 DEBUG_PRINTF("stopped puffing due to in degree != 1\n");
335 break;
336 }
337
338 size_t outDegree = out_degree(a, g);
339 if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) {
340 DEBUG_PRINTF("stopping puffing due to out degree\n");
341 break;
342 }
343
344 if (hasSelfLoop(a, g)) {
345 DEBUG_PRINTF("has self-loop, marking unbounded\n");
346 unbounded = true;
347 }
348
349 nodes.push_back(a);
350 DEBUG_PRINTF("vertex %zu has in_degree %zu\n", g[a].index,
351 in_degree(a, g));
352
353 a = getSoleSourceVertex(g, a);
354
355 assert(a); /* already checked that old a had a proper in degree of 1 */
356
357 // Snark: we can't handle this case, because we can only handle a
358 // single report ID on a vertex
359 if (is_match_vertex(a, g)) {
360 DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
361 if (!nodes.empty()) {
362 nodes.pop_back();
363 }
364 break;
365 }
366 }
367
368 if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) {
369 for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) {
370 if (is_special(u, g)) {
371 DEBUG_PRINTF("pop\n");
372 a = nodes.back();
373 nodes.pop_back();
374 break;
375 }
376 }
377 }
378
379 if (a != g.startDs && edge(g.startDs, a, g).second
380 && proper_out_degree(a, g) == 1
381 && g[a].char_reach == cr) {
382 nodes.push_back(a);
383 a = g.startDs;
384 }
385
386 bool auto_restart = false;
387
388 DEBUG_PRINTF("a = %zu\n", g[a].index);
389
390 if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) {
391 DEBUG_PRINTF("bad %zu %zu\n", nodes.size(), g[a].index);
392 if (nodes.size() < MIN_PUFF_LENGTH) {
393 return false;
394 } else {
395 DEBUG_PRINTF("mark unbounded\n");
396 unbounded = true;
397 a = g.start;
398 auto_restart = !isDot;
399 }
400 }
401
402 bool supported = false;
403 bool fixed_depth = isFixedDepth(g, nodes.back());
404
405 if (exhaustible) {
406 supported = true;
407 } else if (fixed_depth) {
408 supported = true;
409 } else if (unbounded) {
410 /* any C{n, } can be supported as all ranges will be squashed together
411 * only need to track the first */
412 supported = true;
413 } else if (triggerResetsPuff(g, nodes.back())) {
414 supported = true;
415 } else if (triggerFloodsPuff(g, nodes.back())) {
416 DEBUG_PRINTF("trigger floods puff\n");
417 supported = true;
418 unbounded = true;
419 }
420
421 if (!supported) {
422 DEBUG_PRINTF("not supported\n");
423 return false;
424 }
425
426 if (cc.grey.puffImproveHead && a != g.start) {
427 if (edge(g.startDs, a, g).second) {
428 goto skip_improve; /* direct sds cases are better handled by auto
429 * restarting puffettes */
430 }
431
432 if (fixed_depth) {
433 goto skip_improve; /* no danger of trigger floods */
434 }
435
436 /* if we come after something literalish don't bother */
437 if (g[a].char_reach.count() <= 2
438 && in_degree(a, g) == 1
439 && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) {
440 goto skip_improve;
441 }
442
443 if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) {
444 return false; /* not enough of the puff left to worth bothering
445 about */
446 }
447
448 improveHead(g, &a, &nodes);
449 skip_improve:;
450 }
451
452 assert(!nodes.empty());
453 const auto &reports = g[nodes[0]].reports;
454 assert(!reports.empty());
455
456 for (auto report : reports) {
457 const Report &ir = rm.getReport(report);
458 const bool highlander = ir.ekey != INVALID_EKEY;
459 if (!unbounded && highlander && !isSimpleExhaustible(ir)) {
460 DEBUG_PRINTF("report %u is bounded highlander but not simple "
461 "exhaustible\n",
462 report);
463 return false;
464 }
465
466 if (ir.type == INTERNAL_ROSE_CHAIN) {
467 DEBUG_PRINTF("puffettes cannot be chained together\n");
468 return false;
469 }
470 }
471
472 NFAVertex puffv = nodes.back();
473 assert(puffv != NGHolder::null_vertex());
474 u32 width = countChain(g, nodes.back());
475
476 flat_set<ReportID> chain_reports;
477
478 for (auto report : reports) {
479 constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded,
480 auto_restart, rose, rm, chain_reports, prefilter);
481 }
482
483 if (!chain_reports.empty()) {
484 wireNewAccepts(g, puffv, chain_reports);
485 }
486
487 dead.insert(nodes.begin(), nodes.end());
488 return true;
489}
490
491bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
492 bool prefilter, const CompileContext &cc) {
493 if (!cc.grey.allowPuff) {
494 return false;
495 }
496
497 size_t count = 0;
498 set<NFAVertex> dead;
499
500 for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
501 if (doComponent(rose, rm, g, v, dead, cc, prefilter)) {
502 count++;
503 }
504 }
505
506 if (!dead.empty()) {
507 remove_vertices(dead, g);
508 pruneUseless(g);
509 }
510
511 DEBUG_PRINTF("puffs: %zu\n", count);
512 return num_vertices(g) <= N_SPECIALS;
513}
514
515bool isPuffable(const NGHolder &g, bool fixed_depth,
516 const ReportManager &rm, const Grey &grey) {
517 if (!grey.allowPuff) {
518 return false;
519 }
520
521 if (!onlyOneTop(g)) {
522 DEBUG_PRINTF("more than one top\n");
523 return false;
524 }
525
526 const set<ReportID> reports = all_reports(g);
527 if (reports.size() != 1) {
528 DEBUG_PRINTF("too many reports\n");
529 return false;
530 }
531
532 const Report &ir = rm.getReport(*reports.begin());
533
534 if (ir.type == INTERNAL_ROSE_CHAIN) {
535 DEBUG_PRINTF("puffettes cannot be chained together\n");
536 return false;
537 }
538
539 PureRepeat repeat;
540 if (!isPureRepeat(g, repeat)) {
541 DEBUG_PRINTF("not pure bounded repeat\n");
542 return false;
543 }
544
545 if (repeat.bounds.min == depth(0)) {
546 DEBUG_PRINTF("repeat min bound is zero\n");
547 return false;
548 }
549
550 // We can puff if:
551 // (a) repeat is {N,}; or
552 // (b) repeat is {N} and fixed-depth, or highlander (and will accept the
553 // first match)
554
555 DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str());
556
557 if (repeat.bounds.max.is_infinite()) {
558 return true;
559 }
560
561 if (repeat.bounds.min == repeat.bounds.max) {
562 if (fixed_depth) {
563 DEBUG_PRINTF("fixed depth\n");
564 return true;
565 }
566
567 const bool highlander = ir.ekey != INVALID_EKEY;
568
569 // If we're highlander, we must be simple-exhaustible as well.
570 if (highlander && isSimpleExhaustible(ir)) {
571 return true;
572 }
573 }
574
575 return false;
576}
577
578} // namespace ue2
579