1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Puff construction from NGHolder. |
31 | */ |
32 | #include "ng_puff.h" |
33 | |
34 | #include "grey.h" |
35 | #include "ng_depth.h" |
36 | #include "ng_holder.h" |
37 | #include "ng_prune.h" |
38 | #include "ng_repeat.h" |
39 | #include "ng_reports.h" |
40 | #include "ng_util.h" |
41 | #include "ue2common.h" |
42 | #include "nfa/nfa_api_queue.h" |
43 | #include "nfa/mpvcompile.h" |
44 | #include "rose/rose_build.h" |
45 | #include "util/compile_context.h" |
46 | #include "util/graph_range.h" |
47 | #include "util/report_manager.h" |
48 | |
49 | #include <vector> |
50 | |
51 | using namespace std; |
52 | |
53 | namespace ue2 { |
54 | |
55 | static const unsigned MIN_PUFF_LENGTH = 16; |
56 | static const unsigned HEAD_BACKOFF = 16; |
57 | |
58 | static |
59 | size_t countChain(const NGHolder &g, NFAVertex v) { |
60 | size_t count = 0; |
61 | while (v) { |
62 | DEBUG_PRINTF("counting vertex %zu\n" , g[v].index); |
63 | if (is_special(v, g)) { |
64 | break; |
65 | } |
66 | |
67 | count++; |
68 | v = getSoleDestVertex(g, v); |
69 | } |
70 | DEBUG_PRINTF("done %zu\n" , count); |
71 | return count; |
72 | } |
73 | |
74 | static |
75 | void wireNewAccepts(NGHolder &g, NFAVertex head, |
76 | const flat_set<ReportID> &chain_reports) { |
77 | for (auto u : inv_adjacent_vertices_range(head, g)) { |
78 | if (is_special(u, g)) { |
79 | continue; |
80 | } |
81 | |
82 | DEBUG_PRINTF("adding edge: %zu -> accept\n" , g[u].index); |
83 | assert(!edge(u, g.accept, g).second); |
84 | assert(!edge(u, g.acceptEod, g).second); |
85 | add_edge(u, g.accept, g); |
86 | |
87 | // Replace reports with our chain reports. |
88 | auto &u_reports = g[u].reports; |
89 | u_reports.clear(); |
90 | u_reports.insert(chain_reports.begin(), chain_reports.end()); |
91 | } |
92 | } |
93 | |
94 | static |
95 | bool isFixedDepth(const NGHolder &g, NFAVertex v) { |
96 | // If the vertex is reachable from startDs, it can't be fixed depth. |
97 | auto depthFromStartDs = calcDepthsFrom(g, g.startDs); |
98 | |
99 | u32 idx = g[v].index; |
100 | const DepthMinMax &ds = depthFromStartDs.at(idx); |
101 | if (ds.min.is_reachable()) { |
102 | DEBUG_PRINTF("vertex reachable from startDs\n" ); |
103 | return false; |
104 | } |
105 | |
106 | auto depthFromStart = calcDepthsFrom(g, g.start); |
107 | |
108 | /* we can still consider the head of a puff chain as at fixed depth if |
109 | * it has a self-loop: so we look at all the preds of v (other than v |
110 | * itself) */ |
111 | |
112 | assert(v && !is_special(v, g)); |
113 | |
114 | u32 count = 0; |
115 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
116 | if (u == v) { |
117 | continue; // self-loop |
118 | } |
119 | count++; |
120 | |
121 | idx = g[u].index; |
122 | const DepthMinMax &d = depthFromStart.at(idx); |
123 | if (d.min != d.max) { |
124 | return false; |
125 | } |
126 | } |
127 | |
128 | return count != 0; // at least one fixed-depth pred |
129 | } |
130 | |
131 | static |
132 | bool singleStart(const NGHolder &g) { |
133 | set<NFAVertex> seen; |
134 | |
135 | for (auto v : adjacent_vertices_range(g.start, g)) { |
136 | if (!is_special(v, g)) { |
137 | DEBUG_PRINTF("saw %zu\n" , g[v].index); |
138 | seen.insert(v); |
139 | } |
140 | } |
141 | for (auto v : adjacent_vertices_range(g.startDs, g)) { |
142 | if (!is_special(v, g)) { |
143 | DEBUG_PRINTF("saw %zu\n" , g[v].index); |
144 | seen.insert(v); |
145 | } |
146 | } |
147 | |
148 | DEBUG_PRINTF("comp has %zu starts\n" , seen.size()); |
149 | |
150 | return seen.size() == 1; |
151 | } |
152 | |
153 | static |
154 | bool triggerResetsPuff(const NGHolder &g, NFAVertex head) { |
155 | const CharReach puff_escapes = ~g[head].char_reach; |
156 | |
157 | for (auto u : inv_adjacent_vertices_range(head, g)) { |
158 | if (!g[u].char_reach.isSubsetOf(puff_escapes)) { |
159 | DEBUG_PRINTF("no reset on trigger %zu %zu\n" , g[u].index, |
160 | g[head].index); |
161 | return false; |
162 | } |
163 | } |
164 | |
165 | DEBUG_PRINTF("reset on trigger\n" ); |
166 | return true; |
167 | } |
168 | |
169 | /** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform) |
170 | * */ |
171 | static |
172 | bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) { |
173 | DEBUG_PRINTF("head = %zu\n" , g[head].index); |
174 | |
175 | const CharReach &puff_cr = g[head].char_reach; |
176 | |
177 | /* we can use the pred of the head as the base of our check if it the cr |
178 | * matches as if |
179 | * head cr subsetof pred cr: if head is being pushed on then puff must |
180 | * still being pushed on |
181 | * pred cr subsetof head cr: if the puff matches then head must be also |
182 | * always be on if the is connected to a wide enough cyclic |
183 | */ |
184 | if (proper_in_degree(head, g) == 1 |
185 | && puff_cr == g[getSoleSourceVertex(g, head)].char_reach) { |
186 | head = getSoleSourceVertex(g, head); |
187 | DEBUG_PRINTF("temp new head = %zu\n" , g[head].index); |
188 | } |
189 | |
190 | for (auto s : inv_adjacent_vertices_range(head, g)) { |
191 | DEBUG_PRINTF("s = %zu\n" , g[s].index); |
192 | if (!puff_cr.isSubsetOf(g[s].char_reach)) { |
193 | DEBUG_PRINTF("no flood on trigger %zu %zu\n" , g[s].index, |
194 | g[head].index); |
195 | return false; |
196 | } |
197 | |
198 | if (!hasSelfLoop(s, g) && s != g.start) { |
199 | DEBUG_PRINTF("no self loop\n" ); |
200 | return false; |
201 | } |
202 | |
203 | if (s == g.start && !edge(g.startDs, head, g).second) { |
204 | DEBUG_PRINTF("not float\n" ); |
205 | return false; |
206 | } |
207 | } |
208 | |
209 | DEBUG_PRINTF("reset on trigger\n" ); |
210 | return true; |
211 | } |
212 | |
213 | static |
214 | u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g, |
215 | NFAVertex pv, bool prefilter) { |
216 | CharReach accept_cr; |
217 | DEBUG_PRINTF("hello |cr|=%zu %d\n" , cr.count(), (int)cr.find_first()); |
218 | |
219 | if (prefilter) { |
220 | /* a later prefilter stage make weaken the lead up so we can't be sure |
221 | * that all the triggers will be squashing the puffette. */ |
222 | return 0; |
223 | } |
224 | |
225 | /* TODO: inspect further back in the pattern */ |
226 | for (auto u : inv_adjacent_vertices_range(pv, g)) { |
227 | accept_cr |= g[u].char_reach; |
228 | } |
229 | |
230 | DEBUG_PRINTF("|accept_cr|=%zu\n" , accept_cr.count()); |
231 | |
232 | if ((accept_cr & cr).any()) { |
233 | return 0; /* the accept byte doesn't always kill the puffette. TODO: |
234 | * maybe if we look further back we could find something that |
235 | * would kill the puffette... */ |
236 | } |
237 | DEBUG_PRINTF("returning squash distance of %u\n" , min_width); |
238 | return min_width; |
239 | } |
240 | |
241 | /** Gives a stronger puff trigger when the trigger is connected to a wide |
242 | * cyclic state (aside from sds) */ |
243 | static |
244 | void (NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) { |
245 | DEBUG_PRINTF("attempting to improve puff trigger\n" ); |
246 | assert(!nodes->empty()); |
247 | const CharReach &puff_cr = g[nodes->back()].char_reach; |
248 | if (puff_cr.all()) { |
249 | return; /* we can't really do much with this one */ |
250 | } |
251 | |
252 | /* add the runway */ |
253 | DEBUG_PRINTF("backing off - allowing a decent header\n" ); |
254 | assert(nodes->size() > HEAD_BACKOFF); |
255 | for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) { |
256 | nodes->pop_back(); |
257 | } |
258 | *a = nodes->back(); |
259 | nodes->pop_back(); |
260 | } |
261 | |
262 | static |
263 | void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv, |
264 | const CharReach &cr, const ReportID report, u32 width, |
265 | bool fixed_depth, bool unbounded, bool auto_restart, |
266 | RoseBuild &rose, ReportManager &rm, |
267 | flat_set<ReportID> &chain_reports, bool prefilter) { |
268 | DEBUG_PRINTF("constructing Puff for report %u\n" , report); |
269 | DEBUG_PRINTF("a = %zu\n" , g[a].index); |
270 | |
271 | const Report &puff_report = rm.getReport(report); |
272 | const bool simple_exhaust = isSimpleExhaustible(puff_report); |
273 | |
274 | const bool pureAnchored = a == g.start && singleStart(g); |
275 | if (!pureAnchored) { |
276 | if (a == g.startDs || a == g.start) { |
277 | DEBUG_PRINTF("add outfix ar(false)\n" ); |
278 | |
279 | raw_puff rp(width, unbounded, report, cr, auto_restart, |
280 | simple_exhaust); |
281 | rose.addOutfix(rp); |
282 | return; |
283 | } |
284 | |
285 | DEBUG_PRINTF("add chain tail\n" ); |
286 | u32 qi = ~0U; |
287 | u32 event = MQE_TOP; |
288 | raw_puff rp(width, unbounded, report, cr); |
289 | rose.addChainTail(rp, &qi, &event); |
290 | assert(qi != ~0U); |
291 | u32 squashDistance = allowedSquashDistance(cr, width, g, puffv, |
292 | prefilter); |
293 | |
294 | Report ir = makeMpvTrigger(event, squashDistance); |
295 | /* only need to trigger once if floatingUnboundedDot */ |
296 | bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth; |
297 | if (floatingUnboundedDot) { |
298 | ir.ekey = rm.getUnassociatedExhaustibleKey(); |
299 | } |
300 | ReportID id = rm.getInternalId(ir); |
301 | chain_reports.insert(id); |
302 | } else { |
303 | DEBUG_PRINTF("add outfix ar(%d)\n" , (int)auto_restart); |
304 | assert(!auto_restart || unbounded); |
305 | raw_puff rp(width, unbounded, report, cr, auto_restart, simple_exhaust); |
306 | rose.addOutfix(rp); |
307 | } |
308 | } |
309 | |
310 | static |
311 | bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a, |
312 | set<NFAVertex> &dead, const CompileContext &cc, |
313 | bool prefilter) { |
314 | DEBUG_PRINTF("hello\n" ); |
315 | vector<NFAVertex> nodes; |
316 | const CharReach &cr = g[a].char_reach; |
317 | bool isDot = cr.all(); |
318 | bool unbounded = false; |
319 | bool exhaustible = can_exhaust(g, rm); |
320 | |
321 | while (true) { |
322 | if (is_special(a, g)) { |
323 | DEBUG_PRINTF("stopped puffing due to special vertex\n" ); |
324 | break; |
325 | } |
326 | |
327 | if (g[a].char_reach != cr) { |
328 | DEBUG_PRINTF("stopped puffing due to change in character " |
329 | "reachability\n" ); |
330 | break; |
331 | } |
332 | |
333 | if (proper_in_degree(a, g) != 1) { |
334 | DEBUG_PRINTF("stopped puffing due to in degree != 1\n" ); |
335 | break; |
336 | } |
337 | |
338 | size_t outDegree = out_degree(a, g); |
339 | if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) { |
340 | DEBUG_PRINTF("stopping puffing due to out degree\n" ); |
341 | break; |
342 | } |
343 | |
344 | if (hasSelfLoop(a, g)) { |
345 | DEBUG_PRINTF("has self-loop, marking unbounded\n" ); |
346 | unbounded = true; |
347 | } |
348 | |
349 | nodes.push_back(a); |
350 | DEBUG_PRINTF("vertex %zu has in_degree %zu\n" , g[a].index, |
351 | in_degree(a, g)); |
352 | |
353 | a = getSoleSourceVertex(g, a); |
354 | |
355 | assert(a); /* already checked that old a had a proper in degree of 1 */ |
356 | |
357 | // Snark: we can't handle this case, because we can only handle a |
358 | // single report ID on a vertex |
359 | if (is_match_vertex(a, g)) { |
360 | DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n" ); |
361 | if (!nodes.empty()) { |
362 | nodes.pop_back(); |
363 | } |
364 | break; |
365 | } |
366 | } |
367 | |
368 | if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) { |
369 | for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) { |
370 | if (is_special(u, g)) { |
371 | DEBUG_PRINTF("pop\n" ); |
372 | a = nodes.back(); |
373 | nodes.pop_back(); |
374 | break; |
375 | } |
376 | } |
377 | } |
378 | |
379 | if (a != g.startDs && edge(g.startDs, a, g).second |
380 | && proper_out_degree(a, g) == 1 |
381 | && g[a].char_reach == cr) { |
382 | nodes.push_back(a); |
383 | a = g.startDs; |
384 | } |
385 | |
386 | bool auto_restart = false; |
387 | |
388 | DEBUG_PRINTF("a = %zu\n" , g[a].index); |
389 | |
390 | if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) { |
391 | DEBUG_PRINTF("bad %zu %zu\n" , nodes.size(), g[a].index); |
392 | if (nodes.size() < MIN_PUFF_LENGTH) { |
393 | return false; |
394 | } else { |
395 | DEBUG_PRINTF("mark unbounded\n" ); |
396 | unbounded = true; |
397 | a = g.start; |
398 | auto_restart = !isDot; |
399 | } |
400 | } |
401 | |
402 | bool supported = false; |
403 | bool fixed_depth = isFixedDepth(g, nodes.back()); |
404 | |
405 | if (exhaustible) { |
406 | supported = true; |
407 | } else if (fixed_depth) { |
408 | supported = true; |
409 | } else if (unbounded) { |
410 | /* any C{n, } can be supported as all ranges will be squashed together |
411 | * only need to track the first */ |
412 | supported = true; |
413 | } else if (triggerResetsPuff(g, nodes.back())) { |
414 | supported = true; |
415 | } else if (triggerFloodsPuff(g, nodes.back())) { |
416 | DEBUG_PRINTF("trigger floods puff\n" ); |
417 | supported = true; |
418 | unbounded = true; |
419 | } |
420 | |
421 | if (!supported) { |
422 | DEBUG_PRINTF("not supported\n" ); |
423 | return false; |
424 | } |
425 | |
426 | if (cc.grey.puffImproveHead && a != g.start) { |
427 | if (edge(g.startDs, a, g).second) { |
428 | goto skip_improve; /* direct sds cases are better handled by auto |
429 | * restarting puffettes */ |
430 | } |
431 | |
432 | if (fixed_depth) { |
433 | goto skip_improve; /* no danger of trigger floods */ |
434 | } |
435 | |
436 | /* if we come after something literalish don't bother */ |
437 | if (g[a].char_reach.count() <= 2 |
438 | && in_degree(a, g) == 1 |
439 | && g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) { |
440 | goto skip_improve; |
441 | } |
442 | |
443 | if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) { |
444 | return false; /* not enough of the puff left to worth bothering |
445 | about */ |
446 | } |
447 | |
448 | improveHead(g, &a, &nodes); |
449 | skip_improve:; |
450 | } |
451 | |
452 | assert(!nodes.empty()); |
453 | const auto &reports = g[nodes[0]].reports; |
454 | assert(!reports.empty()); |
455 | |
456 | for (auto report : reports) { |
457 | const Report &ir = rm.getReport(report); |
458 | const bool highlander = ir.ekey != INVALID_EKEY; |
459 | if (!unbounded && highlander && !isSimpleExhaustible(ir)) { |
460 | DEBUG_PRINTF("report %u is bounded highlander but not simple " |
461 | "exhaustible\n" , |
462 | report); |
463 | return false; |
464 | } |
465 | |
466 | if (ir.type == INTERNAL_ROSE_CHAIN) { |
467 | DEBUG_PRINTF("puffettes cannot be chained together\n" ); |
468 | return false; |
469 | } |
470 | } |
471 | |
472 | NFAVertex puffv = nodes.back(); |
473 | assert(puffv != NGHolder::null_vertex()); |
474 | u32 width = countChain(g, nodes.back()); |
475 | |
476 | flat_set<ReportID> chain_reports; |
477 | |
478 | for (auto report : reports) { |
479 | constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded, |
480 | auto_restart, rose, rm, chain_reports, prefilter); |
481 | } |
482 | |
483 | if (!chain_reports.empty()) { |
484 | wireNewAccepts(g, puffv, chain_reports); |
485 | } |
486 | |
487 | dead.insert(nodes.begin(), nodes.end()); |
488 | return true; |
489 | } |
490 | |
491 | bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g, |
492 | bool prefilter, const CompileContext &cc) { |
493 | if (!cc.grey.allowPuff) { |
494 | return false; |
495 | } |
496 | |
497 | size_t count = 0; |
498 | set<NFAVertex> dead; |
499 | |
500 | for (auto v : inv_adjacent_vertices_range(g.accept, g)) { |
501 | if (doComponent(rose, rm, g, v, dead, cc, prefilter)) { |
502 | count++; |
503 | } |
504 | } |
505 | |
506 | if (!dead.empty()) { |
507 | remove_vertices(dead, g); |
508 | pruneUseless(g); |
509 | } |
510 | |
511 | DEBUG_PRINTF("puffs: %zu\n" , count); |
512 | return num_vertices(g) <= N_SPECIALS; |
513 | } |
514 | |
515 | bool isPuffable(const NGHolder &g, bool fixed_depth, |
516 | const ReportManager &rm, const Grey &grey) { |
517 | if (!grey.allowPuff) { |
518 | return false; |
519 | } |
520 | |
521 | if (!onlyOneTop(g)) { |
522 | DEBUG_PRINTF("more than one top\n" ); |
523 | return false; |
524 | } |
525 | |
526 | const set<ReportID> reports = all_reports(g); |
527 | if (reports.size() != 1) { |
528 | DEBUG_PRINTF("too many reports\n" ); |
529 | return false; |
530 | } |
531 | |
532 | const Report &ir = rm.getReport(*reports.begin()); |
533 | |
534 | if (ir.type == INTERNAL_ROSE_CHAIN) { |
535 | DEBUG_PRINTF("puffettes cannot be chained together\n" ); |
536 | return false; |
537 | } |
538 | |
539 | PureRepeat repeat; |
540 | if (!isPureRepeat(g, repeat)) { |
541 | DEBUG_PRINTF("not pure bounded repeat\n" ); |
542 | return false; |
543 | } |
544 | |
545 | if (repeat.bounds.min == depth(0)) { |
546 | DEBUG_PRINTF("repeat min bound is zero\n" ); |
547 | return false; |
548 | } |
549 | |
550 | // We can puff if: |
551 | // (a) repeat is {N,}; or |
552 | // (b) repeat is {N} and fixed-depth, or highlander (and will accept the |
553 | // first match) |
554 | |
555 | DEBUG_PRINTF("repeat is %s\n" , repeat.bounds.str().c_str()); |
556 | |
557 | if (repeat.bounds.max.is_infinite()) { |
558 | return true; |
559 | } |
560 | |
561 | if (repeat.bounds.min == repeat.bounds.max) { |
562 | if (fixed_depth) { |
563 | DEBUG_PRINTF("fixed depth\n" ); |
564 | return true; |
565 | } |
566 | |
567 | const bool highlander = ir.ekey != INVALID_EKEY; |
568 | |
569 | // If we're highlander, we must be simple-exhaustible as well. |
570 | if (highlander && isSimpleExhaustible(ir)) { |
571 | return true; |
572 | } |
573 | } |
574 | |
575 | return false; |
576 | } |
577 | |
578 | } // namespace ue2 |
579 | |