1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * \file
31 * \brief Propagate extended parameters to vertex reports and reduce graph if
32 * possible.
33 *
34 * This code handles the propagation of the extension parameters specified by
35 * the user with the \ref hs_expr_ext structure into the reports on the graph's
36 * vertices.
37 *
38 * There are also some analyses that prune edges that cannot contribute to a
39 * match given these constraints, or transform the graph in order to make a
40 * constraint implicit.
41 */
42
43#include "ng_extparam.h"
44
45#include "ng.h"
46#include "ng_depth.h"
47#include "ng_dump.h"
48#include "ng_prune.h"
49#include "ng_reports.h"
50#include "ng_som_util.h"
51#include "ng_width.h"
52#include "ng_util.h"
53#include "ue2common.h"
54#include "compiler/compiler.h"
55#include "parser/position.h"
56#include "util/compile_context.h"
57#include "util/compile_error.h"
58#include "util/container.h"
59#include "util/graph.h"
60#include "util/graph_range.h"
61
62#include <sstream>
63#include <string>
64
65using namespace std;
66
67namespace ue2 {
68
69static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000;
70static const u32 MAX_MINLENGTH_TO_CONVERT = 2000;
71
72/** True if all the given reports have the same extparam bounds. */
73template<typename Container>
74bool hasSameBounds(const Container &reports, const ReportManager &rm) {
75 assert(!reports.empty());
76
77 const auto &first = rm.getReport(*reports.begin());
78 for (auto id : reports) {
79 const auto &report = rm.getReport(id);
80 if (report.minOffset != first.minOffset ||
81 report.maxOffset != first.maxOffset ||
82 report.minLength != first.minLength) {
83 return false;
84 }
85 }
86
87 return true;
88}
89
90/**
91 * \brief Find the (min, max) offset adjustment for the reports on a given
92 * vertex.
93 */
94static
95pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm,
96 const NGHolder &g, NFAVertex v) {
97 s32 minAdj = 0, maxAdj = 0;
98 const auto &reports = g[v].reports;
99 for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) {
100 const Report &ir = rm.getReport(*ri);
101 if (ri == reports.begin()) {
102 minAdj = ir.offsetAdjust;
103 maxAdj = ir.offsetAdjust;
104 } else {
105 minAdj = min(minAdj, ir.offsetAdjust);
106 maxAdj = max(maxAdj, ir.offsetAdjust);
107 }
108 }
109
110 return make_pair(minAdj, maxAdj);
111}
112
113/** \brief Find the (min, max) length of any match for the given holder. */
114static
115DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
116 DepthMinMax match_depths;
117
118 vector<DepthMinMax> depths = getDistancesFromSOM(g);
119
120 pair<s32, s32> adj;
121
122 for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
123 u32 idx = g[v].index;
124 DepthMinMax d = depths[idx]; // copy
125 adj = getMinMaxOffsetAdjust(rm, g, v);
126 DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
127 d.str().c_str(), adj.first, adj.second);
128 d.min += adj.first;
129 d.max += adj.second;
130 match_depths = unionDepthMinMax(match_depths, d);
131 }
132
133 for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
134 if (v == g.accept) {
135 continue;
136 }
137 u32 idx = g[v].index;
138 DepthMinMax d = depths[idx]; // copy
139 adj = getMinMaxOffsetAdjust(rm, g, v);
140 DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
141 d.str().c_str(), adj.first, adj.second);
142 d.min += adj.first;
143 d.max += adj.second;
144 match_depths = unionDepthMinMax(match_depths, d);
145 }
146
147 DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());
148
149 assert(match_depths.min.is_reachable());
150 assert(match_depths.max.is_reachable());
151 return match_depths;
152}
153
154template<typename Function>
155void replaceReports(NGHolder &g, NFAVertex accept, flat_set<NFAVertex> &seen,
156 Function func) {
157 for (auto v : inv_adjacent_vertices_range(accept, g)) {
158 if (v == g.accept) {
159 // Don't operate on accept: the accept->acceptEod edge is stylised.
160 assert(accept == g.acceptEod);
161 assert(g[v].reports.empty());
162 continue;
163 }
164
165 if (!seen.insert(v).second) {
166 continue; // We have already processed v.
167 }
168
169 auto &reports = g[v].reports;
170 if (reports.empty()) {
171 continue;
172 }
173 decltype(g[v].reports) new_reports;
174 for (auto id : g[v].reports) {
175 new_reports.insert(func(v, id));
176 }
177 reports = std::move(new_reports);
178 }
179}
180
181/**
182 * Generic function for replacing all the reports in the graph.
183 *
184 * Pass this a function that takes a vertex and a ReportID returns another
185 * ReportID (or the same one) to replace it with.
186 */
187template<typename Function>
188void replaceReports(NGHolder &g, Function func) {
189 flat_set<NFAVertex> seen;
190 replaceReports(g, g.accept, seen, func);
191 replaceReports(g, g.acceptEod, seen, func);
192}
193
194/** \brief Replace the graph's reports with new reports that specify bounds. */
195static
196void updateReportBounds(ReportManager &rm, NGHolder &g,
197 const ExpressionInfo &expr) {
198 DEBUG_PRINTF("updating report bounds\n");
199 replaceReports(g, [&](NFAVertex, ReportID id) {
200 Report report = rm.getReport(id); // make a copy
201 assert(!report.hasBounds());
202
203 // Note that we need to cope with offset adjustment here.
204
205 report.minOffset = expr.min_offset - report.offsetAdjust;
206 if (expr.max_offset == MAX_OFFSET) {
207 report.maxOffset = MAX_OFFSET;
208 } else {
209 report.maxOffset = expr.max_offset - report.offsetAdjust;
210 }
211 assert(report.maxOffset >= report.minOffset);
212
213 report.minLength = expr.min_length;
214 if (expr.min_length && !expr.som) {
215 report.quashSom = true;
216 }
217
218 DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, "
219 "min_length=%llu\n", id, report.minOffset,
220 report.maxOffset, report.minLength);
221
222 return rm.getInternalId(report);
223 });
224}
225
226static
227bool hasVirtualStarts(const NGHolder &g) {
228 for (auto v : adjacent_vertices_range(g.start, g)) {
229 if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
230 return true;
231 }
232 }
233 return false;
234}
235
236/** Set the min_length param for all reports to zero. */
237static
238void clearMinLengthParam(NGHolder &g, ReportManager &rm) {
239 DEBUG_PRINTF("clearing min length\n");
240 replaceReports(g, [&rm](NFAVertex, ReportID id) {
241 const auto &report = rm.getReport(id);
242 if (report.minLength) {
243 Report new_report = report;
244 new_report.minLength = 0;
245 return rm.getInternalId(new_report);
246 }
247 return id;
248 });
249}
250
251/**
252 * Set the min_offset param to zero and the max_offset param to MAX_OFFSET for
253 * all reports.
254 */
255static
256void clearOffsetParams(NGHolder &g, ReportManager &rm) {
257 DEBUG_PRINTF("clearing min and max offset\n");
258 replaceReports(g, [&rm](NFAVertex, ReportID id) {
259 const auto &report = rm.getReport(id);
260 if (report.minLength) {
261 Report new_report = report;
262 new_report.minOffset = 0;
263 new_report.maxOffset = MAX_OFFSET;
264 return rm.getInternalId(new_report);
265 }
266 return id;
267 });
268}
269
270/**
271 * If the pattern is unanchored, has a max_offset and has not asked for SOM, we
272 * can use that knowledge to anchor it which will limit its lifespan. Note that
273 * we can't use this transformation if there's a min_length, as it's currently
274 * handled using "sly SOM".
275 *
276 * Note that it is possible to handle graphs that have a combination of
277 * anchored and unanchored paths, but it's too tricky for the moment.
278 */
279static
280bool anchorPatternWithBoundedRepeat(NGHolder &g, ReportManager &rm) {
281 if (!isFloating(g)) {
282 return false;
283 }
284
285 const auto &reports = all_reports(g);
286 if (reports.empty()) {
287 return false;
288 }
289
290 if (any_of_in(reports, [&](ReportID id) {
291 const auto &report = rm.getReport(id);
292 return report.maxOffset == MAX_OFFSET || report.minLength ||
293 report.offsetAdjust;
294 })) {
295 return false;
296 }
297
298 if (!hasSameBounds(reports, rm)) {
299 DEBUG_PRINTF("mixed report bounds\n");
300 return false;
301 }
302
303 const depth minWidth = findMinWidth(g);
304 const depth maxWidth = findMaxWidth(g);
305
306 assert(minWidth <= maxWidth);
307 assert(maxWidth.is_reachable());
308
309 const auto &first_report = rm.getReport(*reports.begin());
310 const auto min_offset = first_report.minOffset;
311 const auto max_offset = first_report.maxOffset;
312 assert(max_offset < MAX_OFFSET);
313
314 DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
315 minWidth.str().c_str(), maxWidth.str().c_str(),
316 min_offset, max_offset);
317
318 if (max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
319 return false;
320 }
321
322 if (max_offset < minWidth) {
323 assert(0);
324 return false;
325 }
326
327 // If the pattern has virtual starts, we probably don't want to touch it.
328 if (hasVirtualStarts(g)) {
329 DEBUG_PRINTF("virtual starts, bailing\n");
330 return false;
331 }
332
333 // Similarly, bail if the pattern is vacuous. TODO: this could be done, we
334 // would just need to be a little careful with reports.
335 if (isVacuous(g)) {
336 DEBUG_PRINTF("vacuous, bailing\n");
337 return false;
338 }
339
340 u32 min_bound, max_bound;
341 if (maxWidth.is_infinite()) {
342 min_bound = 0;
343 max_bound = max_offset - minWidth;
344 } else {
345 min_bound = min_offset > maxWidth ? min_offset - maxWidth : 0;
346 max_bound = max_offset - minWidth;
347 }
348
349 DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
350
351 vector<NFAVertex> initials;
352 for (auto v : adjacent_vertices_range(g.startDs, g)) {
353 if (v == g.startDs) {
354 continue;
355 }
356 initials.push_back(v);
357 }
358 if (initials.empty()) {
359 DEBUG_PRINTF("no initial vertices\n");
360 return false;
361 }
362
363 // Wire up 'min_offset' mandatory dots from anchored start.
364 NFAVertex u = g.start;
365 for (u32 i = 0; i < min_bound; i++) {
366 NFAVertex v = add_vertex(g);
367 g[v].char_reach.setall();
368 add_edge(u, v, g);
369 u = v;
370 }
371
372 NFAVertex head = u;
373
374 // Wire up optional dots for (max_offset - min_offset).
375 for (u32 i = 0; i < max_bound - min_bound; i++) {
376 NFAVertex v = add_vertex(g);
377 g[v].char_reach.setall();
378 if (head != u) {
379 add_edge(head, v, g);
380 }
381 add_edge(u, v, g);
382 u = v;
383 }
384
385 // Remove edges from starts and wire both head and u to our initials.
386 for (auto v : initials) {
387 remove_edge(g.startDs, v, g);
388 remove_edge(g.start, v, g);
389
390 if (head != u) {
391 add_edge(head, v, g);
392 }
393 add_edge(u, v, g);
394 }
395
396 renumber_vertices(g);
397 renumber_edges(g);
398
399 if (minWidth == maxWidth) {
400 // For a fixed width pattern, we can retire the offsets as
401 // they are implicit in the graph now.
402 clearOffsetParams(g, rm);
403 }
404
405 clearReports(g);
406 return true;
407}
408
409static
410NFAVertex findSingleCyclic(const NGHolder &g) {
411 NFAVertex v = NGHolder::null_vertex();
412 for (const auto &e : edges_range(g)) {
413 if (source(e, g) == target(e, g)) {
414 if (source(e, g) == g.startDs) {
415 continue;
416 }
417 if (v != NGHolder::null_vertex()) {
418 // More than one cyclic vertex.
419 return NGHolder::null_vertex();
420 }
421 v = source(e, g);
422 }
423 }
424
425 if (v != NGHolder::null_vertex()) {
426 DEBUG_PRINTF("cyclic is %zu\n", g[v].index);
427 assert(!is_special(v, g));
428 }
429 return v;
430}
431
432static
433bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
434 int *adjust) {
435 const auto &reports = all_reports(g);
436 if (reports.empty()) {
437 assert(0);
438 return false;
439 }
440
441 int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust;
442 for (auto report : reports) {
443 const Report &ir = rm.getReport(report);
444 if (ir.offsetAdjust != offsetAdjust) {
445 DEBUG_PRINTF("different adjusts!\n");
446 return false;
447 }
448 }
449
450 *adjust = offsetAdjust;
451 return true;
452}
453
454/**
455 * If the pattern has a min_length and is of "ratchet" form with one unbounded
456 * repeat, that repeat can become a bounded repeat.
457 *
458 * /foo.*bar/{min_length=100} --> /foo.{94,}bar/
459 */
460static
461bool transformMinLengthToRepeat(NGHolder &g, ReportManager &rm) {
462 const auto &reports = all_reports(g);
463
464 if (reports.empty()) {
465 return false;
466 }
467
468 if (!hasSameBounds(reports, rm)) {
469 DEBUG_PRINTF("mixed report bounds\n");
470 return false;
471 }
472
473 const auto &min_length = rm.getReport(*reports.begin()).minLength;
474 if (!min_length || min_length > MAX_MINLENGTH_TO_CONVERT) {
475 return false;
476 }
477
478 // If the pattern has virtual starts, we probably don't want to touch it.
479 if (hasVirtualStarts(g)) {
480 DEBUG_PRINTF("virtual starts, bailing\n");
481 return false;
482 }
483
484 // The graph must contain a single cyclic vertex (other than startDs), and
485 // that vertex can have one pred and one successor.
486 NFAVertex cyclic = findSingleCyclic(g);
487 if (cyclic == NGHolder::null_vertex()) {
488 return false;
489 }
490
491 NGHolder::adjacency_iterator ai, ae;
492 tie(ai, ae) = adjacent_vertices(g.start, g);
493 if (*ai == g.startDs) {
494 ++ai;
495 }
496 NFAVertex v = *ai;
497 if (++ai != ae) {
498 DEBUG_PRINTF("more than one initial vertex\n");
499 return false;
500 }
501
502 u32 width = 0;
503
504 // Walk from the start vertex to the cyclic state and ensure we have a
505 // chain of vertices.
506 while (v != cyclic) {
507 DEBUG_PRINTF("vertex %zu\n", g[v].index);
508 width++;
509 auto succ = succs(v, g);
510 if (contains(succ, cyclic)) {
511 if (succ.size() == 1) {
512 v = cyclic;
513 } else if (succ.size() == 2) {
514 // Cyclic and jump edge.
515 succ.erase(cyclic);
516 NFAVertex v2 = *succ.begin();
517 if (!edge(cyclic, v2, g).second) {
518 DEBUG_PRINTF("bad form\n");
519 return false;
520 }
521 v = cyclic;
522 } else {
523 DEBUG_PRINTF("bad form\n");
524 return false;
525 }
526 } else {
527 if (succ.size() != 1) {
528 DEBUG_PRINTF("bad form\n");
529 return false;
530 }
531 v = *succ.begin();
532 }
533 }
534
535 // Check the cyclic state is A-OK.
536 v = getSoleDestVertex(g, cyclic);
537 if (v == NGHolder::null_vertex()) {
538 DEBUG_PRINTF("cyclic has more than one successor\n");
539 return false;
540 }
541
542 // Walk from the cyclic state to an accept and ensure we have a chain of
543 // vertices.
544 while (!is_any_accept(v, g)) {
545 DEBUG_PRINTF("vertex %zu\n", g[v].index);
546 width++;
547 auto succ = succs(v, g);
548 if (succ.size() != 1) {
549 DEBUG_PRINTF("bad form\n");
550 return false;
551 }
552 v = *succ.begin();
553 }
554
555 int offsetAdjust = 0;
556 if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
557 return false;
558 }
559 DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
560 width += offsetAdjust;
561
562 DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
563 g[cyclic].index);
564
565 if (width >= min_length) {
566 DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
567 min_length, width);
568 clearMinLengthParam(g, rm);
569 return true;
570 }
571
572 vector<NFAVertex> preds;
573 vector<NFAEdge> dead;
574 for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
575 DEBUG_PRINTF("pred %zu\n", g[u].index);
576 if (u == cyclic) {
577 continue;
578 }
579 preds.push_back(u);
580
581 // We want to delete the out-edges of each predecessor, but need to
582 // make sure we don't delete the startDs self loop.
583 for (const auto &e : out_edges_range(u, g)) {
584 if (target(e, g) != g.startDs) {
585 dead.push_back(e);
586 }
587 }
588 }
589
590 remove_edges(dead, g);
591
592 assert(!preds.empty());
593
594 const CharReach &cr = g[cyclic].char_reach;
595
596 for (u32 i = 0; i < min_length - width - 1; ++i) {
597 v = add_vertex(g);
598 g[v].char_reach = cr;
599
600 for (auto u : preds) {
601 add_edge(u, v, g);
602 }
603 preds.clear();
604 preds.push_back(v);
605 }
606 assert(!preds.empty());
607 for (auto u : preds) {
608 add_edge(u, cyclic, g);
609 }
610
611 renumber_vertices(g);
612 renumber_edges(g);
613 clearMinLengthParam(g, rm);
614 clearReports(g);
615 return true;
616}
617
618static
619bool hasExtParams(const ExpressionInfo &expr) {
620 if (expr.min_length != 0) {
621 return true;
622 }
623 if (expr.min_offset != 0) {
624 return true;
625 }
626 if (expr.max_offset != MAX_OFFSET) {
627 return true;
628 }
629 return false;
630}
631
632static
633const depth& maxDistToAccept(const NFAVertexBidiDepth &d) {
634 if (d.toAccept.max.is_unreachable()) {
635 return d.toAcceptEod.max;
636 } else if (d.toAcceptEod.max.is_unreachable()) {
637 return d.toAccept.max;
638 }
639 return max(d.toAccept.max, d.toAcceptEod.max);
640}
641
642static
643const depth& minDistFromStart(const NFAVertexBidiDepth &d) {
644 return min(d.fromStartDotStar.min, d.fromStart.min);
645}
646
647static
648const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
649 return min(d.toAccept.min, d.toAcceptEod.min);
650}
651
652static
653bool isEdgePrunable(const NGHolder &g, const Report &report,
654 const vector<NFAVertexBidiDepth> &depths,
655 const NFAEdge &e) {
656 const NFAVertex u = source(e, g);
657 const NFAVertex v = target(e, g);
658
659 DEBUG_PRINTF("edge (%zu,%zu)\n", g[u].index, g[v].index);
660
661 // Leave our special-to-special edges alone.
662 if (is_special(u, g) && is_special(v, g)) {
663 DEBUG_PRINTF("ignoring special-to-special\n");
664 return false;
665 }
666
667 // We must be careful around start: we don't want to remove (start, v) if
668 // (startDs, v) exists as well, since later code will assume the presence
669 // of both edges, but other cases are OK.
670 if (u == g.start && edge(g.startDs, v, g).second) {
671 DEBUG_PRINTF("ignoring unanchored start edge\n");
672 return false;
673 }
674
675 u32 u_idx = g[u].index;
676 u32 v_idx = g[v].index;
677 assert(u_idx < depths.size() && v_idx < depths.size());
678
679 const NFAVertexBidiDepth &du = depths.at(u_idx);
680 const NFAVertexBidiDepth &dv = depths.at(v_idx);
681
682 if (report.minOffset) {
683 depth max_offset = maxDistFromStartOfData(du) + maxDistToAccept(dv);
684 if (max_offset.is_finite() && max_offset < report.minOffset) {
685 DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
686 return true;
687 }
688 }
689
690 if (report.maxOffset != MAX_OFFSET) {
691 depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
692 assert(min_offset.is_finite());
693
694 if (min_offset > report.maxOffset) {
695 DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
696 return true;
697 }
698 }
699
700 if (report.minLength && is_any_accept(v, g)) {
701 // Simple take on min_length. If we're an edge to accept and our max
702 // dist from start is too small, we can be pruned.
703 const depth &width = maxDistFromInit(du);
704 if (width.is_finite() && width < report.minLength) {
705 DEBUG_PRINTF("max width %s from start too small for min_length\n",
706 width.str().c_str());
707 return true;
708 }
709 }
710
711 return false;
712}
713
714static
715void pruneExtUnreachable(NGHolder &g, const ReportManager &rm) {
716 const auto &reports = all_reports(g);
717 if (reports.empty()) {
718 return;
719 }
720
721 if (!hasSameBounds(reports, rm)) {
722 DEBUG_PRINTF("report bounds vary\n");
723 return;
724 }
725
726 const auto &report = rm.getReport(*reports.begin());
727
728 auto depths = calcBidiDepths(g);
729
730 vector<NFAEdge> dead;
731
732 for (const auto &e : edges_range(g)) {
733 if (isEdgePrunable(g, report, depths, e)) {
734 DEBUG_PRINTF("pruning\n");
735 dead.push_back(e);
736 }
737 }
738
739 if (dead.empty()) {
740 return;
741 }
742
743 remove_edges(dead, g);
744 pruneUseless(g);
745 clearReports(g);
746}
747
748/**
749 * Remove vacuous edges in graphs where the min_offset or min_length
750 * constraints dictate that they can never produce a match.
751 */
752static
753void pruneVacuousEdges(NGHolder &g, const ReportManager &rm) {
754 vector<NFAEdge> dead;
755
756 auto has_min_offset = [&](NFAVertex v) {
757 assert(!g[v].reports.empty()); // must be reporter
758 return all_of_in(g[v].reports, [&](ReportID id) {
759 return rm.getReport(id).minOffset > 0;
760 });
761 };
762
763 auto has_min_length = [&](NFAVertex v) {
764 assert(!g[v].reports.empty()); // must be reporter
765 return all_of_in(g[v].reports, [&](ReportID id) {
766 return rm.getReport(id).minLength > 0;
767 });
768 };
769
770 for (const auto &e : edges_range(g)) {
771 const NFAVertex u = source(e, g);
772 const NFAVertex v = target(e, g);
773
774 // Special case: Crudely remove vacuous edges from start in graphs with
775 // a min_offset.
776 if (u == g.start && is_any_accept(v, g) && has_min_offset(u)) {
777 DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
778 dead.push_back(e);
779 continue;
780 }
781
782 // If a min_length is set, vacuous edges can be removed.
783 if (is_any_start(u, g) && is_any_accept(v, g) && has_min_length(u)) {
784 DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
785 dead.push_back(e);
786 continue;
787 }
788 }
789
790 if (dead.empty()) {
791 return;
792 }
793
794 DEBUG_PRINTF("removing %zu vacuous edges\n", dead.size());
795 remove_edges(dead, g);
796 pruneUseless(g);
797 clearReports(g);
798}
799
800static
801void pruneUnmatchable(NGHolder &g, const vector<DepthMinMax> &depths,
802 const ReportManager &rm, NFAVertex accept) {
803 vector<NFAEdge> dead;
804
805 for (const auto &e : in_edges_range(accept, g)) {
806 NFAVertex v = source(e, g);
807 if (v == g.accept) {
808 assert(accept == g.acceptEod); // stylised edge
809 continue;
810 }
811
812 if (!hasSameBounds(g[v].reports, rm)) {
813 continue;
814 }
815 const auto &report = rm.getReport(*g[v].reports.begin());
816
817 u32 idx = g[v].index;
818 DepthMinMax d = depths[idx]; // copy
819 pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
820 DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
821 d.str().c_str(), adj.first, adj.second);
822 d.min += adj.first;
823 d.max += adj.second;
824
825 if (d.max.is_finite() && d.max < report.minLength) {
826 DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
827 d.max.str().c_str(), report.minLength);
828 dead.push_back(e);
829 continue;
830 }
831
832 if (report.maxOffset != MAX_OFFSET && d.min > report.maxOffset) {
833 DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
834 d.min.str().c_str(), report.maxOffset);
835 dead.push_back(e);
836 continue;
837 }
838 }
839
840 remove_edges(dead, g);
841}
842
843/**
844 * Remove edges to accepts that can never produce a match long enough to
845 * satisfy our min_length and max_offset constraints.
846 */
847static
848void pruneUnmatchable(NGHolder &g, const ReportManager &rm) {
849 if (!any_of_in(all_reports(g), [&](ReportID id) {
850 return rm.getReport(id).minLength > 0;
851 })) {
852 return;
853 }
854
855 vector<DepthMinMax> depths = getDistancesFromSOM(g);
856
857 pruneUnmatchable(g, depths, rm, g.accept);
858 pruneUnmatchable(g, depths, rm, g.acceptEod);
859
860 pruneUseless(g);
861 clearReports(g);
862}
863
864static
865bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
866 return any_of_in(all_reports(g), [&rm](ReportID id) {
867 return rm.getReport(id).offsetAdjust != 0;
868 });
869}
870
871void propagateExtendedParams(NGHolder &g, ExpressionInfo &expr,
872 ReportManager &rm) {
873 if (!hasExtParams(expr)) {
874 return;
875 }
876
877 depth minWidth = findMinWidth(g);
878 depth maxWidth = findMaxWidth(g);
879 bool is_anchored = !has_proper_successor(g.startDs, g)
880 && out_degree(g.start, g);
881
882 DepthMinMax match_depths = findMatchLengths(rm, g);
883 DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
884
885 if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
886 ostringstream oss;
887 oss << "Expression is anchored and cannot satisfy min_offset="
888 << expr.min_offset << " as it can only produce matches of length "
889 << maxWidth << " bytes at most.";
890 throw CompileError(expr.index, oss.str());
891 }
892
893 if (minWidth > expr.max_offset) {
894 ostringstream oss;
895 oss << "Expression has max_offset=" << expr.max_offset
896 << " but requires " << minWidth << " bytes to match.";
897 throw CompileError(expr.index, oss.str());
898 }
899
900 if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
901 ostringstream oss;
902 oss << "Expression has min_length=" << expr.min_length << " but can "
903 "only produce matches of length " << match_depths.max <<
904 " bytes at most.";
905 throw CompileError(expr.index, oss.str());
906 }
907
908 if (expr.min_length && expr.min_length <= match_depths.min) {
909 DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
910 expr.min_length);
911 expr.min_length = 0;
912 }
913
914 if (!hasExtParams(expr)) {
915 return;
916 }
917
918 updateReportBounds(rm, g, expr);
919}
920
921/**
922 * If the pattern is completely anchored and has a min_length set, this can
923 * be converted to a min_offset.
924 */
925static
926void replaceMinLengthWithOffset(NGHolder &g, ReportManager &rm) {
927 if (has_proper_successor(g.startDs, g)) {
928 return; // not wholly anchored
929 }
930
931 replaceReports(g, [&rm](NFAVertex, ReportID id) {
932 const auto &report = rm.getReport(id);
933 if (report.minLength) {
934 Report new_report = report;
935 u64a min_len_offset = report.minLength - report.offsetAdjust;
936 new_report.minOffset = max(report.minOffset, min_len_offset);
937 new_report.minLength = 0;
938 return rm.getInternalId(new_report);
939 }
940 return id;
941 });
942}
943
944/**
945 * Clear offset bounds on reports that are not needed because they're satisfied
946 * by vertex depth.
947 */
948static
949void removeUnneededOffsetBounds(NGHolder &g, ReportManager &rm) {
950 auto depths = calcDepths(g);
951
952 replaceReports(g, [&](NFAVertex v, ReportID id) {
953 const auto &d = depths.at(g[v].index);
954 const depth &min_depth = min(d.fromStartDotStar.min, d.fromStart.min);
955 const depth &max_depth = maxDistFromStartOfData(d);
956
957 DEBUG_PRINTF("vertex %zu has min_depth=%s, max_depth=%s\n", g[v].index,
958 min_depth.str().c_str(), max_depth.str().c_str());
959
960 Report report = rm.getReport(id); // copy
961 bool modified = false;
962 if (report.minOffset && !report.offsetAdjust &&
963 report.minOffset <= min_depth) {
964 report.minOffset = 0;
965 modified = true;
966 }
967 if (report.maxOffset != MAX_OFFSET && max_depth.is_finite() &&
968 report.maxOffset >= max_depth) {
969 report.maxOffset = MAX_OFFSET;
970 modified = true;
971 }
972 if (modified) {
973 DEBUG_PRINTF("vertex %zu, changed bounds to [%llu,%llu]\n",
974 g[v].index, report.minOffset, report.maxOffset);
975 return rm.getInternalId(report);
976 }
977
978 return id;
979 });
980}
981
982void reduceExtendedParams(NGHolder &g, ReportManager &rm, som_type som) {
983 if (!any_of_in(all_reports(g),
984 [&](ReportID id) { return rm.getReport(id).hasBounds(); })) {
985 DEBUG_PRINTF("no extparam bounds\n");
986 return;
987 }
988
989 DEBUG_PRINTF("graph has extparam bounds\n");
990
991 pruneVacuousEdges(g, rm);
992 if (can_never_match(g)) {
993 return;
994 }
995
996 pruneUnmatchable(g, rm);
997 if (can_never_match(g)) {
998 return;
999 }
1000
1001 if (!hasOffsetAdjustments(rm, g)) {
1002 pruneExtUnreachable(g, rm);
1003 if (can_never_match(g)) {
1004 return;
1005 }
1006 }
1007
1008 replaceMinLengthWithOffset(g, rm);
1009 if (can_never_match(g)) {
1010 return;
1011 }
1012
1013 // If the pattern has a min_length and is of "ratchet" form with one
1014 // unbounded repeat, that repeat can become a bounded repeat.
1015 // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
1016 transformMinLengthToRepeat(g, rm);
1017 if (can_never_match(g)) {
1018 return;
1019 }
1020
1021 // If the pattern is unanchored, has a max_offset and has not asked for
1022 // SOM, we can use that knowledge to anchor it which will limit its
1023 // lifespan. Note that we can't use this transformation if there's a
1024 // min_length, as it's currently handled using "sly SOM".
1025 if (som == SOM_NONE) {
1026 anchorPatternWithBoundedRepeat(g, rm);
1027 if (can_never_match(g)) {
1028 return;
1029 }
1030 }
1031
1032 removeUnneededOffsetBounds(g, rm);
1033}
1034
1035} // namespace ue2
1036