1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Code for discovering properties of an NFA graph used by |
31 | * hs_expression_info(). |
32 | */ |
33 | #include "ng_expr_info.h" |
34 | |
35 | #include "hs_internal.h" |
36 | #include "ng.h" |
37 | #include "ng_asserts.h" |
38 | #include "ng_depth.h" |
39 | #include "ng_edge_redundancy.h" |
40 | #include "ng_extparam.h" |
41 | #include "ng_fuzzy.h" |
42 | #include "ng_holder.h" |
43 | #include "ng_prune.h" |
44 | #include "ng_reports.h" |
45 | #include "ng_util.h" |
46 | #include "ue2common.h" |
47 | #include "compiler/expression_info.h" |
48 | #include "parser/position.h" // for POS flags |
49 | #include "util/boundary_reports.h" |
50 | #include "util/compile_context.h" |
51 | #include "util/depth.h" |
52 | #include "util/graph.h" |
53 | #include "util/graph_range.h" |
54 | #include "util/report_manager.h" |
55 | |
56 | #include <limits.h> |
57 | #include <set> |
58 | |
59 | using namespace std; |
60 | |
61 | namespace ue2 { |
62 | |
63 | /* get rid of leading \b and multiline ^ vertices */ |
64 | static |
65 | void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) { |
66 | vector<NFAVertex> victims; |
67 | |
68 | for (auto v : adjacent_vertices_range(root, g)) { |
69 | if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { |
70 | DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n" ); |
71 | victims.push_back(v); |
72 | } |
73 | } |
74 | |
75 | for (auto u : victims) { |
76 | for (auto v : adjacent_vertices_range(u, g)) { |
77 | add_edge_if_not_present(root, v, g); |
78 | } |
79 | } |
80 | |
81 | remove_vertices(victims, g); |
82 | } |
83 | |
84 | static |
85 | void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, |
86 | const vector<DepthMinMax> &depths, DepthMinMax &info) { |
87 | if (is_any_accept(v, g)) { |
88 | return; |
89 | } |
90 | if (is_any_start(v, g)) { |
91 | info.min = depth(0); |
92 | info.max = max(info.max, depth(0)); |
93 | return; |
94 | } |
95 | |
96 | u32 idx = g[v].index; |
97 | assert(idx < depths.size()); |
98 | const DepthMinMax &d = depths.at(idx); |
99 | |
100 | for (ReportID report_id : g[v].reports) { |
101 | const Report &report = rm.getReport(report_id); |
102 | assert(report.type == EXTERNAL_CALLBACK); |
103 | |
104 | DepthMinMax rd = d; |
105 | |
106 | // Compute graph width to this report, taking any offset adjustment |
107 | // into account. |
108 | rd.min += report.offsetAdjust; |
109 | rd.max += report.offsetAdjust; |
110 | |
111 | // A min_length param is a lower bound for match width. |
112 | if (report.minLength && report.minLength <= depth::max_value()) { |
113 | depth min_len((u32)report.minLength); |
114 | rd.min = max(rd.min, min_len); |
115 | rd.max = max(rd.max, min_len); |
116 | } |
117 | |
118 | // A max_offset param is an upper bound for match width. |
119 | if (report.maxOffset && report.maxOffset <= depth::max_value()) { |
120 | depth max_offset((u32)report.maxOffset); |
121 | rd.min = min(rd.min, max_offset); |
122 | rd.max = min(rd.max, max_offset); |
123 | } |
124 | |
125 | DEBUG_PRINTF("vertex %zu report %u: %s\n" , g[v].index, report_id, |
126 | rd.str().c_str()); |
127 | |
128 | info = unionDepthMinMax(info, rd); |
129 | } |
130 | } |
131 | |
132 | static |
133 | bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { |
134 | for (const auto &report_id : all_reports(g)) { |
135 | if (rm.getReport(report_id).offsetAdjust) { |
136 | return true; |
137 | } |
138 | } |
139 | return false; |
140 | } |
141 | |
142 | void fillExpressionInfo(ReportManager &rm, const CompileContext &cc, |
143 | NGHolder &g, ExpressionInfo &expr, |
144 | hs_expr_info *info) { |
145 | assert(info); |
146 | |
147 | // remove reports that aren't on vertices connected to accept. |
148 | clearReports(g); |
149 | |
150 | assert(allMatchStatesHaveReports(g)); |
151 | |
152 | /* |
153 | * Note: the following set of analysis passes / transformations should |
154 | * match those in NG::addGraph(). |
155 | */ |
156 | |
157 | /* ensure utf8 starts at cp boundary */ |
158 | ensureCodePointStart(rm, g, expr); |
159 | |
160 | if (can_never_match(g)) { |
161 | throw CompileError(expr.index, "Pattern can never match." ); |
162 | } |
163 | |
164 | bool hamming = expr.hamm_distance > 0; |
165 | u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance; |
166 | |
167 | // validate graph's suitability for fuzzing |
168 | validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey); |
169 | |
170 | resolveAsserts(rm, g, expr); |
171 | assert(allMatchStatesHaveReports(g)); |
172 | |
173 | // fuzz graph - this must happen before any transformations are made |
174 | make_fuzzy(g, e_dist, hamming, cc.grey); |
175 | |
176 | pruneUseless(g); |
177 | pruneEmptyVertices(g); |
178 | |
179 | if (can_never_match(g)) { |
180 | throw CompileError(expr.index, "Pattern can never match." ); |
181 | } |
182 | |
183 | optimiseVirtualStarts(g); |
184 | |
185 | propagateExtendedParams(g, expr, rm); |
186 | |
187 | removeLeadingVirtualVerticesFromRoot(g, g.start); |
188 | removeLeadingVirtualVerticesFromRoot(g, g.startDs); |
189 | |
190 | auto depths = calcDepthsFrom(g, g.start); |
191 | |
192 | DepthMinMax d; |
193 | |
194 | for (auto u : inv_adjacent_vertices_range(g.accept, g)) { |
195 | checkVertex(rm, g, u, depths, d); |
196 | } |
197 | |
198 | for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) { |
199 | checkVertex(rm, g, u, depths, d); |
200 | } |
201 | |
202 | if (d.max.is_finite()) { |
203 | info->max_width = d.max; |
204 | } else { |
205 | info->max_width = UINT_MAX; |
206 | } |
207 | if (d.min.is_finite()) { |
208 | info->min_width = d.min; |
209 | } else { |
210 | info->min_width = UINT_MAX; |
211 | } |
212 | |
213 | info->unordered_matches = hasOffsetAdjust(rm, g); |
214 | info->matches_at_eod = can_match_at_eod(g); |
215 | info->matches_only_at_eod = can_only_match_at_eod(g); |
216 | } |
217 | |
218 | } // namespace ue2 |
219 | |