1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Code for discovering properties of an NFA graph used by
31 * hs_expression_info().
32 */
33#include "ng_expr_info.h"
34
35#include "hs_internal.h"
36#include "ng.h"
37#include "ng_asserts.h"
38#include "ng_depth.h"
39#include "ng_edge_redundancy.h"
40#include "ng_extparam.h"
41#include "ng_fuzzy.h"
42#include "ng_holder.h"
43#include "ng_prune.h"
44#include "ng_reports.h"
45#include "ng_util.h"
46#include "ue2common.h"
47#include "compiler/expression_info.h"
48#include "parser/position.h" // for POS flags
49#include "util/boundary_reports.h"
50#include "util/compile_context.h"
51#include "util/depth.h"
52#include "util/graph.h"
53#include "util/graph_range.h"
54#include "util/report_manager.h"
55
56#include <limits.h>
57#include <set>
58
59using namespace std;
60
61namespace ue2 {
62
63/* get rid of leading \b and multiline ^ vertices */
64static
65void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
66 vector<NFAVertex> victims;
67
68 for (auto v : adjacent_vertices_range(root, g)) {
69 if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
70 DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
71 victims.push_back(v);
72 }
73 }
74
75 for (auto u : victims) {
76 for (auto v : adjacent_vertices_range(u, g)) {
77 add_edge_if_not_present(root, v, g);
78 }
79 }
80
81 remove_vertices(victims, g);
82}
83
84static
85void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
86 const vector<DepthMinMax> &depths, DepthMinMax &info) {
87 if (is_any_accept(v, g)) {
88 return;
89 }
90 if (is_any_start(v, g)) {
91 info.min = depth(0);
92 info.max = max(info.max, depth(0));
93 return;
94 }
95
96 u32 idx = g[v].index;
97 assert(idx < depths.size());
98 const DepthMinMax &d = depths.at(idx);
99
100 for (ReportID report_id : g[v].reports) {
101 const Report &report = rm.getReport(report_id);
102 assert(report.type == EXTERNAL_CALLBACK);
103
104 DepthMinMax rd = d;
105
106 // Compute graph width to this report, taking any offset adjustment
107 // into account.
108 rd.min += report.offsetAdjust;
109 rd.max += report.offsetAdjust;
110
111 // A min_length param is a lower bound for match width.
112 if (report.minLength && report.minLength <= depth::max_value()) {
113 depth min_len((u32)report.minLength);
114 rd.min = max(rd.min, min_len);
115 rd.max = max(rd.max, min_len);
116 }
117
118 // A max_offset param is an upper bound for match width.
119 if (report.maxOffset && report.maxOffset <= depth::max_value()) {
120 depth max_offset((u32)report.maxOffset);
121 rd.min = min(rd.min, max_offset);
122 rd.max = min(rd.max, max_offset);
123 }
124
125 DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id,
126 rd.str().c_str());
127
128 info = unionDepthMinMax(info, rd);
129 }
130}
131
132static
133bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
134 for (const auto &report_id : all_reports(g)) {
135 if (rm.getReport(report_id).offsetAdjust) {
136 return true;
137 }
138 }
139 return false;
140}
141
142void fillExpressionInfo(ReportManager &rm, const CompileContext &cc,
143 NGHolder &g, ExpressionInfo &expr,
144 hs_expr_info *info) {
145 assert(info);
146
147 // remove reports that aren't on vertices connected to accept.
148 clearReports(g);
149
150 assert(allMatchStatesHaveReports(g));
151
152 /*
153 * Note: the following set of analysis passes / transformations should
154 * match those in NG::addGraph().
155 */
156
157 /* ensure utf8 starts at cp boundary */
158 ensureCodePointStart(rm, g, expr);
159
160 if (can_never_match(g)) {
161 throw CompileError(expr.index, "Pattern can never match.");
162 }
163
164 bool hamming = expr.hamm_distance > 0;
165 u32 e_dist = hamming ? expr.hamm_distance : expr.edit_distance;
166
167 // validate graph's suitability for fuzzing
168 validate_fuzzy_compile(g, e_dist, hamming, expr.utf8, cc.grey);
169
170 resolveAsserts(rm, g, expr);
171 assert(allMatchStatesHaveReports(g));
172
173 // fuzz graph - this must happen before any transformations are made
174 make_fuzzy(g, e_dist, hamming, cc.grey);
175
176 pruneUseless(g);
177 pruneEmptyVertices(g);
178
179 if (can_never_match(g)) {
180 throw CompileError(expr.index, "Pattern can never match.");
181 }
182
183 optimiseVirtualStarts(g);
184
185 propagateExtendedParams(g, expr, rm);
186
187 removeLeadingVirtualVerticesFromRoot(g, g.start);
188 removeLeadingVirtualVerticesFromRoot(g, g.startDs);
189
190 auto depths = calcDepthsFrom(g, g.start);
191
192 DepthMinMax d;
193
194 for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
195 checkVertex(rm, g, u, depths, d);
196 }
197
198 for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
199 checkVertex(rm, g, u, depths, d);
200 }
201
202 if (d.max.is_finite()) {
203 info->max_width = d.max;
204 } else {
205 info->max_width = UINT_MAX;
206 }
207 if (d.min.is_finite()) {
208 info->min_width = d.min;
209 } else {
210 info->min_width = UINT_MAX;
211 }
212
213 info->unordered_matches = hasOffsetAdjust(rm, g);
214 info->matches_at_eod = can_match_at_eod(g);
215 info->matches_only_at_eod = can_only_match_at_eod(g);
216}
217
218} // namespace ue2
219