1/*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "rose_build_impl.h"
30#include "nfa/castlecompile.h"
31#include "nfagraph/ng_repeat.h"
32#include "util/compile_context.h"
33#include "util/boundary_reports.h"
34#include "util/make_unique.h"
35#include "util/report_manager.h"
36
37using namespace std;
38
39namespace ue2 {
40
41static
42bool requiresDedupe(const NGHolder &h, const flat_set<ReportID> &reports,
43 const Grey &grey) {
44 /* TODO: tighten */
45 NFAVertex seen_vert = NGHolder::null_vertex();
46
47 for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
48 if (has_intersection(h[v].reports, reports)) {
49 if (seen_vert != NGHolder::null_vertex()) {
50 return true;
51 }
52 seen_vert = v;
53 }
54 }
55
56 for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
57 if (has_intersection(h[v].reports, reports)) {
58 if (seen_vert != NGHolder::null_vertex()) {
59 return true;
60 }
61 seen_vert = v;
62 }
63 }
64
65 if (seen_vert) {
66 /* if the reporting vertex is part of of a terminal repeat, the
67 * construction process may reform the graph splitting it into two
68 * vertices (pos, cyclic) and hence require dedupe */
69 vector<GraphRepeatInfo> repeats;
70 findRepeats(h, grey.minExtBoundedRepeatSize, &repeats);
71 for (const auto &repeat : repeats) {
72 if (find(repeat.vertices.begin(), repeat.vertices.end(),
73 seen_vert) != repeat.vertices.end()) {
74 return true;
75 }
76 }
77 }
78
79 return false;
80}
81
82class RoseDedupeAuxImpl : public RoseDedupeAux {
83public:
84 explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in);
85 bool requiresDedupeSupport(
86 const flat_set<ReportID> &reports) const override;
87
88private:
89 bool hasSafeMultiReports(const flat_set<ReportID> &reports) const;
90
91 const RoseBuildImpl &build;
92 map<ReportID, set<RoseVertex>> vert_map; //!< ordinary literals
93 map<ReportID, set<RoseVertex>> sb_vert_map; //!< small block literals
94 map<ReportID, set<suffix_id>> suffix_map;
95 map<ReportID, set<const OutfixInfo *>> outfix_map;
96 map<ReportID, set<const raw_puff *>> puff_map;
97
98 unordered_set<ReportID> live_reports; //!< all live internal reports.
99};
100
101unique_ptr<RoseDedupeAux> RoseBuildImpl::generateDedupeAux() const {
102 return ue2::make_unique<RoseDedupeAuxImpl>(*this);
103}
104
105RoseDedupeAux::~RoseDedupeAux() = default;
106
107RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in)
108 : build(build_in) {
109 const RoseGraph &g = build.g;
110
111 set<suffix_id> suffixes;
112
113 for (auto v : vertices_range(g)) {
114 insert(&live_reports, g[v].reports);
115
116 // Literals in the small block table are "shadow" copies of literals in
117 // the other tables that do not run in the same runtime invocation.
118 // Dedupe key assignment will be taken care of by the real literals.
119 if (build.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) {
120 for (const auto &report_id : g[v].reports) {
121 sb_vert_map[report_id].insert(v);
122 }
123 } else {
124 for (const auto &report_id : g[v].reports) {
125 vert_map[report_id].insert(v);
126 }
127 }
128
129 // Several vertices may share a suffix, so we collect the set of
130 // suffixes first to avoid repeating work.
131 if (g[v].suffix) {
132 suffixes.insert(g[v].suffix);
133 }
134 }
135
136 for (const auto &suffix : suffixes) {
137 for (const auto &report_id : all_reports(suffix)) {
138 suffix_map[report_id].insert(suffix);
139 live_reports.insert(report_id);
140 }
141 }
142
143 for (const auto &outfix : build.outfixes) {
144 for (const auto &report_id : all_reports(outfix)) {
145 outfix_map[report_id].insert(&outfix);
146 live_reports.insert(report_id);
147 }
148 }
149
150 if (build.mpv_outfix) {
151 auto *mpv = build.mpv_outfix->mpv();
152 for (const auto &puff : mpv->puffettes) {
153 puff_map[puff.report].insert(&puff);
154 live_reports.insert(puff.report);
155 }
156 for (const auto &puff : mpv->triggered_puffettes) {
157 puff_map[puff.report].insert(&puff);
158 live_reports.insert(puff.report);
159 }
160 }
161
162 // Collect live reports from boundary reports.
163 insert(&live_reports, build.boundary.report_at_0);
164 insert(&live_reports, build.boundary.report_at_0_eod);
165 insert(&live_reports, build.boundary.report_at_eod);
166
167 DEBUG_PRINTF("%zu of %zu reports are live\n", live_reports.size(),
168 build.rm.numReports());
169}
170
171static
172vector<CharReach> makePath(const rose_literal_id &lit) {
173 vector<CharReach> path(begin(lit.s), end(lit.s));
174 for (u32 i = 0; i < lit.delay; i++) {
175 path.push_back(CharReach::dot());
176 }
177 return path;
178}
179
180/**
181 * \brief True if one of the given literals overlaps with the suffix of
182 * another, meaning that they could arrive at the same offset.
183 */
184static
185bool literalsCouldRace(const rose_literal_id &lit1,
186 const rose_literal_id &lit2) {
187 DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n",
188 dumpString(lit1.s).c_str(), lit1.delay,
189 dumpString(lit2.s).c_str(), lit2.delay);
190
191 // Add dots on the end of each literal for delay.
192 const auto v1 = makePath(lit1);
193 const auto v2 = makePath(lit2);
194
195 // See if the smaller path is a suffix of the larger path.
196 const auto *smaller = v1.size() < v2.size() ? &v1 : &v2;
197 const auto *bigger = v1.size() < v2.size() ? &v2 : &v1;
198 auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(),
199 overlaps);
200 return r.first == smaller->rend();
201}
202
203bool RoseDedupeAuxImpl::hasSafeMultiReports(
204 const flat_set<ReportID> &reports) const {
205 if (reports.size() <= 1) {
206 return true;
207 }
208
209 /* We have more than one ReportID corresponding to the external ID that is
210 * presented to the user. These may differ in offset adjustment, bounds
211 * checks, etc. */
212
213 /* TODO: work out if these differences will actually cause problems */
214
215 /* One common case where we know we don't have a problem is if there are
216 * precisely two reports, one for the main Rose path and one for the
217 * "small block matcher" path. */
218 if (reports.size() == 2) {
219 ReportID id1 = *reports.begin();
220 ReportID id2 = *reports.rbegin();
221
222 bool has_verts_1 = contains(vert_map, id1);
223 bool has_verts_2 = contains(vert_map, id2);
224 bool has_sb_verts_1 = contains(sb_vert_map, id1);
225 bool has_sb_verts_2 = contains(sb_vert_map, id2);
226
227 if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) {
228 DEBUG_PRINTF("two reports, one full and one small block: ok\n");
229 return true;
230 }
231 }
232
233 DEBUG_PRINTF("more than one report\n");
234 return false;
235}
236
237bool RoseDedupeAuxImpl::requiresDedupeSupport(
238 const flat_set<ReportID> &reports_in) const {
239 /* TODO: this could be expanded to check for offset or character
240 constraints */
241
242 // We don't want to consider dead reports (tracked by ReportManager but no
243 // longer used) for the purposes of assigning dupe keys.
244 flat_set<ReportID> reports;
245 for (auto id : reports_in) {
246 if (contains(live_reports, id)) {
247 reports.insert(id);
248 }
249 }
250
251 DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str());
252
253 const RoseGraph &g = build.g;
254
255 bool has_suffix = false;
256 bool has_outfix = false;
257
258 if (!hasSafeMultiReports(reports)) {
259 DEBUG_PRINTF("multiple reports not safe\n");
260 return true;
261 }
262
263 set<RoseVertex> roles;
264 set<suffix_id> suffixes;
265 set<const OutfixInfo *> outfixes;
266 set<const raw_puff *> puffettes;
267 for (ReportID r : reports) {
268 if (contains(vert_map, r)) {
269 insert(&roles, vert_map.at(r));
270 }
271 if (contains(suffix_map, r)) {
272 insert(&suffixes, suffix_map.at(r));
273 }
274
275 if (contains(outfix_map, r)) {
276 insert(&outfixes, outfix_map.at(r));
277 }
278
279 if (contains(puff_map, r)) {
280 insert(&puffettes, puff_map.at(r));
281 }
282 }
283
284 /* roles */
285
286 map<u32, u32> lits; // Literal ID -> count of occurrences.
287
288 const bool has_role = !roles.empty();
289 for (auto v : roles) {
290 for (const auto &lit : g[v].literals) {
291 lits[lit]++;
292 }
293 if (g[v].eod_accept) {
294 // Literals plugged into this EOD accept must be taken into account
295 // as well.
296 for (auto u : inv_adjacent_vertices_range(v, g)) {
297 for (const auto &lit : g[u].literals) {
298 lits[lit]++;
299 }
300 }
301 }
302 }
303
304 /* literals */
305
306 for (const auto &m : lits) {
307 if (m.second > 1) {
308 DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first);
309 return true;
310 }
311 }
312
313 for (auto it = begin(lits); it != end(lits); ++it) {
314 const auto &lit1 = build.literals.at(it->first);
315 for (auto jt = next(it); jt != end(lits); ++jt) {
316 const auto &lit2 = build.literals.at(jt->first);
317 if (literalsCouldRace(lit1, lit2)) {
318 DEBUG_PRINTF("literals could race\n");
319 return true;
320 }
321 }
322 }
323
324 /* suffixes */
325
326 for (const auto &suffix : suffixes) {
327 if (has_suffix || has_role) {
328 return true; /* scope for badness */
329 }
330
331 has_suffix = true;
332
333 /* some lesser suffix engines (nfas, haig, castle) can raise multiple
334 * matches for a report id at the same offset if there are multiple
335 * report states live. */
336 if (suffix.haig()) {
337 return true;
338 }
339 if (suffix.graph() &&
340 requiresDedupe(*suffix.graph(), reports, build.cc.grey)) {
341 return true;
342 }
343 if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) {
344 return true;
345 }
346 }
347
348 /* outfixes */
349
350 for (const auto &outfix_ptr : outfixes) {
351 assert(outfix_ptr);
352 const OutfixInfo &out = *outfix_ptr;
353
354 if (has_outfix || has_role || has_suffix) {
355 return true;
356 }
357 has_outfix = true;
358
359 if (out.haig()) {
360 return true; /* haig may report matches with different SOM at the
361 same offset */
362 }
363
364 if (out.holder() &&
365 requiresDedupe(*out.holder(), reports, build.cc.grey)) {
366 return true;
367 }
368 }
369
370 /* mpv */
371 for (UNUSED const auto &puff : puffettes) {
372 if (has_outfix || has_role || has_suffix) {
373 return true;
374 }
375 has_outfix = true;
376 }
377
378 /* boundary */
379 if (has_intersection(build.boundary.report_at_eod, reports)) {
380 if (has_outfix || has_role || has_suffix) {
381 return true;
382 }
383 }
384
385 return false;
386}
387
388} // namespace ue2
389