| 1 | /* |
| 2 | * Copyright (c) 2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "rose_build_impl.h" |
| 30 | #include "nfa/castlecompile.h" |
| 31 | #include "nfagraph/ng_repeat.h" |
| 32 | #include "util/compile_context.h" |
| 33 | #include "util/boundary_reports.h" |
| 34 | #include "util/make_unique.h" |
| 35 | #include "util/report_manager.h" |
| 36 | |
| 37 | using namespace std; |
| 38 | |
| 39 | namespace ue2 { |
| 40 | |
| 41 | static |
| 42 | bool requiresDedupe(const NGHolder &h, const flat_set<ReportID> &reports, |
| 43 | const Grey &grey) { |
| 44 | /* TODO: tighten */ |
| 45 | NFAVertex seen_vert = NGHolder::null_vertex(); |
| 46 | |
| 47 | for (auto v : inv_adjacent_vertices_range(h.accept, h)) { |
| 48 | if (has_intersection(h[v].reports, reports)) { |
| 49 | if (seen_vert != NGHolder::null_vertex()) { |
| 50 | return true; |
| 51 | } |
| 52 | seen_vert = v; |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { |
| 57 | if (has_intersection(h[v].reports, reports)) { |
| 58 | if (seen_vert != NGHolder::null_vertex()) { |
| 59 | return true; |
| 60 | } |
| 61 | seen_vert = v; |
| 62 | } |
| 63 | } |
| 64 | |
| 65 | if (seen_vert) { |
| 66 | /* if the reporting vertex is part of of a terminal repeat, the |
| 67 | * construction process may reform the graph splitting it into two |
| 68 | * vertices (pos, cyclic) and hence require dedupe */ |
| 69 | vector<GraphRepeatInfo> repeats; |
| 70 | findRepeats(h, grey.minExtBoundedRepeatSize, &repeats); |
| 71 | for (const auto &repeat : repeats) { |
| 72 | if (find(repeat.vertices.begin(), repeat.vertices.end(), |
| 73 | seen_vert) != repeat.vertices.end()) { |
| 74 | return true; |
| 75 | } |
| 76 | } |
| 77 | } |
| 78 | |
| 79 | return false; |
| 80 | } |
| 81 | |
| 82 | class RoseDedupeAuxImpl : public RoseDedupeAux { |
| 83 | public: |
| 84 | explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in); |
| 85 | bool requiresDedupeSupport( |
| 86 | const flat_set<ReportID> &reports) const override; |
| 87 | |
| 88 | private: |
| 89 | bool hasSafeMultiReports(const flat_set<ReportID> &reports) const; |
| 90 | |
| 91 | const RoseBuildImpl &build; |
| 92 | map<ReportID, set<RoseVertex>> vert_map; //!< ordinary literals |
| 93 | map<ReportID, set<RoseVertex>> sb_vert_map; //!< small block literals |
| 94 | map<ReportID, set<suffix_id>> suffix_map; |
| 95 | map<ReportID, set<const OutfixInfo *>> outfix_map; |
| 96 | map<ReportID, set<const raw_puff *>> puff_map; |
| 97 | |
| 98 | unordered_set<ReportID> live_reports; //!< all live internal reports. |
| 99 | }; |
| 100 | |
| 101 | unique_ptr<RoseDedupeAux> RoseBuildImpl::generateDedupeAux() const { |
| 102 | return ue2::make_unique<RoseDedupeAuxImpl>(*this); |
| 103 | } |
| 104 | |
| 105 | RoseDedupeAux::~RoseDedupeAux() = default; |
| 106 | |
| 107 | RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in) |
| 108 | : build(build_in) { |
| 109 | const RoseGraph &g = build.g; |
| 110 | |
| 111 | set<suffix_id> suffixes; |
| 112 | |
| 113 | for (auto v : vertices_range(g)) { |
| 114 | insert(&live_reports, g[v].reports); |
| 115 | |
| 116 | // Literals in the small block table are "shadow" copies of literals in |
| 117 | // the other tables that do not run in the same runtime invocation. |
| 118 | // Dedupe key assignment will be taken care of by the real literals. |
| 119 | if (build.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { |
| 120 | for (const auto &report_id : g[v].reports) { |
| 121 | sb_vert_map[report_id].insert(v); |
| 122 | } |
| 123 | } else { |
| 124 | for (const auto &report_id : g[v].reports) { |
| 125 | vert_map[report_id].insert(v); |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | // Several vertices may share a suffix, so we collect the set of |
| 130 | // suffixes first to avoid repeating work. |
| 131 | if (g[v].suffix) { |
| 132 | suffixes.insert(g[v].suffix); |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | for (const auto &suffix : suffixes) { |
| 137 | for (const auto &report_id : all_reports(suffix)) { |
| 138 | suffix_map[report_id].insert(suffix); |
| 139 | live_reports.insert(report_id); |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | for (const auto &outfix : build.outfixes) { |
| 144 | for (const auto &report_id : all_reports(outfix)) { |
| 145 | outfix_map[report_id].insert(&outfix); |
| 146 | live_reports.insert(report_id); |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | if (build.mpv_outfix) { |
| 151 | auto *mpv = build.mpv_outfix->mpv(); |
| 152 | for (const auto &puff : mpv->puffettes) { |
| 153 | puff_map[puff.report].insert(&puff); |
| 154 | live_reports.insert(puff.report); |
| 155 | } |
| 156 | for (const auto &puff : mpv->triggered_puffettes) { |
| 157 | puff_map[puff.report].insert(&puff); |
| 158 | live_reports.insert(puff.report); |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | // Collect live reports from boundary reports. |
| 163 | insert(&live_reports, build.boundary.report_at_0); |
| 164 | insert(&live_reports, build.boundary.report_at_0_eod); |
| 165 | insert(&live_reports, build.boundary.report_at_eod); |
| 166 | |
| 167 | DEBUG_PRINTF("%zu of %zu reports are live\n" , live_reports.size(), |
| 168 | build.rm.numReports()); |
| 169 | } |
| 170 | |
| 171 | static |
| 172 | vector<CharReach> makePath(const rose_literal_id &lit) { |
| 173 | vector<CharReach> path(begin(lit.s), end(lit.s)); |
| 174 | for (u32 i = 0; i < lit.delay; i++) { |
| 175 | path.push_back(CharReach::dot()); |
| 176 | } |
| 177 | return path; |
| 178 | } |
| 179 | |
| 180 | /** |
| 181 | * \brief True if one of the given literals overlaps with the suffix of |
| 182 | * another, meaning that they could arrive at the same offset. |
| 183 | */ |
| 184 | static |
| 185 | bool literalsCouldRace(const rose_literal_id &lit1, |
| 186 | const rose_literal_id &lit2) { |
| 187 | DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n" , |
| 188 | dumpString(lit1.s).c_str(), lit1.delay, |
| 189 | dumpString(lit2.s).c_str(), lit2.delay); |
| 190 | |
| 191 | // Add dots on the end of each literal for delay. |
| 192 | const auto v1 = makePath(lit1); |
| 193 | const auto v2 = makePath(lit2); |
| 194 | |
| 195 | // See if the smaller path is a suffix of the larger path. |
| 196 | const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; |
| 197 | const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; |
| 198 | auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), |
| 199 | overlaps); |
| 200 | return r.first == smaller->rend(); |
| 201 | } |
| 202 | |
| 203 | bool RoseDedupeAuxImpl::hasSafeMultiReports( |
| 204 | const flat_set<ReportID> &reports) const { |
| 205 | if (reports.size() <= 1) { |
| 206 | return true; |
| 207 | } |
| 208 | |
| 209 | /* We have more than one ReportID corresponding to the external ID that is |
| 210 | * presented to the user. These may differ in offset adjustment, bounds |
| 211 | * checks, etc. */ |
| 212 | |
| 213 | /* TODO: work out if these differences will actually cause problems */ |
| 214 | |
| 215 | /* One common case where we know we don't have a problem is if there are |
| 216 | * precisely two reports, one for the main Rose path and one for the |
| 217 | * "small block matcher" path. */ |
| 218 | if (reports.size() == 2) { |
| 219 | ReportID id1 = *reports.begin(); |
| 220 | ReportID id2 = *reports.rbegin(); |
| 221 | |
| 222 | bool has_verts_1 = contains(vert_map, id1); |
| 223 | bool has_verts_2 = contains(vert_map, id2); |
| 224 | bool has_sb_verts_1 = contains(sb_vert_map, id1); |
| 225 | bool has_sb_verts_2 = contains(sb_vert_map, id2); |
| 226 | |
| 227 | if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { |
| 228 | DEBUG_PRINTF("two reports, one full and one small block: ok\n" ); |
| 229 | return true; |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | DEBUG_PRINTF("more than one report\n" ); |
| 234 | return false; |
| 235 | } |
| 236 | |
| 237 | bool RoseDedupeAuxImpl::requiresDedupeSupport( |
| 238 | const flat_set<ReportID> &reports_in) const { |
| 239 | /* TODO: this could be expanded to check for offset or character |
| 240 | constraints */ |
| 241 | |
| 242 | // We don't want to consider dead reports (tracked by ReportManager but no |
| 243 | // longer used) for the purposes of assigning dupe keys. |
| 244 | flat_set<ReportID> reports; |
| 245 | for (auto id : reports_in) { |
| 246 | if (contains(live_reports, id)) { |
| 247 | reports.insert(id); |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | DEBUG_PRINTF("live reports: %s\n" , as_string_list(reports).c_str()); |
| 252 | |
| 253 | const RoseGraph &g = build.g; |
| 254 | |
| 255 | bool has_suffix = false; |
| 256 | bool has_outfix = false; |
| 257 | |
| 258 | if (!hasSafeMultiReports(reports)) { |
| 259 | DEBUG_PRINTF("multiple reports not safe\n" ); |
| 260 | return true; |
| 261 | } |
| 262 | |
| 263 | set<RoseVertex> roles; |
| 264 | set<suffix_id> suffixes; |
| 265 | set<const OutfixInfo *> outfixes; |
| 266 | set<const raw_puff *> puffettes; |
| 267 | for (ReportID r : reports) { |
| 268 | if (contains(vert_map, r)) { |
| 269 | insert(&roles, vert_map.at(r)); |
| 270 | } |
| 271 | if (contains(suffix_map, r)) { |
| 272 | insert(&suffixes, suffix_map.at(r)); |
| 273 | } |
| 274 | |
| 275 | if (contains(outfix_map, r)) { |
| 276 | insert(&outfixes, outfix_map.at(r)); |
| 277 | } |
| 278 | |
| 279 | if (contains(puff_map, r)) { |
| 280 | insert(&puffettes, puff_map.at(r)); |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | /* roles */ |
| 285 | |
| 286 | map<u32, u32> lits; // Literal ID -> count of occurrences. |
| 287 | |
| 288 | const bool has_role = !roles.empty(); |
| 289 | for (auto v : roles) { |
| 290 | for (const auto &lit : g[v].literals) { |
| 291 | lits[lit]++; |
| 292 | } |
| 293 | if (g[v].eod_accept) { |
| 294 | // Literals plugged into this EOD accept must be taken into account |
| 295 | // as well. |
| 296 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
| 297 | for (const auto &lit : g[u].literals) { |
| 298 | lits[lit]++; |
| 299 | } |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | /* literals */ |
| 305 | |
| 306 | for (const auto &m : lits) { |
| 307 | if (m.second > 1) { |
| 308 | DEBUG_PRINTF("lit %u used by >1 reporting roles\n" , m.first); |
| 309 | return true; |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | for (auto it = begin(lits); it != end(lits); ++it) { |
| 314 | const auto &lit1 = build.literals.at(it->first); |
| 315 | for (auto jt = next(it); jt != end(lits); ++jt) { |
| 316 | const auto &lit2 = build.literals.at(jt->first); |
| 317 | if (literalsCouldRace(lit1, lit2)) { |
| 318 | DEBUG_PRINTF("literals could race\n" ); |
| 319 | return true; |
| 320 | } |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | /* suffixes */ |
| 325 | |
| 326 | for (const auto &suffix : suffixes) { |
| 327 | if (has_suffix || has_role) { |
| 328 | return true; /* scope for badness */ |
| 329 | } |
| 330 | |
| 331 | has_suffix = true; |
| 332 | |
| 333 | /* some lesser suffix engines (nfas, haig, castle) can raise multiple |
| 334 | * matches for a report id at the same offset if there are multiple |
| 335 | * report states live. */ |
| 336 | if (suffix.haig()) { |
| 337 | return true; |
| 338 | } |
| 339 | if (suffix.graph() && |
| 340 | requiresDedupe(*suffix.graph(), reports, build.cc.grey)) { |
| 341 | return true; |
| 342 | } |
| 343 | if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) { |
| 344 | return true; |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | /* outfixes */ |
| 349 | |
| 350 | for (const auto &outfix_ptr : outfixes) { |
| 351 | assert(outfix_ptr); |
| 352 | const OutfixInfo &out = *outfix_ptr; |
| 353 | |
| 354 | if (has_outfix || has_role || has_suffix) { |
| 355 | return true; |
| 356 | } |
| 357 | has_outfix = true; |
| 358 | |
| 359 | if (out.haig()) { |
| 360 | return true; /* haig may report matches with different SOM at the |
| 361 | same offset */ |
| 362 | } |
| 363 | |
| 364 | if (out.holder() && |
| 365 | requiresDedupe(*out.holder(), reports, build.cc.grey)) { |
| 366 | return true; |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | /* mpv */ |
| 371 | for (UNUSED const auto &puff : puffettes) { |
| 372 | if (has_outfix || has_role || has_suffix) { |
| 373 | return true; |
| 374 | } |
| 375 | has_outfix = true; |
| 376 | } |
| 377 | |
| 378 | /* boundary */ |
| 379 | if (has_intersection(build.boundary.report_at_eod, reports)) { |
| 380 | if (has_outfix || has_role || has_suffix) { |
| 381 | return true; |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | return false; |
| 386 | } |
| 387 | |
| 388 | } // namespace ue2 |
| 389 | |