| 1 | /* | 
|---|
| 2 | * Copyright (c) 2017, Intel Corporation | 
|---|
| 3 | * | 
|---|
| 4 | * Redistribution and use in source and binary forms, with or without | 
|---|
| 5 | * modification, are permitted provided that the following conditions are met: | 
|---|
| 6 | * | 
|---|
| 7 | *  * Redistributions of source code must retain the above copyright notice, | 
|---|
| 8 | *    this list of conditions and the following disclaimer. | 
|---|
| 9 | *  * Redistributions in binary form must reproduce the above copyright | 
|---|
| 10 | *    notice, this list of conditions and the following disclaimer in the | 
|---|
| 11 | *    documentation and/or other materials provided with the distribution. | 
|---|
| 12 | *  * Neither the name of Intel Corporation nor the names of its contributors | 
|---|
| 13 | *    may be used to endorse or promote products derived from this software | 
|---|
| 14 | *    without specific prior written permission. | 
|---|
| 15 | * | 
|---|
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|---|
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|---|
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|---|
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|---|
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|---|
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|---|
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|---|
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|---|
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|---|
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|---|
| 26 | * POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 27 | */ | 
|---|
| 28 |  | 
|---|
| 29 | #include "rose_build_impl.h" | 
|---|
| 30 | #include "nfa/castlecompile.h" | 
|---|
| 31 | #include "nfagraph/ng_repeat.h" | 
|---|
| 32 | #include "util/compile_context.h" | 
|---|
| 33 | #include "util/boundary_reports.h" | 
|---|
| 34 | #include "util/make_unique.h" | 
|---|
| 35 | #include "util/report_manager.h" | 
|---|
| 36 |  | 
|---|
| 37 | using namespace std; | 
|---|
| 38 |  | 
|---|
| 39 | namespace ue2 { | 
|---|
| 40 |  | 
|---|
| 41 | static | 
|---|
| 42 | bool requiresDedupe(const NGHolder &h, const flat_set<ReportID> &reports, | 
|---|
| 43 | const Grey &grey) { | 
|---|
| 44 | /* TODO: tighten */ | 
|---|
| 45 | NFAVertex seen_vert = NGHolder::null_vertex(); | 
|---|
| 46 |  | 
|---|
| 47 | for (auto v : inv_adjacent_vertices_range(h.accept, h)) { | 
|---|
| 48 | if (has_intersection(h[v].reports, reports)) { | 
|---|
| 49 | if (seen_vert != NGHolder::null_vertex()) { | 
|---|
| 50 | return true; | 
|---|
| 51 | } | 
|---|
| 52 | seen_vert = v; | 
|---|
| 53 | } | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { | 
|---|
| 57 | if (has_intersection(h[v].reports, reports)) { | 
|---|
| 58 | if (seen_vert != NGHolder::null_vertex()) { | 
|---|
| 59 | return true; | 
|---|
| 60 | } | 
|---|
| 61 | seen_vert = v; | 
|---|
| 62 | } | 
|---|
| 63 | } | 
|---|
| 64 |  | 
|---|
| 65 | if (seen_vert) { | 
|---|
| 66 | /* if the reporting vertex is part of of a terminal repeat, the | 
|---|
| 67 | * construction process may reform the graph splitting it into two | 
|---|
| 68 | * vertices (pos, cyclic) and hence require dedupe */ | 
|---|
| 69 | vector<GraphRepeatInfo> repeats; | 
|---|
| 70 | findRepeats(h, grey.minExtBoundedRepeatSize, &repeats); | 
|---|
| 71 | for (const auto &repeat : repeats) { | 
|---|
| 72 | if (find(repeat.vertices.begin(), repeat.vertices.end(), | 
|---|
| 73 | seen_vert) != repeat.vertices.end()) { | 
|---|
| 74 | return true; | 
|---|
| 75 | } | 
|---|
| 76 | } | 
|---|
| 77 | } | 
|---|
| 78 |  | 
|---|
| 79 | return false; | 
|---|
| 80 | } | 
|---|
| 81 |  | 
|---|
| 82 | class RoseDedupeAuxImpl : public RoseDedupeAux { | 
|---|
| 83 | public: | 
|---|
| 84 | explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in); | 
|---|
| 85 | bool requiresDedupeSupport( | 
|---|
| 86 | const flat_set<ReportID> &reports) const override; | 
|---|
| 87 |  | 
|---|
| 88 | private: | 
|---|
| 89 | bool hasSafeMultiReports(const flat_set<ReportID> &reports) const; | 
|---|
| 90 |  | 
|---|
| 91 | const RoseBuildImpl &build; | 
|---|
| 92 | map<ReportID, set<RoseVertex>> vert_map; //!< ordinary literals | 
|---|
| 93 | map<ReportID, set<RoseVertex>> sb_vert_map; //!< small block literals | 
|---|
| 94 | map<ReportID, set<suffix_id>> suffix_map; | 
|---|
| 95 | map<ReportID, set<const OutfixInfo *>> outfix_map; | 
|---|
| 96 | map<ReportID, set<const raw_puff *>> puff_map; | 
|---|
| 97 |  | 
|---|
| 98 | unordered_set<ReportID> live_reports; //!< all live internal reports. | 
|---|
| 99 | }; | 
|---|
| 100 |  | 
|---|
| 101 | unique_ptr<RoseDedupeAux> RoseBuildImpl::generateDedupeAux() const { | 
|---|
| 102 | return ue2::make_unique<RoseDedupeAuxImpl>(*this); | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 | RoseDedupeAux::~RoseDedupeAux() = default; | 
|---|
| 106 |  | 
|---|
| 107 | RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in) | 
|---|
| 108 | : build(build_in) { | 
|---|
| 109 | const RoseGraph &g = build.g; | 
|---|
| 110 |  | 
|---|
| 111 | set<suffix_id> suffixes; | 
|---|
| 112 |  | 
|---|
| 113 | for (auto v : vertices_range(g)) { | 
|---|
| 114 | insert(&live_reports, g[v].reports); | 
|---|
| 115 |  | 
|---|
| 116 | // Literals in the small block table are "shadow" copies of literals in | 
|---|
| 117 | // the other tables that do not run in the same runtime invocation. | 
|---|
| 118 | // Dedupe key assignment will be taken care of by the real literals. | 
|---|
| 119 | if (build.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { | 
|---|
| 120 | for (const auto &report_id : g[v].reports) { | 
|---|
| 121 | sb_vert_map[report_id].insert(v); | 
|---|
| 122 | } | 
|---|
| 123 | } else { | 
|---|
| 124 | for (const auto &report_id : g[v].reports) { | 
|---|
| 125 | vert_map[report_id].insert(v); | 
|---|
| 126 | } | 
|---|
| 127 | } | 
|---|
| 128 |  | 
|---|
| 129 | // Several vertices may share a suffix, so we collect the set of | 
|---|
| 130 | // suffixes first to avoid repeating work. | 
|---|
| 131 | if (g[v].suffix) { | 
|---|
| 132 | suffixes.insert(g[v].suffix); | 
|---|
| 133 | } | 
|---|
| 134 | } | 
|---|
| 135 |  | 
|---|
| 136 | for (const auto &suffix : suffixes) { | 
|---|
| 137 | for (const auto &report_id : all_reports(suffix)) { | 
|---|
| 138 | suffix_map[report_id].insert(suffix); | 
|---|
| 139 | live_reports.insert(report_id); | 
|---|
| 140 | } | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | for (const auto &outfix : build.outfixes) { | 
|---|
| 144 | for (const auto &report_id : all_reports(outfix)) { | 
|---|
| 145 | outfix_map[report_id].insert(&outfix); | 
|---|
| 146 | live_reports.insert(report_id); | 
|---|
| 147 | } | 
|---|
| 148 | } | 
|---|
| 149 |  | 
|---|
| 150 | if (build.mpv_outfix) { | 
|---|
| 151 | auto *mpv = build.mpv_outfix->mpv(); | 
|---|
| 152 | for (const auto &puff : mpv->puffettes) { | 
|---|
| 153 | puff_map[puff.report].insert(&puff); | 
|---|
| 154 | live_reports.insert(puff.report); | 
|---|
| 155 | } | 
|---|
| 156 | for (const auto &puff : mpv->triggered_puffettes) { | 
|---|
| 157 | puff_map[puff.report].insert(&puff); | 
|---|
| 158 | live_reports.insert(puff.report); | 
|---|
| 159 | } | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | // Collect live reports from boundary reports. | 
|---|
| 163 | insert(&live_reports, build.boundary.report_at_0); | 
|---|
| 164 | insert(&live_reports, build.boundary.report_at_0_eod); | 
|---|
| 165 | insert(&live_reports, build.boundary.report_at_eod); | 
|---|
| 166 |  | 
|---|
| 167 | DEBUG_PRINTF( "%zu of %zu reports are live\n", live_reports.size(), | 
|---|
| 168 | build.rm.numReports()); | 
|---|
| 169 | } | 
|---|
| 170 |  | 
|---|
| 171 | static | 
|---|
| 172 | vector<CharReach> makePath(const rose_literal_id &lit) { | 
|---|
| 173 | vector<CharReach> path(begin(lit.s), end(lit.s)); | 
|---|
| 174 | for (u32 i = 0; i < lit.delay; i++) { | 
|---|
| 175 | path.push_back(CharReach::dot()); | 
|---|
| 176 | } | 
|---|
| 177 | return path; | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | /** | 
|---|
| 181 | * \brief True if one of the given literals overlaps with the suffix of | 
|---|
| 182 | * another, meaning that they could arrive at the same offset. | 
|---|
| 183 | */ | 
|---|
| 184 | static | 
|---|
| 185 | bool literalsCouldRace(const rose_literal_id &lit1, | 
|---|
| 186 | const rose_literal_id &lit2) { | 
|---|
| 187 | DEBUG_PRINTF( "compare %s (delay %u) and %s (delay %u)\n", | 
|---|
| 188 | dumpString(lit1.s).c_str(), lit1.delay, | 
|---|
| 189 | dumpString(lit2.s).c_str(), lit2.delay); | 
|---|
| 190 |  | 
|---|
| 191 | // Add dots on the end of each literal for delay. | 
|---|
| 192 | const auto v1 = makePath(lit1); | 
|---|
| 193 | const auto v2 = makePath(lit2); | 
|---|
| 194 |  | 
|---|
| 195 | // See if the smaller path is a suffix of the larger path. | 
|---|
| 196 | const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; | 
|---|
| 197 | const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; | 
|---|
| 198 | auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), | 
|---|
| 199 | overlaps); | 
|---|
| 200 | return r.first == smaller->rend(); | 
|---|
| 201 | } | 
|---|
| 202 |  | 
|---|
| 203 | bool RoseDedupeAuxImpl::hasSafeMultiReports( | 
|---|
| 204 | const flat_set<ReportID> &reports) const { | 
|---|
| 205 | if (reports.size() <= 1) { | 
|---|
| 206 | return true; | 
|---|
| 207 | } | 
|---|
| 208 |  | 
|---|
| 209 | /* We have more than one ReportID corresponding to the external ID that is | 
|---|
| 210 | * presented to the user. These may differ in offset adjustment, bounds | 
|---|
| 211 | * checks, etc. */ | 
|---|
| 212 |  | 
|---|
| 213 | /* TODO: work out if these differences will actually cause problems */ | 
|---|
| 214 |  | 
|---|
| 215 | /* One common case where we know we don't have a problem is if there are | 
|---|
| 216 | * precisely two reports, one for the main Rose path and one for the | 
|---|
| 217 | * "small block matcher" path. */ | 
|---|
| 218 | if (reports.size() == 2) { | 
|---|
| 219 | ReportID id1 = *reports.begin(); | 
|---|
| 220 | ReportID id2 = *reports.rbegin(); | 
|---|
| 221 |  | 
|---|
| 222 | bool has_verts_1 = contains(vert_map, id1); | 
|---|
| 223 | bool has_verts_2 = contains(vert_map, id2); | 
|---|
| 224 | bool has_sb_verts_1 = contains(sb_vert_map, id1); | 
|---|
| 225 | bool has_sb_verts_2 = contains(sb_vert_map, id2); | 
|---|
| 226 |  | 
|---|
| 227 | if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { | 
|---|
| 228 | DEBUG_PRINTF( "two reports, one full and one small block: ok\n"); | 
|---|
| 229 | return true; | 
|---|
| 230 | } | 
|---|
| 231 | } | 
|---|
| 232 |  | 
|---|
| 233 | DEBUG_PRINTF( "more than one report\n"); | 
|---|
| 234 | return false; | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
| 237 | bool RoseDedupeAuxImpl::requiresDedupeSupport( | 
|---|
| 238 | const flat_set<ReportID> &reports_in) const { | 
|---|
| 239 | /* TODO: this could be expanded to check for offset or character | 
|---|
| 240 | constraints */ | 
|---|
| 241 |  | 
|---|
| 242 | // We don't want to consider dead reports (tracked by ReportManager but no | 
|---|
| 243 | // longer used) for the purposes of assigning dupe keys. | 
|---|
| 244 | flat_set<ReportID> reports; | 
|---|
| 245 | for (auto id : reports_in) { | 
|---|
| 246 | if (contains(live_reports, id)) { | 
|---|
| 247 | reports.insert(id); | 
|---|
| 248 | } | 
|---|
| 249 | } | 
|---|
| 250 |  | 
|---|
| 251 | DEBUG_PRINTF( "live reports: %s\n", as_string_list(reports).c_str()); | 
|---|
| 252 |  | 
|---|
| 253 | const RoseGraph &g = build.g; | 
|---|
| 254 |  | 
|---|
| 255 | bool has_suffix = false; | 
|---|
| 256 | bool has_outfix = false; | 
|---|
| 257 |  | 
|---|
| 258 | if (!hasSafeMultiReports(reports)) { | 
|---|
| 259 | DEBUG_PRINTF( "multiple reports not safe\n"); | 
|---|
| 260 | return true; | 
|---|
| 261 | } | 
|---|
| 262 |  | 
|---|
| 263 | set<RoseVertex> roles; | 
|---|
| 264 | set<suffix_id> suffixes; | 
|---|
| 265 | set<const OutfixInfo *> outfixes; | 
|---|
| 266 | set<const raw_puff *> puffettes; | 
|---|
| 267 | for (ReportID r : reports) { | 
|---|
| 268 | if (contains(vert_map, r)) { | 
|---|
| 269 | insert(&roles, vert_map.at(r)); | 
|---|
| 270 | } | 
|---|
| 271 | if (contains(suffix_map, r)) { | 
|---|
| 272 | insert(&suffixes, suffix_map.at(r)); | 
|---|
| 273 | } | 
|---|
| 274 |  | 
|---|
| 275 | if (contains(outfix_map, r)) { | 
|---|
| 276 | insert(&outfixes, outfix_map.at(r)); | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | if (contains(puff_map, r)) { | 
|---|
| 280 | insert(&puffettes, puff_map.at(r)); | 
|---|
| 281 | } | 
|---|
| 282 | } | 
|---|
| 283 |  | 
|---|
| 284 | /* roles */ | 
|---|
| 285 |  | 
|---|
| 286 | map<u32, u32> lits; // Literal ID -> count of occurrences. | 
|---|
| 287 |  | 
|---|
| 288 | const bool has_role = !roles.empty(); | 
|---|
| 289 | for (auto v : roles) { | 
|---|
| 290 | for (const auto &lit : g[v].literals) { | 
|---|
| 291 | lits[lit]++; | 
|---|
| 292 | } | 
|---|
| 293 | if (g[v].eod_accept) { | 
|---|
| 294 | // Literals plugged into this EOD accept must be taken into account | 
|---|
| 295 | // as well. | 
|---|
| 296 | for (auto u : inv_adjacent_vertices_range(v, g)) { | 
|---|
| 297 | for (const auto &lit : g[u].literals) { | 
|---|
| 298 | lits[lit]++; | 
|---|
| 299 | } | 
|---|
| 300 | } | 
|---|
| 301 | } | 
|---|
| 302 | } | 
|---|
| 303 |  | 
|---|
| 304 | /* literals */ | 
|---|
| 305 |  | 
|---|
| 306 | for (const auto &m : lits) { | 
|---|
| 307 | if (m.second > 1) { | 
|---|
| 308 | DEBUG_PRINTF( "lit %u used by >1 reporting roles\n", m.first); | 
|---|
| 309 | return true; | 
|---|
| 310 | } | 
|---|
| 311 | } | 
|---|
| 312 |  | 
|---|
| 313 | for (auto it = begin(lits); it != end(lits); ++it) { | 
|---|
| 314 | const auto &lit1 = build.literals.at(it->first); | 
|---|
| 315 | for (auto jt = next(it); jt != end(lits); ++jt) { | 
|---|
| 316 | const auto &lit2 = build.literals.at(jt->first); | 
|---|
| 317 | if (literalsCouldRace(lit1, lit2)) { | 
|---|
| 318 | DEBUG_PRINTF( "literals could race\n"); | 
|---|
| 319 | return true; | 
|---|
| 320 | } | 
|---|
| 321 | } | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | /* suffixes */ | 
|---|
| 325 |  | 
|---|
| 326 | for (const auto &suffix : suffixes) { | 
|---|
| 327 | if (has_suffix || has_role) { | 
|---|
| 328 | return true; /* scope for badness */ | 
|---|
| 329 | } | 
|---|
| 330 |  | 
|---|
| 331 | has_suffix = true; | 
|---|
| 332 |  | 
|---|
| 333 | /* some lesser suffix engines (nfas, haig, castle) can raise multiple | 
|---|
| 334 | * matches for a report id at the same offset if there are multiple | 
|---|
| 335 | * report states live. */ | 
|---|
| 336 | if (suffix.haig()) { | 
|---|
| 337 | return true; | 
|---|
| 338 | } | 
|---|
| 339 | if (suffix.graph() && | 
|---|
| 340 | requiresDedupe(*suffix.graph(), reports, build.cc.grey)) { | 
|---|
| 341 | return true; | 
|---|
| 342 | } | 
|---|
| 343 | if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) { | 
|---|
| 344 | return true; | 
|---|
| 345 | } | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|
| 348 | /* outfixes */ | 
|---|
| 349 |  | 
|---|
| 350 | for (const auto &outfix_ptr : outfixes) { | 
|---|
| 351 | assert(outfix_ptr); | 
|---|
| 352 | const OutfixInfo &out = *outfix_ptr; | 
|---|
| 353 |  | 
|---|
| 354 | if (has_outfix || has_role || has_suffix) { | 
|---|
| 355 | return true; | 
|---|
| 356 | } | 
|---|
| 357 | has_outfix = true; | 
|---|
| 358 |  | 
|---|
| 359 | if (out.haig()) { | 
|---|
| 360 | return true; /* haig may report matches with different SOM at the | 
|---|
| 361 | same offset */ | 
|---|
| 362 | } | 
|---|
| 363 |  | 
|---|
| 364 | if (out.holder() && | 
|---|
| 365 | requiresDedupe(*out.holder(), reports, build.cc.grey)) { | 
|---|
| 366 | return true; | 
|---|
| 367 | } | 
|---|
| 368 | } | 
|---|
| 369 |  | 
|---|
| 370 | /* mpv */ | 
|---|
| 371 | for (UNUSED const auto &puff : puffettes) { | 
|---|
| 372 | if (has_outfix || has_role || has_suffix) { | 
|---|
| 373 | return true; | 
|---|
| 374 | } | 
|---|
| 375 | has_outfix = true; | 
|---|
| 376 | } | 
|---|
| 377 |  | 
|---|
| 378 | /* boundary */ | 
|---|
| 379 | if (has_intersection(build.boundary.report_at_eod, reports)) { | 
|---|
| 380 | if (has_outfix || has_role || has_suffix) { | 
|---|
| 381 | return true; | 
|---|
| 382 | } | 
|---|
| 383 | } | 
|---|
| 384 |  | 
|---|
| 385 | return false; | 
|---|
| 386 | } | 
|---|
| 387 |  | 
|---|
| 388 | } // namespace ue2 | 
|---|
| 389 |  | 
|---|