| 1 | /* | 
|---|
| 2 | * Copyright (c) 2015-2019, Intel Corporation | 
|---|
| 3 | * | 
|---|
| 4 | * Redistribution and use in source and binary forms, with or without | 
|---|
| 5 | * modification, are permitted provided that the following conditions are met: | 
|---|
| 6 | * | 
|---|
| 7 | *  * Redistributions of source code must retain the above copyright notice, | 
|---|
| 8 | *    this list of conditions and the following disclaimer. | 
|---|
| 9 | *  * Redistributions in binary form must reproduce the above copyright | 
|---|
| 10 | *    notice, this list of conditions and the following disclaimer in the | 
|---|
| 11 | *    documentation and/or other materials provided with the distribution. | 
|---|
| 12 | *  * Neither the name of Intel Corporation nor the names of its contributors | 
|---|
| 13 | *    may be used to endorse or promote products derived from this software | 
|---|
| 14 | *    without specific prior written permission. | 
|---|
| 15 | * | 
|---|
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|---|
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|---|
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|---|
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|---|
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|---|
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|---|
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|---|
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|---|
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|---|
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|---|
| 26 | * POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 27 | */ | 
|---|
| 28 |  | 
|---|
| 29 | #ifndef ROSE_BUILD_IMPL_H | 
|---|
| 30 | #define ROSE_BUILD_IMPL_H | 
|---|
| 31 |  | 
|---|
| 32 | #include "rose_build.h" | 
|---|
| 33 | #include "rose_build_util.h" | 
|---|
| 34 | #include "rose_common.h" | 
|---|
| 35 | #include "rose_graph.h" | 
|---|
| 36 | #include "nfa/mpvcompile.h" | 
|---|
| 37 | #include "nfa/goughcompile.h" | 
|---|
| 38 | #include "nfa/nfa_internal.h" | 
|---|
| 39 | #include "nfagraph/ng_holder.h" | 
|---|
| 40 | #include "nfagraph/ng_revacc.h" | 
|---|
| 41 | #include "util/bytecode_ptr.h" | 
|---|
| 42 | #include "util/flat_containers.h" | 
|---|
| 43 | #include "util/hash.h" | 
|---|
| 44 | #include "util/order_check.h" | 
|---|
| 45 | #include "util/queue_index_factory.h" | 
|---|
| 46 | #include "util/ue2string.h" | 
|---|
| 47 | #include "util/unordered.h" | 
|---|
| 48 | #include "util/verify_types.h" | 
|---|
| 49 |  | 
|---|
| 50 | #include <deque> | 
|---|
| 51 | #include <map> | 
|---|
| 52 | #include <string> | 
|---|
| 53 | #include <vector> | 
|---|
| 54 | #include <boost/variant.hpp> | 
|---|
| 55 |  | 
|---|
| 56 | struct RoseEngine; | 
|---|
| 57 |  | 
|---|
| 58 | namespace ue2 { | 
|---|
| 59 |  | 
|---|
/* Build-time limits for Rose. */

/* NOTE(review): presumably the number of distinct literal groups that fit in
 * a rose_group mask -- confirm against rose_common.h. */
#define ROSE_GROUPS_MAX 64

/* NOTE(review): by its name, the lower bound for the long literal threshold
 * (see calcLongLitThreshold) -- confirm at the point of use. */
#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33

/**
 * \brief Maximum length of a "short" literal fragment, i.e. one that may be
 * handed to a literal matcher as-is.
 *
 * Anything longer is cut down to its suffix and then confirmed by the Rose
 * interpreter: "medium length" literals are confirmed from history, while
 * "long literals" rely on the streaming table support.
 */
#define ROSE_SHORT_LITERAL_LEN_MAX 8

/* Types that this header only needs as incomplete types. */
struct BoundaryReports;
struct CastleProto;
struct CompileContext;
class ReportManager;
class SmallWriteBuild;
class SomSlotManager;
|---|
| 81 |  | 
|---|
| 82 | struct suffix_id { | 
|---|
| 83 | suffix_id(const RoseSuffixInfo &in) | 
|---|
| 84 | : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), | 
|---|
| 85 | h(in.haig.get()), t(in.tamarama.get()), | 
|---|
| 86 | dfa_min_width(in.dfa_min_width), | 
|---|
| 87 | dfa_max_width(in.dfa_max_width) { | 
|---|
| 88 | assert(!g || g->kind == NFA_SUFFIX); | 
|---|
| 89 | } | 
|---|
| 90 | bool operator==(const suffix_id &b) const { | 
|---|
| 91 | bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t; | 
|---|
| 92 | assert(!rv || dfa_min_width == b.dfa_min_width); | 
|---|
| 93 | assert(!rv || dfa_max_width == b.dfa_max_width); | 
|---|
| 94 | return rv; | 
|---|
| 95 | } | 
|---|
| 96 | bool operator!=(const suffix_id &b) const { return !(*this == b); } | 
|---|
| 97 | bool operator<(const suffix_id &b) const { | 
|---|
| 98 | const suffix_id &a = *this; | 
|---|
| 99 | ORDER_CHECK(g); | 
|---|
| 100 | ORDER_CHECK(c); | 
|---|
| 101 | ORDER_CHECK(d); | 
|---|
| 102 | ORDER_CHECK(h); | 
|---|
| 103 | ORDER_CHECK(t); | 
|---|
| 104 | return false; | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 | NGHolder *graph() { | 
|---|
| 108 | if (!d && !h) { | 
|---|
| 109 | assert(dfa_min_width == depth(0)); | 
|---|
| 110 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 111 | } | 
|---|
| 112 | return g; | 
|---|
| 113 | } | 
|---|
| 114 | const NGHolder *graph() const { | 
|---|
| 115 | if (!d && !h) { | 
|---|
| 116 | assert(dfa_min_width == depth(0)); | 
|---|
| 117 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 118 | } | 
|---|
| 119 | return g; | 
|---|
| 120 | } | 
|---|
| 121 | CastleProto *castle() { | 
|---|
| 122 | if (!d && !h) { | 
|---|
| 123 | assert(dfa_min_width == depth(0)); | 
|---|
| 124 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 125 | } | 
|---|
| 126 | return c; | 
|---|
| 127 | } | 
|---|
| 128 | const CastleProto *castle() const { | 
|---|
| 129 | if (!d && !h) { | 
|---|
| 130 | assert(dfa_min_width == depth(0)); | 
|---|
| 131 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 132 | } | 
|---|
| 133 | return c; | 
|---|
| 134 | } | 
|---|
| 135 | TamaProto *tamarama() { | 
|---|
| 136 | if (!d && !h) { | 
|---|
| 137 | assert(dfa_min_width == depth(0)); | 
|---|
| 138 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 139 | } | 
|---|
| 140 | return t; | 
|---|
| 141 | } | 
|---|
| 142 | const TamaProto *tamarama() const { | 
|---|
| 143 | if (!d && !h) { | 
|---|
| 144 | assert(dfa_min_width == depth(0)); | 
|---|
| 145 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 146 | } | 
|---|
| 147 | return t; | 
|---|
| 148 | } | 
|---|
| 149 |  | 
|---|
| 150 |  | 
|---|
| 151 | raw_som_dfa *haig() { return h; } | 
|---|
| 152 | const raw_som_dfa *haig() const { return h; } | 
|---|
| 153 | raw_dfa *dfa() { return d; } | 
|---|
| 154 | const raw_dfa *dfa() const { return d; } | 
|---|
| 155 |  | 
|---|
| 156 | size_t hash() const; | 
|---|
| 157 |  | 
|---|
| 158 | private: | 
|---|
| 159 | NGHolder *g; | 
|---|
| 160 | CastleProto *c; | 
|---|
| 161 | raw_dfa *d; | 
|---|
| 162 | raw_som_dfa *h; | 
|---|
| 163 | TamaProto *t; | 
|---|
| 164 | depth dfa_min_width; | 
|---|
| 165 | depth dfa_max_width; | 
|---|
| 166 |  | 
|---|
| 167 | friend depth findMinWidth(const suffix_id &s); | 
|---|
| 168 | friend depth findMaxWidth(const suffix_id &s); | 
|---|
| 169 | friend depth findMinWidth(const suffix_id &s, u32 top); | 
|---|
| 170 | friend depth findMaxWidth(const suffix_id &s, u32 top); | 
|---|
| 171 | }; | 
|---|
| 172 |  | 
|---|
| 173 | std::set<ReportID> all_reports(const suffix_id &s); | 
|---|
| 174 | std::set<u32> all_tops(const suffix_id &s); | 
|---|
| 175 | bool has_eod_accepts(const suffix_id &s); | 
|---|
| 176 | bool has_non_eod_accepts(const suffix_id &s); | 
|---|
| 177 | depth findMinWidth(const suffix_id &s); | 
|---|
| 178 | depth findMaxWidth(const suffix_id &s); | 
|---|
| 179 | depth findMinWidth(const suffix_id &s, u32 top); | 
|---|
| 180 | depth findMaxWidth(const suffix_id &s, u32 top); | 
|---|
| 181 |  | 
|---|
| 182 | /** \brief represents an engine to the left of a rose role */ | 
|---|
| 183 | struct left_id { | 
|---|
| 184 | left_id(const LeftEngInfo &in) | 
|---|
| 185 | : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), | 
|---|
| 186 | h(in.haig.get()), dfa_min_width(in.dfa_min_width), | 
|---|
| 187 | dfa_max_width(in.dfa_max_width) { | 
|---|
| 188 | assert(!g || !has_managed_reports(*g)); | 
|---|
| 189 | } | 
|---|
| 190 | bool operator==(const left_id &b) const { | 
|---|
| 191 | bool rv = g == b.g && c == b.c && h == b.h && d == b.d; | 
|---|
| 192 | assert(!rv || dfa_min_width == b.dfa_min_width); | 
|---|
| 193 | assert(!rv || dfa_max_width == b.dfa_max_width); | 
|---|
| 194 | return rv; | 
|---|
| 195 | } | 
|---|
| 196 | bool operator!=(const left_id &b) const { return !(*this == b); } | 
|---|
| 197 | bool operator<(const left_id &b) const { | 
|---|
| 198 | const left_id &a = *this; | 
|---|
| 199 | ORDER_CHECK(g); | 
|---|
| 200 | ORDER_CHECK(c); | 
|---|
| 201 | ORDER_CHECK(d); | 
|---|
| 202 | ORDER_CHECK(h); | 
|---|
| 203 | return false; | 
|---|
| 204 | } | 
|---|
| 205 |  | 
|---|
| 206 | NGHolder *graph() { | 
|---|
| 207 | if (!d && !h) { | 
|---|
| 208 | assert(dfa_min_width == depth(0)); | 
|---|
| 209 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 210 | } | 
|---|
| 211 | return g; | 
|---|
| 212 | } | 
|---|
| 213 | const NGHolder *graph() const { | 
|---|
| 214 | if (!d && !h) { | 
|---|
| 215 | assert(dfa_min_width == depth(0)); | 
|---|
| 216 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 217 | } | 
|---|
| 218 | return g; | 
|---|
| 219 | } | 
|---|
| 220 | CastleProto *castle() { | 
|---|
| 221 | if (!d && !h) { | 
|---|
| 222 | assert(dfa_min_width == depth(0)); | 
|---|
| 223 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 224 | } | 
|---|
| 225 |  | 
|---|
| 226 | return c; | 
|---|
| 227 | } | 
|---|
| 228 | const CastleProto *castle() const { | 
|---|
| 229 | if (!d && !h) { | 
|---|
| 230 | assert(dfa_min_width == depth(0)); | 
|---|
| 231 | assert(dfa_max_width == depth::infinity()); | 
|---|
| 232 | } | 
|---|
| 233 |  | 
|---|
| 234 | return c; | 
|---|
| 235 | } | 
|---|
| 236 | raw_som_dfa *haig() { return h; } | 
|---|
| 237 | const raw_som_dfa *haig() const { return h; } | 
|---|
| 238 | raw_dfa *dfa() { return d; } | 
|---|
| 239 | const raw_dfa *dfa() const { return d; } | 
|---|
| 240 |  | 
|---|
| 241 | size_t hash() const; | 
|---|
| 242 |  | 
|---|
| 243 | private: | 
|---|
| 244 | NGHolder *g; | 
|---|
| 245 | CastleProto *c; | 
|---|
| 246 | raw_dfa *d; | 
|---|
| 247 | raw_som_dfa *h; | 
|---|
| 248 | depth dfa_min_width; | 
|---|
| 249 | depth dfa_max_width; | 
|---|
| 250 |  | 
|---|
| 251 | friend bool isAnchored(const left_id &r); | 
|---|
| 252 | friend depth findMinWidth(const left_id &r); | 
|---|
| 253 | friend depth findMaxWidth(const left_id &r); | 
|---|
| 254 | }; | 
|---|
| 255 |  | 
|---|
| 256 | std::set<u32> all_tops(const left_id &r); | 
|---|
| 257 | std::set<ReportID> all_reports(const left_id &left); | 
|---|
| 258 | bool isAnchored(const left_id &r); | 
|---|
| 259 | depth findMinWidth(const left_id &r); | 
|---|
| 260 | depth findMaxWidth(const left_id &r); | 
|---|
| 261 | u32 num_tops(const left_id &r); | 
|---|
| 262 |  | 
|---|
| 263 | struct rose_literal_info { | 
|---|
| 264 | flat_set<u32> delayed_ids; | 
|---|
| 265 | flat_set<RoseVertex> vertices; | 
|---|
| 266 | rose_group group_mask = 0; | 
|---|
| 267 | u32 undelayed_id = MO_INVALID_IDX; | 
|---|
| 268 | bool squash_group = false; | 
|---|
| 269 | bool requires_benefits = false; | 
|---|
| 270 | }; | 
|---|
| 271 |  | 
|---|
| 272 | /** | 
|---|
| 273 | * \brief Main literal struct used at Rose build time. Numeric literal IDs | 
|---|
| 274 | * used at build time point at these (via the RoseBuildImpl::literals map). | 
|---|
| 275 | */ | 
|---|
| 276 | struct rose_literal_id { | 
|---|
| 277 | rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in, | 
|---|
| 278 | u32 delay_in) | 
|---|
| 279 | : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {} | 
|---|
| 280 |  | 
|---|
| 281 | rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in, | 
|---|
| 282 | const std::vector<u8> &cmp_in, rose_literal_table table_in, | 
|---|
| 283 | u32 delay_in); | 
|---|
| 284 |  | 
|---|
| 285 | ue2_literal s; | 
|---|
| 286 | std::vector<u8> msk; | 
|---|
| 287 | std::vector<u8> cmp; | 
|---|
| 288 | rose_literal_table table; | 
|---|
| 289 | u32 delay; | 
|---|
| 290 | u32 distinctiveness; | 
|---|
| 291 |  | 
|---|
| 292 | size_t elength(void) const { return s.length() + delay; } | 
|---|
| 293 | size_t elength_including_mask(void) const { | 
|---|
| 294 | size_t mask_len = msk.size(); | 
|---|
| 295 | for (u8 c : msk) { | 
|---|
| 296 | if (!c) { | 
|---|
| 297 | mask_len--; | 
|---|
| 298 | } else { | 
|---|
| 299 | break; | 
|---|
| 300 | } | 
|---|
| 301 | } | 
|---|
| 302 | return MAX(mask_len, s.length()) + delay; | 
|---|
| 303 | } | 
|---|
| 304 |  | 
|---|
| 305 | bool operator==(const rose_literal_id &b) const { | 
|---|
| 306 | return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && | 
|---|
| 307 | delay == b.delay && distinctiveness == b.distinctiveness; | 
|---|
| 308 | } | 
|---|
| 309 |  | 
|---|
| 310 | size_t hash() const { | 
|---|
| 311 | return hash_all(s, msk, cmp, table, delay, distinctiveness); | 
|---|
| 312 | } | 
|---|
| 313 | }; | 
|---|
| 314 |  | 
|---|
| 315 | static inline | 
|---|
| 316 | bool operator<(const rose_literal_id &a, const rose_literal_id &b) { | 
|---|
| 317 | ORDER_CHECK(distinctiveness); | 
|---|
| 318 | ORDER_CHECK(table); | 
|---|
| 319 | ORDER_CHECK(s); | 
|---|
| 320 | ORDER_CHECK(delay); | 
|---|
| 321 | ORDER_CHECK(msk); | 
|---|
| 322 | ORDER_CHECK(cmp); | 
|---|
| 323 | return 0; | 
|---|
| 324 | } | 
|---|
| 325 |  | 
|---|
| 326 | class RoseLiteralMap { | 
|---|
| 327 | /** | 
|---|
| 328 | * \brief Main storage for literals. | 
|---|
| 329 | * | 
|---|
| 330 | * Note that this cannot be a vector, as the present code relies on | 
|---|
| 331 | * iterator stability when iterating over this list and adding to it inside | 
|---|
| 332 | * the loop. | 
|---|
| 333 | */ | 
|---|
| 334 | std::deque<rose_literal_id> lits; | 
|---|
| 335 |  | 
|---|
| 336 | /** \brief Quick-lookup index from literal -> index in lits. */ | 
|---|
| 337 | ue2_unordered_map<rose_literal_id, u32> lits_index; | 
|---|
| 338 |  | 
|---|
| 339 | public: | 
|---|
| 340 | std::pair<u32, bool> insert(const rose_literal_id &lit) { | 
|---|
| 341 | auto it = lits_index.find(lit); | 
|---|
| 342 | if (it != lits_index.end()) { | 
|---|
| 343 | u32 idx = it->second; | 
|---|
| 344 | auto &l = lits.at(idx); | 
|---|
| 345 | if (!lit.s.get_pure() && l.s.get_pure()) { | 
|---|
| 346 | lits_index.erase(l); | 
|---|
| 347 | l.s.unset_pure(); | 
|---|
| 348 | lits_index.emplace(l, idx); | 
|---|
| 349 | } | 
|---|
| 350 | return {idx, false}; | 
|---|
| 351 | } | 
|---|
| 352 | u32 id = verify_u32(lits.size()); | 
|---|
| 353 | lits.push_back(lit); | 
|---|
| 354 | lits_index.emplace(lit, id); | 
|---|
| 355 | return {id, true}; | 
|---|
| 356 | } | 
|---|
| 357 |  | 
|---|
| 358 | // Erase the last num elements. | 
|---|
| 359 | void erase_back(size_t num) { | 
|---|
| 360 | assert(num <= lits.size()); | 
|---|
| 361 | for (size_t i = 0; i < num; i++) { | 
|---|
| 362 | lits_index.erase(lits.back()); | 
|---|
| 363 | lits.pop_back(); | 
|---|
| 364 | } | 
|---|
| 365 | assert(lits.size() == lits_index.size()); | 
|---|
| 366 | } | 
|---|
| 367 |  | 
|---|
| 368 | const rose_literal_id &at(u32 id) const { | 
|---|
| 369 | assert(id < lits.size()); | 
|---|
| 370 | return lits.at(id); | 
|---|
| 371 | } | 
|---|
| 372 |  | 
|---|
| 373 | using const_iterator = decltype(lits)::const_iterator; | 
|---|
| 374 | const_iterator begin() const { return lits.begin(); } | 
|---|
| 375 | const_iterator end() const { return lits.end(); } | 
|---|
| 376 |  | 
|---|
| 377 | size_t size() const { | 
|---|
| 378 | return lits.size(); | 
|---|
| 379 | } | 
|---|
| 380 | }; | 
|---|
| 381 |  | 
|---|
| 382 | struct simple_anchored_info { | 
|---|
| 383 | simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) | 
|---|
| 384 | : min_bound(min_b), max_bound(max_b), literal(lit) {} | 
|---|
| 385 | u32 min_bound; /**< min number of characters required before literal can | 
|---|
| 386 | * start matching */ | 
|---|
| 387 | u32 max_bound; /**< max number of characters allowed before literal can | 
|---|
| 388 | * start matching */ | 
|---|
| 389 | ue2_literal literal; | 
|---|
| 390 | }; | 
|---|
| 391 |  | 
|---|
| 392 | static really_inline | 
|---|
| 393 | bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { | 
|---|
| 394 | ORDER_CHECK(min_bound); | 
|---|
| 395 | ORDER_CHECK(max_bound); | 
|---|
| 396 | ORDER_CHECK(literal); | 
|---|
| 397 | return 0; | 
|---|
| 398 | } | 
|---|
| 399 |  | 
|---|
| 400 | struct MpvProto { | 
|---|
| 401 | bool empty() const { | 
|---|
| 402 | return puffettes.empty() && triggered_puffettes.empty(); | 
|---|
| 403 | } | 
|---|
| 404 | void reset() { | 
|---|
| 405 | puffettes.clear(); | 
|---|
| 406 | triggered_puffettes.clear(); | 
|---|
| 407 | } | 
|---|
| 408 | std::vector<raw_puff> puffettes; | 
|---|
| 409 | std::vector<raw_puff> triggered_puffettes; | 
|---|
| 410 | }; | 
|---|
| 411 |  | 
|---|
| 412 | struct OutfixInfo { | 
|---|
| 413 | template<class T> | 
|---|
| 414 | explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {} | 
|---|
| 415 |  | 
|---|
| 416 | explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {} | 
|---|
| 417 |  | 
|---|
| 418 | u32 get_queue(QueueIndexFactory &qif); | 
|---|
| 419 |  | 
|---|
| 420 | u32 get_queue() const { | 
|---|
| 421 | assert(queue != ~0U); | 
|---|
| 422 | return queue; | 
|---|
| 423 | } | 
|---|
| 424 |  | 
|---|
| 425 | bool is_nonempty_mpv() const { | 
|---|
| 426 | auto *m = boost::get<MpvProto>(&proto); | 
|---|
| 427 | return m && !m->empty(); | 
|---|
| 428 | } | 
|---|
| 429 |  | 
|---|
| 430 | bool is_dead() const { | 
|---|
| 431 | auto *m = boost::get<MpvProto>(&proto); | 
|---|
| 432 | if (m) { | 
|---|
| 433 | return m->empty(); | 
|---|
| 434 | } | 
|---|
| 435 | return boost::get<boost::blank>(&proto) != nullptr; | 
|---|
| 436 | } | 
|---|
| 437 |  | 
|---|
| 438 | void clear() { | 
|---|
| 439 | proto = boost::blank(); | 
|---|
| 440 | } | 
|---|
| 441 |  | 
|---|
| 442 | // Convenience accessor functions. | 
|---|
| 443 |  | 
|---|
| 444 | NGHolder *holder() { | 
|---|
| 445 | auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); | 
|---|
| 446 | return up ? up->get() : nullptr; | 
|---|
| 447 | } | 
|---|
| 448 | raw_dfa *rdfa() { | 
|---|
| 449 | auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); | 
|---|
| 450 | return up ? up->get() : nullptr; | 
|---|
| 451 | } | 
|---|
| 452 | raw_som_dfa *haig() { | 
|---|
| 453 | auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); | 
|---|
| 454 | return up ? up->get() : nullptr; | 
|---|
| 455 | } | 
|---|
| 456 | MpvProto *mpv() { | 
|---|
| 457 | return boost::get<MpvProto>(&proto); | 
|---|
| 458 | } | 
|---|
| 459 |  | 
|---|
| 460 | // Convenience const accessor functions. | 
|---|
| 461 |  | 
|---|
| 462 | const NGHolder *holder() const { | 
|---|
| 463 | auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); | 
|---|
| 464 | return up ? up->get() : nullptr; | 
|---|
| 465 | } | 
|---|
| 466 | const raw_dfa *rdfa() const { | 
|---|
| 467 | auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); | 
|---|
| 468 | return up ? up->get() : nullptr; | 
|---|
| 469 | } | 
|---|
| 470 | const raw_som_dfa *haig() const { | 
|---|
| 471 | auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); | 
|---|
| 472 | return up ? up->get() : nullptr; | 
|---|
| 473 | } | 
|---|
| 474 | const MpvProto *mpv() const { | 
|---|
| 475 | return boost::get<MpvProto>(&proto); | 
|---|
| 476 | } | 
|---|
| 477 |  | 
|---|
| 478 | /** | 
|---|
| 479 | * \brief Variant wrapping the various engine types. If this is | 
|---|
| 480 | * boost::blank, it means that this outfix is unused (dead). | 
|---|
| 481 | */ | 
|---|
| 482 | boost::variant< | 
|---|
| 483 | boost::blank, | 
|---|
| 484 | std::unique_ptr<NGHolder>, | 
|---|
| 485 | std::unique_ptr<raw_dfa>, | 
|---|
| 486 | std::unique_ptr<raw_som_dfa>, | 
|---|
| 487 | MpvProto> proto = boost::blank(); | 
|---|
| 488 |  | 
|---|
| 489 | RevAccInfo rev_info; | 
|---|
| 490 | u32 maxBAWidth = 0; //!< max bi-anchored width | 
|---|
| 491 | depth minWidth{depth::infinity()}; | 
|---|
| 492 | depth maxWidth{0}; | 
|---|
| 493 | u64a maxOffset = 0; | 
|---|
| 494 | bool in_sbmatcher = false; //!< handled by small-block matcher. | 
|---|
| 495 |  | 
|---|
| 496 | private: | 
|---|
| 497 | u32 queue = ~0U; | 
|---|
| 498 | }; | 
|---|
| 499 |  | 
|---|
| 500 | std::set<ReportID> all_reports(const OutfixInfo &outfix); | 
|---|
| 501 |  | 
|---|
| 502 | // Concrete impl class | 
|---|
| 503 | class RoseBuildImpl : public RoseBuild { | 
|---|
| 504 | public: | 
|---|
| 505 | RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr, | 
|---|
| 506 | const CompileContext &cc, const BoundaryReports &boundary); | 
|---|
| 507 |  | 
|---|
| 508 | ~RoseBuildImpl() override; | 
|---|
| 509 |  | 
|---|
| 510 | // Adds a single literal. | 
|---|
| 511 | void add(bool anchored, bool eod, const ue2_literal &lit, | 
|---|
| 512 | const flat_set<ReportID> &ids) override; | 
|---|
| 513 |  | 
|---|
| 514 | bool addRose(const RoseInGraph &ig, bool prefilter) override; | 
|---|
| 515 | bool addSombeRose(const RoseInGraph &ig) override; | 
|---|
| 516 |  | 
|---|
| 517 | bool addOutfix(const NGHolder &h) override; | 
|---|
| 518 | bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override; | 
|---|
| 519 | bool addOutfix(const raw_puff &rp) override; | 
|---|
| 520 |  | 
|---|
| 521 | bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override; | 
|---|
| 522 |  | 
|---|
| 523 | // Returns true if we were able to add it as a mask | 
|---|
| 524 | bool add(bool anchored, const std::vector<CharReach> &mask, | 
|---|
| 525 | const flat_set<ReportID> &reports) override; | 
|---|
| 526 |  | 
|---|
| 527 | bool addAnchoredAcyclic(const NGHolder &graph) override; | 
|---|
| 528 |  | 
|---|
| 529 | bool validateMask(const std::vector<CharReach> &mask, | 
|---|
| 530 | const flat_set<ReportID> &reports, bool anchored, | 
|---|
| 531 | bool eod) const override; | 
|---|
| 532 | void addMask(const std::vector<CharReach> &mask, | 
|---|
| 533 | const flat_set<ReportID> &reports, bool anchored, | 
|---|
| 534 | bool eod) override; | 
|---|
| 535 |  | 
|---|
| 536 | // Construct a runtime implementation. | 
|---|
| 537 | bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override; | 
|---|
| 538 | bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth); | 
|---|
| 539 |  | 
|---|
| 540 | void setSom() override { hasSom = true; } | 
|---|
| 541 |  | 
|---|
| 542 | std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override; | 
|---|
| 543 |  | 
|---|
| 544 | // Find the maximum bound on the edges to this vertex's successors. | 
|---|
| 545 | u32 calcSuccMaxBound(RoseVertex u) const; | 
|---|
| 546 |  | 
|---|
| 547 | /* Returns the ID of the given literal in the literal map, adding it if | 
|---|
| 548 | * necessary. */ | 
|---|
| 549 | u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table); | 
|---|
| 550 |  | 
|---|
| 551 | // Variant with msk/cmp. | 
|---|
| 552 | u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk, | 
|---|
| 553 | const std::vector<u8> &cmp, u32 delay, | 
|---|
| 554 | rose_literal_table table); | 
|---|
| 555 |  | 
|---|
| 556 | u32 getNewLiteralId(void); | 
|---|
| 557 |  | 
|---|
| 558 | void removeVertices(const std::vector<RoseVertex> &dead); | 
|---|
| 559 |  | 
|---|
| 560 | // Is the Rose anchored? | 
|---|
| 561 | bool hasNoFloatingRoots() const; | 
|---|
| 562 |  | 
|---|
| 563 | u32 calcHistoryRequired() const; | 
|---|
| 564 |  | 
|---|
| 565 | rose_group getInitialGroups() const; | 
|---|
| 566 | rose_group getSuccGroups(RoseVertex start) const; | 
|---|
| 567 | rose_group getGroups(RoseVertex v) const; | 
|---|
| 568 |  | 
|---|
| 569 | bool hasDelayedLiteral(RoseVertex v) const; | 
|---|
| 570 | bool hasDelayPred(RoseVertex v) const; | 
|---|
| 571 | bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const; | 
|---|
| 572 | bool hasAnchoredTablePred(RoseVertex v) const; | 
|---|
| 573 |  | 
|---|
| 574 | // Is the given vertex a successor of either root or anchored_root? | 
|---|
| 575 | bool isRootSuccessor(const RoseVertex &v) const; | 
|---|
| 576 | /* Is the given vertex a successor of something other than root or | 
|---|
| 577 | * anchored_root? */ | 
|---|
| 578 | bool isNonRootSuccessor(const RoseVertex &v) const; | 
|---|
| 579 |  | 
|---|
| 580 | bool isDirectReport(u32 id) const; | 
|---|
| 581 | bool isDelayed(u32 id) const; | 
|---|
| 582 |  | 
|---|
| 583 | bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored | 
|---|
| 584 | * table */ | 
|---|
| 585 | bool isFloating(RoseVertex v) const; /* true iff has literal in floating | 
|---|
| 586 | * table */ | 
|---|
| 587 | bool isInETable(RoseVertex v) const; /* true iff has literal in eod | 
|---|
| 588 | * table */ | 
|---|
| 589 |  | 
|---|
| 590 | size_t maxLiteralLen(RoseVertex v) const; | 
|---|
| 591 | size_t minLiteralLen(RoseVertex v) const; | 
|---|
| 592 |  | 
|---|
| 593 | // max overlap considered for every pair (ulit, vlit). | 
|---|
| 594 | size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; | 
|---|
| 595 |  | 
|---|
| 596 | bool isPseudoStar(const RoseEdge &e) const; | 
|---|
| 597 | bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; | 
|---|
| 598 | bool hasOnlyPseudoStarInEdges(RoseVertex v) const; | 
|---|
| 599 |  | 
|---|
| 600 | bool isAnyStart(const RoseVertex &v) const { | 
|---|
| 601 | return v == root || v == anchored_root; | 
|---|
| 602 | } | 
|---|
| 603 |  | 
|---|
| 604 | bool isVirtualVertex(const RoseVertex &v) const { | 
|---|
| 605 | return g[v].eod_accept || isAnyStart(v); | 
|---|
| 606 | } | 
|---|
| 607 |  | 
|---|
| 608 | void handleMixedSensitivity(void); | 
|---|
| 609 |  | 
|---|
| 610 | void findTransientLeftfixes(void); | 
|---|
| 611 |  | 
|---|
| 612 | const CompileContext &cc; | 
|---|
| 613 | RoseGraph g; | 
|---|
| 614 | const RoseVertex root; | 
|---|
| 615 | const RoseVertex anchored_root; | 
|---|
| 616 | RoseLiteralMap literals; | 
|---|
| 617 | std::map<RoseVertex, RoseVertex> ghost; | 
|---|
| 618 | ReportID getNewNfaReport() override { | 
|---|
| 619 | return next_nfa_report++; | 
|---|
| 620 | } | 
|---|
| 621 | std::deque<rose_literal_info> literal_info; | 
|---|
| 622 | bool hasSom; //!< at least one pattern requires SOM. | 
|---|
| 623 | std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas; | 
|---|
| 624 | std::map<simple_anchored_info, std::set<u32>> anchored_simple; | 
|---|
| 625 | std::map<u32, std::set<u32> > group_to_literal; | 
|---|
| 626 | u32 group_end; | 
|---|
| 627 |  | 
|---|
| 628 | u32 ematcher_region_size; /**< number of bytes the eod table runs over */ | 
|---|
| 629 |  | 
|---|
| 630 | /** \brief Mapping from anchored literal ID to the original literal suffix | 
|---|
| 631 | * present when the literal was added to the literal matcher. Used for | 
|---|
| 632 | * overlap calculation in history assignment. */ | 
|---|
| 633 | std::map<u32, rose_literal_id> anchoredLitSuffix; | 
|---|
| 634 |  | 
|---|
| 635 | ue2_unordered_set<left_id> transient; | 
|---|
| 636 | ue2_unordered_map<left_id, rose_group> rose_squash_masks; | 
|---|
| 637 |  | 
|---|
| 638 | std::vector<OutfixInfo> outfixes; | 
|---|
| 639 |  | 
|---|
| 640 | /** \brief MPV outfix entry. Null if not used, and moved into the outfixes | 
|---|
| 641 | * list before we start building the bytecode (at which point it is set to | 
|---|
| 642 | * null again). */ | 
|---|
| 643 | std::unique_ptr<OutfixInfo> mpv_outfix = nullptr; | 
|---|
| 644 |  | 
|---|
| 645 | u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX. | 
|---|
| 646 |  | 
|---|
| 647 | u32 max_rose_anchored_floating_overlap; | 
|---|
| 648 |  | 
|---|
| 649 | rose_group boundary_group_mask = 0; | 
|---|
| 650 |  | 
|---|
| 651 | QueueIndexFactory qif; | 
|---|
| 652 | ReportManager &rm; | 
|---|
| 653 | SomSlotManager &ssm; | 
|---|
| 654 | SmallWriteBuild &smwr; | 
|---|
| 655 | const BoundaryReports &boundary; | 
|---|
| 656 |  | 
|---|
| 657 | private: | 
|---|
| 658 | ReportID next_nfa_report; | 
|---|
| 659 | }; | 
|---|
| 660 |  | 
|---|
| 661 | size_t calcLongLitThreshold(const RoseBuildImpl &build, | 
|---|
| 662 | const size_t historyRequired); | 
|---|
| 663 |  | 
|---|
| 664 | // Free functions, in rose_build_misc.cpp | 
|---|
| 665 |  | 
|---|
| 666 | bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); | 
|---|
| 667 | bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); | 
|---|
| 668 |  | 
|---|
| 669 | size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); | 
|---|
| 670 | ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits, | 
|---|
| 671 | const ue2_literal &s); | 
|---|
| 672 |  | 
|---|
| 673 | #ifndef NDEBUG | 
|---|
| 674 | bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); | 
|---|
| 675 | bool hasOrphanedTops(const RoseBuildImpl &build); | 
|---|
| 676 | #endif | 
|---|
| 677 |  | 
|---|
| 678 | u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm); | 
|---|
| 679 |  | 
|---|
| 680 | // Function that operates on a msk/cmp pair and a literal, as used in | 
|---|
| 681 | // hwlmLiteral, and zeroes msk elements that don't add any power to the | 
|---|
| 682 | // literal. | 
|---|
| 683 | void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk, | 
|---|
| 684 | std::vector<u8> &cmp); | 
|---|
| 685 |  | 
|---|
| 686 | u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); | 
|---|
| 687 | u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); | 
|---|
| 688 |  | 
|---|
| 689 | bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); | 
|---|
| 690 |  | 
|---|
| 691 | #ifndef NDEBUG | 
|---|
| 692 | bool canImplementGraphs(const RoseBuildImpl &tbi); | 
|---|
| 693 | #endif | 
|---|
| 694 |  | 
|---|
| 695 | } // namespace ue2 | 
|---|
| 696 |  | 
|---|
| 697 | namespace std { | 
|---|
| 698 |  | 
|---|
| 699 | template<> | 
|---|
| 700 | struct hash<ue2::left_id> { | 
|---|
| 701 | size_t operator()(const ue2::left_id &l) const { | 
|---|
| 702 | return l.hash(); | 
|---|
| 703 | } | 
|---|
| 704 | }; | 
|---|
| 705 |  | 
|---|
| 706 | template<> | 
|---|
| 707 | struct hash<ue2::suffix_id> { | 
|---|
| 708 | size_t operator()(const ue2::suffix_id &s) const { | 
|---|
| 709 | return s.hash(); | 
|---|
| 710 | } | 
|---|
| 711 | }; | 
|---|
| 712 |  | 
|---|
| 713 | } // namespace std | 
|---|
| 714 |  | 
|---|
| 715 | #endif /* ROSE_BUILD_IMPL_H */ | 
|---|
| 716 |  | 
|---|