| 1 | /* |
| 2 | * Copyright (c) 2015-2019, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #ifndef ROSE_BUILD_IMPL_H |
| 30 | #define ROSE_BUILD_IMPL_H |
| 31 | |
| 32 | #include "rose_build.h" |
| 33 | #include "rose_build_util.h" |
| 34 | #include "rose_common.h" |
| 35 | #include "rose_graph.h" |
| 36 | #include "nfa/mpvcompile.h" |
| 37 | #include "nfa/goughcompile.h" |
| 38 | #include "nfa/nfa_internal.h" |
| 39 | #include "nfagraph/ng_holder.h" |
| 40 | #include "nfagraph/ng_revacc.h" |
| 41 | #include "util/bytecode_ptr.h" |
| 42 | #include "util/flat_containers.h" |
| 43 | #include "util/hash.h" |
| 44 | #include "util/order_check.h" |
| 45 | #include "util/queue_index_factory.h" |
| 46 | #include "util/ue2string.h" |
| 47 | #include "util/unordered.h" |
| 48 | #include "util/verify_types.h" |
| 49 | |
| 50 | #include <deque> |
| 51 | #include <map> |
| 52 | #include <string> |
| 53 | #include <vector> |
| 54 | #include <boost/variant.hpp> |
| 55 | |
| 56 | struct RoseEngine; |
| 57 | |
| 58 | namespace ue2 { |
| 59 | |
| 60 | #define ROSE_GROUPS_MAX 64 |
| 61 | |
| 62 | #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 |
| 63 | |
| 64 | /** |
| 65 | * \brief The largest allowable "short" literal fragment which can be given to |
| 66 | * a literal matcher directly. |
| 67 | * |
| 68 | * Literals longer than this will be truncated to their suffix and confirmed in |
| 69 | * the Rose interpreter, either as "medium length" literals which can be |
| 70 | * confirmed from history, or "long literals" which make use of the streaming |
| 71 | * table support. |
| 72 | */ |
| 73 | #define ROSE_SHORT_LITERAL_LEN_MAX 8 |
| 74 | |
| 75 | struct BoundaryReports; |
| 76 | struct CastleProto; |
| 77 | struct CompileContext; |
| 78 | class ReportManager; |
| 79 | class SmallWriteBuild; |
| 80 | class SomSlotManager; |
| 81 | |
| 82 | struct suffix_id { |
| 83 | suffix_id(const RoseSuffixInfo &in) |
| 84 | : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), |
| 85 | h(in.haig.get()), t(in.tamarama.get()), |
| 86 | dfa_min_width(in.dfa_min_width), |
| 87 | dfa_max_width(in.dfa_max_width) { |
| 88 | assert(!g || g->kind == NFA_SUFFIX); |
| 89 | } |
| 90 | bool operator==(const suffix_id &b) const { |
| 91 | bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t; |
| 92 | assert(!rv || dfa_min_width == b.dfa_min_width); |
| 93 | assert(!rv || dfa_max_width == b.dfa_max_width); |
| 94 | return rv; |
| 95 | } |
| 96 | bool operator!=(const suffix_id &b) const { return !(*this == b); } |
| 97 | bool operator<(const suffix_id &b) const { |
| 98 | const suffix_id &a = *this; |
| 99 | ORDER_CHECK(g); |
| 100 | ORDER_CHECK(c); |
| 101 | ORDER_CHECK(d); |
| 102 | ORDER_CHECK(h); |
| 103 | ORDER_CHECK(t); |
| 104 | return false; |
| 105 | } |
| 106 | |
| 107 | NGHolder *graph() { |
| 108 | if (!d && !h) { |
| 109 | assert(dfa_min_width == depth(0)); |
| 110 | assert(dfa_max_width == depth::infinity()); |
| 111 | } |
| 112 | return g; |
| 113 | } |
| 114 | const NGHolder *graph() const { |
| 115 | if (!d && !h) { |
| 116 | assert(dfa_min_width == depth(0)); |
| 117 | assert(dfa_max_width == depth::infinity()); |
| 118 | } |
| 119 | return g; |
| 120 | } |
| 121 | CastleProto *castle() { |
| 122 | if (!d && !h) { |
| 123 | assert(dfa_min_width == depth(0)); |
| 124 | assert(dfa_max_width == depth::infinity()); |
| 125 | } |
| 126 | return c; |
| 127 | } |
| 128 | const CastleProto *castle() const { |
| 129 | if (!d && !h) { |
| 130 | assert(dfa_min_width == depth(0)); |
| 131 | assert(dfa_max_width == depth::infinity()); |
| 132 | } |
| 133 | return c; |
| 134 | } |
| 135 | TamaProto *tamarama() { |
| 136 | if (!d && !h) { |
| 137 | assert(dfa_min_width == depth(0)); |
| 138 | assert(dfa_max_width == depth::infinity()); |
| 139 | } |
| 140 | return t; |
| 141 | } |
| 142 | const TamaProto *tamarama() const { |
| 143 | if (!d && !h) { |
| 144 | assert(dfa_min_width == depth(0)); |
| 145 | assert(dfa_max_width == depth::infinity()); |
| 146 | } |
| 147 | return t; |
| 148 | } |
| 149 | |
| 150 | |
| 151 | raw_som_dfa *haig() { return h; } |
| 152 | const raw_som_dfa *haig() const { return h; } |
| 153 | raw_dfa *dfa() { return d; } |
| 154 | const raw_dfa *dfa() const { return d; } |
| 155 | |
| 156 | size_t hash() const; |
| 157 | |
| 158 | private: |
| 159 | NGHolder *g; |
| 160 | CastleProto *c; |
| 161 | raw_dfa *d; |
| 162 | raw_som_dfa *h; |
| 163 | TamaProto *t; |
| 164 | depth dfa_min_width; |
| 165 | depth dfa_max_width; |
| 166 | |
| 167 | friend depth findMinWidth(const suffix_id &s); |
| 168 | friend depth findMaxWidth(const suffix_id &s); |
| 169 | friend depth findMinWidth(const suffix_id &s, u32 top); |
| 170 | friend depth findMaxWidth(const suffix_id &s, u32 top); |
| 171 | }; |
| 172 | |
| 173 | std::set<ReportID> all_reports(const suffix_id &s); |
| 174 | std::set<u32> all_tops(const suffix_id &s); |
| 175 | bool has_eod_accepts(const suffix_id &s); |
| 176 | bool has_non_eod_accepts(const suffix_id &s); |
| 177 | depth findMinWidth(const suffix_id &s); |
| 178 | depth findMaxWidth(const suffix_id &s); |
| 179 | depth findMinWidth(const suffix_id &s, u32 top); |
| 180 | depth findMaxWidth(const suffix_id &s, u32 top); |
| 181 | |
| 182 | /** \brief represents an engine to the left of a rose role */ |
| 183 | struct left_id { |
| 184 | left_id(const LeftEngInfo &in) |
| 185 | : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), |
| 186 | h(in.haig.get()), dfa_min_width(in.dfa_min_width), |
| 187 | dfa_max_width(in.dfa_max_width) { |
| 188 | assert(!g || !has_managed_reports(*g)); |
| 189 | } |
| 190 | bool operator==(const left_id &b) const { |
| 191 | bool rv = g == b.g && c == b.c && h == b.h && d == b.d; |
| 192 | assert(!rv || dfa_min_width == b.dfa_min_width); |
| 193 | assert(!rv || dfa_max_width == b.dfa_max_width); |
| 194 | return rv; |
| 195 | } |
| 196 | bool operator!=(const left_id &b) const { return !(*this == b); } |
| 197 | bool operator<(const left_id &b) const { |
| 198 | const left_id &a = *this; |
| 199 | ORDER_CHECK(g); |
| 200 | ORDER_CHECK(c); |
| 201 | ORDER_CHECK(d); |
| 202 | ORDER_CHECK(h); |
| 203 | return false; |
| 204 | } |
| 205 | |
| 206 | NGHolder *graph() { |
| 207 | if (!d && !h) { |
| 208 | assert(dfa_min_width == depth(0)); |
| 209 | assert(dfa_max_width == depth::infinity()); |
| 210 | } |
| 211 | return g; |
| 212 | } |
| 213 | const NGHolder *graph() const { |
| 214 | if (!d && !h) { |
| 215 | assert(dfa_min_width == depth(0)); |
| 216 | assert(dfa_max_width == depth::infinity()); |
| 217 | } |
| 218 | return g; |
| 219 | } |
| 220 | CastleProto *castle() { |
| 221 | if (!d && !h) { |
| 222 | assert(dfa_min_width == depth(0)); |
| 223 | assert(dfa_max_width == depth::infinity()); |
| 224 | } |
| 225 | |
| 226 | return c; |
| 227 | } |
| 228 | const CastleProto *castle() const { |
| 229 | if (!d && !h) { |
| 230 | assert(dfa_min_width == depth(0)); |
| 231 | assert(dfa_max_width == depth::infinity()); |
| 232 | } |
| 233 | |
| 234 | return c; |
| 235 | } |
| 236 | raw_som_dfa *haig() { return h; } |
| 237 | const raw_som_dfa *haig() const { return h; } |
| 238 | raw_dfa *dfa() { return d; } |
| 239 | const raw_dfa *dfa() const { return d; } |
| 240 | |
| 241 | size_t hash() const; |
| 242 | |
| 243 | private: |
| 244 | NGHolder *g; |
| 245 | CastleProto *c; |
| 246 | raw_dfa *d; |
| 247 | raw_som_dfa *h; |
| 248 | depth dfa_min_width; |
| 249 | depth dfa_max_width; |
| 250 | |
| 251 | friend bool isAnchored(const left_id &r); |
| 252 | friend depth findMinWidth(const left_id &r); |
| 253 | friend depth findMaxWidth(const left_id &r); |
| 254 | }; |
| 255 | |
| 256 | std::set<u32> all_tops(const left_id &r); |
| 257 | std::set<ReportID> all_reports(const left_id &left); |
| 258 | bool isAnchored(const left_id &r); |
| 259 | depth findMinWidth(const left_id &r); |
| 260 | depth findMaxWidth(const left_id &r); |
| 261 | u32 num_tops(const left_id &r); |
| 262 | |
| 263 | struct rose_literal_info { |
| 264 | flat_set<u32> delayed_ids; |
| 265 | flat_set<RoseVertex> vertices; |
| 266 | rose_group group_mask = 0; |
| 267 | u32 undelayed_id = MO_INVALID_IDX; |
| 268 | bool squash_group = false; |
| 269 | bool requires_benefits = false; |
| 270 | }; |
| 271 | |
| 272 | /** |
| 273 | * \brief Main literal struct used at Rose build time. Numeric literal IDs |
| 274 | * used at build time point at these (via the RoseBuildImpl::literals map). |
| 275 | */ |
| 276 | struct rose_literal_id { |
| 277 | rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in, |
| 278 | u32 delay_in) |
| 279 | : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {} |
| 280 | |
| 281 | rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in, |
| 282 | const std::vector<u8> &cmp_in, rose_literal_table table_in, |
| 283 | u32 delay_in); |
| 284 | |
| 285 | ue2_literal s; |
| 286 | std::vector<u8> msk; |
| 287 | std::vector<u8> cmp; |
| 288 | rose_literal_table table; |
| 289 | u32 delay; |
| 290 | u32 distinctiveness; |
| 291 | |
| 292 | size_t elength(void) const { return s.length() + delay; } |
| 293 | size_t elength_including_mask(void) const { |
| 294 | size_t mask_len = msk.size(); |
| 295 | for (u8 c : msk) { |
| 296 | if (!c) { |
| 297 | mask_len--; |
| 298 | } else { |
| 299 | break; |
| 300 | } |
| 301 | } |
| 302 | return MAX(mask_len, s.length()) + delay; |
| 303 | } |
| 304 | |
| 305 | bool operator==(const rose_literal_id &b) const { |
| 306 | return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && |
| 307 | delay == b.delay && distinctiveness == b.distinctiveness; |
| 308 | } |
| 309 | |
| 310 | size_t hash() const { |
| 311 | return hash_all(s, msk, cmp, table, delay, distinctiveness); |
| 312 | } |
| 313 | }; |
| 314 | |
| 315 | static inline |
| 316 | bool operator<(const rose_literal_id &a, const rose_literal_id &b) { |
| 317 | ORDER_CHECK(distinctiveness); |
| 318 | ORDER_CHECK(table); |
| 319 | ORDER_CHECK(s); |
| 320 | ORDER_CHECK(delay); |
| 321 | ORDER_CHECK(msk); |
| 322 | ORDER_CHECK(cmp); |
| 323 | return 0; |
| 324 | } |
| 325 | |
| 326 | class RoseLiteralMap { |
| 327 | /** |
| 328 | * \brief Main storage for literals. |
| 329 | * |
| 330 | * Note that this cannot be a vector, as the present code relies on |
| 331 | * iterator stability when iterating over this list and adding to it inside |
| 332 | * the loop. |
| 333 | */ |
| 334 | std::deque<rose_literal_id> lits; |
| 335 | |
| 336 | /** \brief Quick-lookup index from literal -> index in lits. */ |
| 337 | ue2_unordered_map<rose_literal_id, u32> lits_index; |
| 338 | |
| 339 | public: |
| 340 | std::pair<u32, bool> insert(const rose_literal_id &lit) { |
| 341 | auto it = lits_index.find(lit); |
| 342 | if (it != lits_index.end()) { |
| 343 | u32 idx = it->second; |
| 344 | auto &l = lits.at(idx); |
| 345 | if (!lit.s.get_pure() && l.s.get_pure()) { |
| 346 | lits_index.erase(l); |
| 347 | l.s.unset_pure(); |
| 348 | lits_index.emplace(l, idx); |
| 349 | } |
| 350 | return {idx, false}; |
| 351 | } |
| 352 | u32 id = verify_u32(lits.size()); |
| 353 | lits.push_back(lit); |
| 354 | lits_index.emplace(lit, id); |
| 355 | return {id, true}; |
| 356 | } |
| 357 | |
| 358 | // Erase the last num elements. |
| 359 | void erase_back(size_t num) { |
| 360 | assert(num <= lits.size()); |
| 361 | for (size_t i = 0; i < num; i++) { |
| 362 | lits_index.erase(lits.back()); |
| 363 | lits.pop_back(); |
| 364 | } |
| 365 | assert(lits.size() == lits_index.size()); |
| 366 | } |
| 367 | |
| 368 | const rose_literal_id &at(u32 id) const { |
| 369 | assert(id < lits.size()); |
| 370 | return lits.at(id); |
| 371 | } |
| 372 | |
| 373 | using const_iterator = decltype(lits)::const_iterator; |
| 374 | const_iterator begin() const { return lits.begin(); } |
| 375 | const_iterator end() const { return lits.end(); } |
| 376 | |
| 377 | size_t size() const { |
| 378 | return lits.size(); |
| 379 | } |
| 380 | }; |
| 381 | |
| 382 | struct simple_anchored_info { |
| 383 | simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) |
| 384 | : min_bound(min_b), max_bound(max_b), literal(lit) {} |
| 385 | u32 min_bound; /**< min number of characters required before literal can |
| 386 | * start matching */ |
| 387 | u32 max_bound; /**< max number of characters allowed before literal can |
| 388 | * start matching */ |
| 389 | ue2_literal literal; |
| 390 | }; |
| 391 | |
| 392 | static really_inline |
| 393 | bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { |
| 394 | ORDER_CHECK(min_bound); |
| 395 | ORDER_CHECK(max_bound); |
| 396 | ORDER_CHECK(literal); |
| 397 | return 0; |
| 398 | } |
| 399 | |
| 400 | struct MpvProto { |
| 401 | bool empty() const { |
| 402 | return puffettes.empty() && triggered_puffettes.empty(); |
| 403 | } |
| 404 | void reset() { |
| 405 | puffettes.clear(); |
| 406 | triggered_puffettes.clear(); |
| 407 | } |
| 408 | std::vector<raw_puff> puffettes; |
| 409 | std::vector<raw_puff> triggered_puffettes; |
| 410 | }; |
| 411 | |
| 412 | struct OutfixInfo { |
| 413 | template<class T> |
| 414 | explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {} |
| 415 | |
| 416 | explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {} |
| 417 | |
| 418 | u32 get_queue(QueueIndexFactory &qif); |
| 419 | |
| 420 | u32 get_queue() const { |
| 421 | assert(queue != ~0U); |
| 422 | return queue; |
| 423 | } |
| 424 | |
| 425 | bool is_nonempty_mpv() const { |
| 426 | auto *m = boost::get<MpvProto>(&proto); |
| 427 | return m && !m->empty(); |
| 428 | } |
| 429 | |
| 430 | bool is_dead() const { |
| 431 | auto *m = boost::get<MpvProto>(&proto); |
| 432 | if (m) { |
| 433 | return m->empty(); |
| 434 | } |
| 435 | return boost::get<boost::blank>(&proto) != nullptr; |
| 436 | } |
| 437 | |
| 438 | void clear() { |
| 439 | proto = boost::blank(); |
| 440 | } |
| 441 | |
| 442 | // Convenience accessor functions. |
| 443 | |
| 444 | NGHolder *holder() { |
| 445 | auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); |
| 446 | return up ? up->get() : nullptr; |
| 447 | } |
| 448 | raw_dfa *rdfa() { |
| 449 | auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); |
| 450 | return up ? up->get() : nullptr; |
| 451 | } |
| 452 | raw_som_dfa *haig() { |
| 453 | auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); |
| 454 | return up ? up->get() : nullptr; |
| 455 | } |
| 456 | MpvProto *mpv() { |
| 457 | return boost::get<MpvProto>(&proto); |
| 458 | } |
| 459 | |
| 460 | // Convenience const accessor functions. |
| 461 | |
| 462 | const NGHolder *holder() const { |
| 463 | auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); |
| 464 | return up ? up->get() : nullptr; |
| 465 | } |
| 466 | const raw_dfa *rdfa() const { |
| 467 | auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); |
| 468 | return up ? up->get() : nullptr; |
| 469 | } |
| 470 | const raw_som_dfa *haig() const { |
| 471 | auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); |
| 472 | return up ? up->get() : nullptr; |
| 473 | } |
| 474 | const MpvProto *mpv() const { |
| 475 | return boost::get<MpvProto>(&proto); |
| 476 | } |
| 477 | |
| 478 | /** |
| 479 | * \brief Variant wrapping the various engine types. If this is |
| 480 | * boost::blank, it means that this outfix is unused (dead). |
| 481 | */ |
| 482 | boost::variant< |
| 483 | boost::blank, |
| 484 | std::unique_ptr<NGHolder>, |
| 485 | std::unique_ptr<raw_dfa>, |
| 486 | std::unique_ptr<raw_som_dfa>, |
| 487 | MpvProto> proto = boost::blank(); |
| 488 | |
| 489 | RevAccInfo rev_info; |
| 490 | u32 maxBAWidth = 0; //!< max bi-anchored width |
| 491 | depth minWidth{depth::infinity()}; |
| 492 | depth maxWidth{0}; |
| 493 | u64a maxOffset = 0; |
| 494 | bool in_sbmatcher = false; //!< handled by small-block matcher. |
| 495 | |
| 496 | private: |
| 497 | u32 queue = ~0U; |
| 498 | }; |
| 499 | |
| 500 | std::set<ReportID> all_reports(const OutfixInfo &outfix); |
| 501 | |
| 502 | // Concrete impl class |
| 503 | class RoseBuildImpl : public RoseBuild { |
| 504 | public: |
| 505 | RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr, |
| 506 | const CompileContext &cc, const BoundaryReports &boundary); |
| 507 | |
| 508 | ~RoseBuildImpl() override; |
| 509 | |
| 510 | // Adds a single literal. |
| 511 | void add(bool anchored, bool eod, const ue2_literal &lit, |
| 512 | const flat_set<ReportID> &ids) override; |
| 513 | |
| 514 | bool addRose(const RoseInGraph &ig, bool prefilter) override; |
| 515 | bool addSombeRose(const RoseInGraph &ig) override; |
| 516 | |
| 517 | bool addOutfix(const NGHolder &h) override; |
| 518 | bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override; |
| 519 | bool addOutfix(const raw_puff &rp) override; |
| 520 | |
| 521 | bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override; |
| 522 | |
| 523 | // Returns true if we were able to add it as a mask |
| 524 | bool add(bool anchored, const std::vector<CharReach> &mask, |
| 525 | const flat_set<ReportID> &reports) override; |
| 526 | |
| 527 | bool addAnchoredAcyclic(const NGHolder &graph) override; |
| 528 | |
| 529 | bool validateMask(const std::vector<CharReach> &mask, |
| 530 | const flat_set<ReportID> &reports, bool anchored, |
| 531 | bool eod) const override; |
| 532 | void addMask(const std::vector<CharReach> &mask, |
| 533 | const flat_set<ReportID> &reports, bool anchored, |
| 534 | bool eod) override; |
| 535 | |
| 536 | // Construct a runtime implementation. |
| 537 | bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override; |
| 538 | bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth); |
| 539 | |
| 540 | void setSom() override { hasSom = true; } |
| 541 | |
| 542 | std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override; |
| 543 | |
| 544 | // Find the maximum bound on the edges to this vertex's successors. |
| 545 | u32 calcSuccMaxBound(RoseVertex u) const; |
| 546 | |
| 547 | /* Returns the ID of the given literal in the literal map, adding it if |
| 548 | * necessary. */ |
| 549 | u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table); |
| 550 | |
| 551 | // Variant with msk/cmp. |
| 552 | u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk, |
| 553 | const std::vector<u8> &cmp, u32 delay, |
| 554 | rose_literal_table table); |
| 555 | |
| 556 | u32 getNewLiteralId(void); |
| 557 | |
| 558 | void removeVertices(const std::vector<RoseVertex> &dead); |
| 559 | |
| 560 | // Is the Rose anchored? |
| 561 | bool hasNoFloatingRoots() const; |
| 562 | |
| 563 | u32 calcHistoryRequired() const; |
| 564 | |
| 565 | rose_group getInitialGroups() const; |
| 566 | rose_group getSuccGroups(RoseVertex start) const; |
| 567 | rose_group getGroups(RoseVertex v) const; |
| 568 | |
| 569 | bool hasDelayedLiteral(RoseVertex v) const; |
| 570 | bool hasDelayPred(RoseVertex v) const; |
| 571 | bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const; |
| 572 | bool hasAnchoredTablePred(RoseVertex v) const; |
| 573 | |
| 574 | // Is the given vertex a successor of either root or anchored_root? |
| 575 | bool isRootSuccessor(const RoseVertex &v) const; |
| 576 | /* Is the given vertex a successor of something other than root or |
| 577 | * anchored_root? */ |
| 578 | bool isNonRootSuccessor(const RoseVertex &v) const; |
| 579 | |
| 580 | bool isDirectReport(u32 id) const; |
| 581 | bool isDelayed(u32 id) const; |
| 582 | |
| 583 | bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored |
| 584 | * table */ |
| 585 | bool isFloating(RoseVertex v) const; /* true iff has literal in floating |
| 586 | * table */ |
| 587 | bool isInETable(RoseVertex v) const; /* true iff has literal in eod |
| 588 | * table */ |
| 589 | |
| 590 | size_t maxLiteralLen(RoseVertex v) const; |
| 591 | size_t minLiteralLen(RoseVertex v) const; |
| 592 | |
| 593 | // max overlap considered for every pair (ulit, vlit). |
| 594 | size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; |
| 595 | |
| 596 | bool isPseudoStar(const RoseEdge &e) const; |
| 597 | bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; |
| 598 | bool hasOnlyPseudoStarInEdges(RoseVertex v) const; |
| 599 | |
| 600 | bool isAnyStart(const RoseVertex &v) const { |
| 601 | return v == root || v == anchored_root; |
| 602 | } |
| 603 | |
| 604 | bool isVirtualVertex(const RoseVertex &v) const { |
| 605 | return g[v].eod_accept || isAnyStart(v); |
| 606 | } |
| 607 | |
| 608 | void handleMixedSensitivity(void); |
| 609 | |
| 610 | void findTransientLeftfixes(void); |
| 611 | |
| 612 | const CompileContext &cc; |
| 613 | RoseGraph g; |
| 614 | const RoseVertex root; |
| 615 | const RoseVertex anchored_root; |
| 616 | RoseLiteralMap literals; |
| 617 | std::map<RoseVertex, RoseVertex> ghost; |
| 618 | ReportID getNewNfaReport() override { |
| 619 | return next_nfa_report++; |
| 620 | } |
| 621 | std::deque<rose_literal_info> literal_info; |
| 622 | bool hasSom; //!< at least one pattern requires SOM. |
| 623 | std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas; |
| 624 | std::map<simple_anchored_info, std::set<u32>> anchored_simple; |
| 625 | std::map<u32, std::set<u32> > group_to_literal; |
| 626 | u32 group_end; |
| 627 | |
| 628 | u32 ematcher_region_size; /**< number of bytes the eod table runs over */ |
| 629 | |
| 630 | /** \brief Mapping from anchored literal ID to the original literal suffix |
| 631 | * present when the literal was added to the literal matcher. Used for |
| 632 | * overlap calculation in history assignment. */ |
| 633 | std::map<u32, rose_literal_id> anchoredLitSuffix; |
| 634 | |
| 635 | ue2_unordered_set<left_id> transient; |
| 636 | ue2_unordered_map<left_id, rose_group> rose_squash_masks; |
| 637 | |
| 638 | std::vector<OutfixInfo> outfixes; |
| 639 | |
| 640 | /** \brief MPV outfix entry. Null if not used, and moved into the outfixes |
| 641 | * list before we start building the bytecode (at which point it is set to |
| 642 | * null again). */ |
| 643 | std::unique_ptr<OutfixInfo> mpv_outfix = nullptr; |
| 644 | |
| 645 | u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX. |
| 646 | |
| 647 | u32 max_rose_anchored_floating_overlap; |
| 648 | |
| 649 | rose_group boundary_group_mask = 0; |
| 650 | |
| 651 | QueueIndexFactory qif; |
| 652 | ReportManager &rm; |
| 653 | SomSlotManager &ssm; |
| 654 | SmallWriteBuild &smwr; |
| 655 | const BoundaryReports &boundary; |
| 656 | |
| 657 | private: |
| 658 | ReportID next_nfa_report; |
| 659 | }; |
| 660 | |
| 661 | size_t calcLongLitThreshold(const RoseBuildImpl &build, |
| 662 | const size_t historyRequired); |
| 663 | |
| 664 | // Free functions, in rose_build_misc.cpp |
| 665 | |
| 666 | bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); |
| 667 | bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); |
| 668 | |
| 669 | size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); |
| 670 | ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits, |
| 671 | const ue2_literal &s); |
| 672 | |
| 673 | #ifndef NDEBUG |
| 674 | bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); |
| 675 | bool hasOrphanedTops(const RoseBuildImpl &build); |
| 676 | #endif |
| 677 | |
| 678 | u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm); |
| 679 | |
| 680 | // Function that operates on a msk/cmp pair and a literal, as used in |
| 681 | // hwlmLiteral, and zeroes msk elements that don't add any power to the |
| 682 | // literal. |
| 683 | void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk, |
| 684 | std::vector<u8> &cmp); |
| 685 | |
| 686 | u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); |
| 687 | u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); |
| 688 | |
| 689 | bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); |
| 690 | |
| 691 | #ifndef NDEBUG |
| 692 | bool canImplementGraphs(const RoseBuildImpl &tbi); |
| 693 | #endif |
| 694 | |
| 695 | } // namespace ue2 |
| 696 | |
| 697 | namespace std { |
| 698 | |
| 699 | template<> |
| 700 | struct hash<ue2::left_id> { |
| 701 | size_t operator()(const ue2::left_id &l) const { |
| 702 | return l.hash(); |
| 703 | } |
| 704 | }; |
| 705 | |
| 706 | template<> |
| 707 | struct hash<ue2::suffix_id> { |
| 708 | size_t operator()(const ue2::suffix_id &s) const { |
| 709 | return s.hash(); |
| 710 | } |
| 711 | }; |
| 712 | |
| 713 | } // namespace std |
| 714 | |
| 715 | #endif /* ROSE_BUILD_IMPL_H */ |
| 716 | |