1 | /* |
2 | * Copyright (c) 2015-2019, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #ifndef ROSE_BUILD_IMPL_H |
30 | #define ROSE_BUILD_IMPL_H |
31 | |
32 | #include "rose_build.h" |
33 | #include "rose_build_util.h" |
34 | #include "rose_common.h" |
35 | #include "rose_graph.h" |
36 | #include "nfa/mpvcompile.h" |
37 | #include "nfa/goughcompile.h" |
38 | #include "nfa/nfa_internal.h" |
39 | #include "nfagraph/ng_holder.h" |
40 | #include "nfagraph/ng_revacc.h" |
41 | #include "util/bytecode_ptr.h" |
42 | #include "util/flat_containers.h" |
43 | #include "util/hash.h" |
44 | #include "util/order_check.h" |
45 | #include "util/queue_index_factory.h" |
46 | #include "util/ue2string.h" |
47 | #include "util/unordered.h" |
48 | #include "util/verify_types.h" |
49 | |
50 | #include <deque> |
51 | #include <map> |
52 | #include <string> |
53 | #include <vector> |
54 | #include <boost/variant.hpp> |
55 | |
56 | struct RoseEngine; |
57 | |
58 | namespace ue2 { |
59 | |
60 | #define ROSE_GROUPS_MAX 64 |
61 | |
62 | #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 |
63 | |
64 | /** |
65 | * \brief The largest allowable "short" literal fragment which can be given to |
66 | * a literal matcher directly. |
67 | * |
68 | * Literals longer than this will be truncated to their suffix and confirmed in |
69 | * the Rose interpreter, either as "medium length" literals which can be |
70 | * confirmed from history, or "long literals" which make use of the streaming |
71 | * table support. |
72 | */ |
73 | #define ROSE_SHORT_LITERAL_LEN_MAX 8 |
74 | |
75 | struct BoundaryReports; |
76 | struct CastleProto; |
77 | struct CompileContext; |
78 | class ReportManager; |
79 | class SmallWriteBuild; |
80 | class SomSlotManager; |
81 | |
82 | struct suffix_id { |
83 | suffix_id(const RoseSuffixInfo &in) |
84 | : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), |
85 | h(in.haig.get()), t(in.tamarama.get()), |
86 | dfa_min_width(in.dfa_min_width), |
87 | dfa_max_width(in.dfa_max_width) { |
88 | assert(!g || g->kind == NFA_SUFFIX); |
89 | } |
90 | bool operator==(const suffix_id &b) const { |
91 | bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t; |
92 | assert(!rv || dfa_min_width == b.dfa_min_width); |
93 | assert(!rv || dfa_max_width == b.dfa_max_width); |
94 | return rv; |
95 | } |
96 | bool operator!=(const suffix_id &b) const { return !(*this == b); } |
97 | bool operator<(const suffix_id &b) const { |
98 | const suffix_id &a = *this; |
99 | ORDER_CHECK(g); |
100 | ORDER_CHECK(c); |
101 | ORDER_CHECK(d); |
102 | ORDER_CHECK(h); |
103 | ORDER_CHECK(t); |
104 | return false; |
105 | } |
106 | |
107 | NGHolder *graph() { |
108 | if (!d && !h) { |
109 | assert(dfa_min_width == depth(0)); |
110 | assert(dfa_max_width == depth::infinity()); |
111 | } |
112 | return g; |
113 | } |
114 | const NGHolder *graph() const { |
115 | if (!d && !h) { |
116 | assert(dfa_min_width == depth(0)); |
117 | assert(dfa_max_width == depth::infinity()); |
118 | } |
119 | return g; |
120 | } |
121 | CastleProto *castle() { |
122 | if (!d && !h) { |
123 | assert(dfa_min_width == depth(0)); |
124 | assert(dfa_max_width == depth::infinity()); |
125 | } |
126 | return c; |
127 | } |
128 | const CastleProto *castle() const { |
129 | if (!d && !h) { |
130 | assert(dfa_min_width == depth(0)); |
131 | assert(dfa_max_width == depth::infinity()); |
132 | } |
133 | return c; |
134 | } |
135 | TamaProto *tamarama() { |
136 | if (!d && !h) { |
137 | assert(dfa_min_width == depth(0)); |
138 | assert(dfa_max_width == depth::infinity()); |
139 | } |
140 | return t; |
141 | } |
142 | const TamaProto *tamarama() const { |
143 | if (!d && !h) { |
144 | assert(dfa_min_width == depth(0)); |
145 | assert(dfa_max_width == depth::infinity()); |
146 | } |
147 | return t; |
148 | } |
149 | |
150 | |
151 | raw_som_dfa *haig() { return h; } |
152 | const raw_som_dfa *haig() const { return h; } |
153 | raw_dfa *dfa() { return d; } |
154 | const raw_dfa *dfa() const { return d; } |
155 | |
156 | size_t hash() const; |
157 | |
158 | private: |
159 | NGHolder *g; |
160 | CastleProto *c; |
161 | raw_dfa *d; |
162 | raw_som_dfa *h; |
163 | TamaProto *t; |
164 | depth dfa_min_width; |
165 | depth dfa_max_width; |
166 | |
167 | friend depth findMinWidth(const suffix_id &s); |
168 | friend depth findMaxWidth(const suffix_id &s); |
169 | friend depth findMinWidth(const suffix_id &s, u32 top); |
170 | friend depth findMaxWidth(const suffix_id &s, u32 top); |
171 | }; |
172 | |
173 | std::set<ReportID> all_reports(const suffix_id &s); |
174 | std::set<u32> all_tops(const suffix_id &s); |
175 | bool has_eod_accepts(const suffix_id &s); |
176 | bool has_non_eod_accepts(const suffix_id &s); |
177 | depth findMinWidth(const suffix_id &s); |
178 | depth findMaxWidth(const suffix_id &s); |
179 | depth findMinWidth(const suffix_id &s, u32 top); |
180 | depth findMaxWidth(const suffix_id &s, u32 top); |
181 | |
182 | /** \brief represents an engine to the left of a rose role */ |
183 | struct left_id { |
184 | left_id(const LeftEngInfo &in) |
185 | : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), |
186 | h(in.haig.get()), dfa_min_width(in.dfa_min_width), |
187 | dfa_max_width(in.dfa_max_width) { |
188 | assert(!g || !has_managed_reports(*g)); |
189 | } |
190 | bool operator==(const left_id &b) const { |
191 | bool rv = g == b.g && c == b.c && h == b.h && d == b.d; |
192 | assert(!rv || dfa_min_width == b.dfa_min_width); |
193 | assert(!rv || dfa_max_width == b.dfa_max_width); |
194 | return rv; |
195 | } |
196 | bool operator!=(const left_id &b) const { return !(*this == b); } |
197 | bool operator<(const left_id &b) const { |
198 | const left_id &a = *this; |
199 | ORDER_CHECK(g); |
200 | ORDER_CHECK(c); |
201 | ORDER_CHECK(d); |
202 | ORDER_CHECK(h); |
203 | return false; |
204 | } |
205 | |
206 | NGHolder *graph() { |
207 | if (!d && !h) { |
208 | assert(dfa_min_width == depth(0)); |
209 | assert(dfa_max_width == depth::infinity()); |
210 | } |
211 | return g; |
212 | } |
213 | const NGHolder *graph() const { |
214 | if (!d && !h) { |
215 | assert(dfa_min_width == depth(0)); |
216 | assert(dfa_max_width == depth::infinity()); |
217 | } |
218 | return g; |
219 | } |
220 | CastleProto *castle() { |
221 | if (!d && !h) { |
222 | assert(dfa_min_width == depth(0)); |
223 | assert(dfa_max_width == depth::infinity()); |
224 | } |
225 | |
226 | return c; |
227 | } |
228 | const CastleProto *castle() const { |
229 | if (!d && !h) { |
230 | assert(dfa_min_width == depth(0)); |
231 | assert(dfa_max_width == depth::infinity()); |
232 | } |
233 | |
234 | return c; |
235 | } |
236 | raw_som_dfa *haig() { return h; } |
237 | const raw_som_dfa *haig() const { return h; } |
238 | raw_dfa *dfa() { return d; } |
239 | const raw_dfa *dfa() const { return d; } |
240 | |
241 | size_t hash() const; |
242 | |
243 | private: |
244 | NGHolder *g; |
245 | CastleProto *c; |
246 | raw_dfa *d; |
247 | raw_som_dfa *h; |
248 | depth dfa_min_width; |
249 | depth dfa_max_width; |
250 | |
251 | friend bool isAnchored(const left_id &r); |
252 | friend depth findMinWidth(const left_id &r); |
253 | friend depth findMaxWidth(const left_id &r); |
254 | }; |
255 | |
256 | std::set<u32> all_tops(const left_id &r); |
257 | std::set<ReportID> all_reports(const left_id &left); |
258 | bool isAnchored(const left_id &r); |
259 | depth findMinWidth(const left_id &r); |
260 | depth findMaxWidth(const left_id &r); |
261 | u32 num_tops(const left_id &r); |
262 | |
263 | struct rose_literal_info { |
264 | flat_set<u32> delayed_ids; |
265 | flat_set<RoseVertex> vertices; |
266 | rose_group group_mask = 0; |
267 | u32 undelayed_id = MO_INVALID_IDX; |
268 | bool squash_group = false; |
269 | bool requires_benefits = false; |
270 | }; |
271 | |
272 | /** |
273 | * \brief Main literal struct used at Rose build time. Numeric literal IDs |
274 | * used at build time point at these (via the RoseBuildImpl::literals map). |
275 | */ |
276 | struct rose_literal_id { |
277 | rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in, |
278 | u32 delay_in) |
279 | : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {} |
280 | |
281 | rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in, |
282 | const std::vector<u8> &cmp_in, rose_literal_table table_in, |
283 | u32 delay_in); |
284 | |
285 | ue2_literal s; |
286 | std::vector<u8> msk; |
287 | std::vector<u8> cmp; |
288 | rose_literal_table table; |
289 | u32 delay; |
290 | u32 distinctiveness; |
291 | |
292 | size_t elength(void) const { return s.length() + delay; } |
293 | size_t elength_including_mask(void) const { |
294 | size_t mask_len = msk.size(); |
295 | for (u8 c : msk) { |
296 | if (!c) { |
297 | mask_len--; |
298 | } else { |
299 | break; |
300 | } |
301 | } |
302 | return MAX(mask_len, s.length()) + delay; |
303 | } |
304 | |
305 | bool operator==(const rose_literal_id &b) const { |
306 | return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && |
307 | delay == b.delay && distinctiveness == b.distinctiveness; |
308 | } |
309 | |
310 | size_t hash() const { |
311 | return hash_all(s, msk, cmp, table, delay, distinctiveness); |
312 | } |
313 | }; |
314 | |
315 | static inline |
316 | bool operator<(const rose_literal_id &a, const rose_literal_id &b) { |
317 | ORDER_CHECK(distinctiveness); |
318 | ORDER_CHECK(table); |
319 | ORDER_CHECK(s); |
320 | ORDER_CHECK(delay); |
321 | ORDER_CHECK(msk); |
322 | ORDER_CHECK(cmp); |
323 | return 0; |
324 | } |
325 | |
326 | class RoseLiteralMap { |
327 | /** |
328 | * \brief Main storage for literals. |
329 | * |
330 | * Note that this cannot be a vector, as the present code relies on |
331 | * iterator stability when iterating over this list and adding to it inside |
332 | * the loop. |
333 | */ |
334 | std::deque<rose_literal_id> lits; |
335 | |
336 | /** \brief Quick-lookup index from literal -> index in lits. */ |
337 | ue2_unordered_map<rose_literal_id, u32> lits_index; |
338 | |
339 | public: |
340 | std::pair<u32, bool> insert(const rose_literal_id &lit) { |
341 | auto it = lits_index.find(lit); |
342 | if (it != lits_index.end()) { |
343 | u32 idx = it->second; |
344 | auto &l = lits.at(idx); |
345 | if (!lit.s.get_pure() && l.s.get_pure()) { |
346 | lits_index.erase(l); |
347 | l.s.unset_pure(); |
348 | lits_index.emplace(l, idx); |
349 | } |
350 | return {idx, false}; |
351 | } |
352 | u32 id = verify_u32(lits.size()); |
353 | lits.push_back(lit); |
354 | lits_index.emplace(lit, id); |
355 | return {id, true}; |
356 | } |
357 | |
358 | // Erase the last num elements. |
359 | void erase_back(size_t num) { |
360 | assert(num <= lits.size()); |
361 | for (size_t i = 0; i < num; i++) { |
362 | lits_index.erase(lits.back()); |
363 | lits.pop_back(); |
364 | } |
365 | assert(lits.size() == lits_index.size()); |
366 | } |
367 | |
368 | const rose_literal_id &at(u32 id) const { |
369 | assert(id < lits.size()); |
370 | return lits.at(id); |
371 | } |
372 | |
373 | using const_iterator = decltype(lits)::const_iterator; |
374 | const_iterator begin() const { return lits.begin(); } |
375 | const_iterator end() const { return lits.end(); } |
376 | |
377 | size_t size() const { |
378 | return lits.size(); |
379 | } |
380 | }; |
381 | |
382 | struct simple_anchored_info { |
383 | simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) |
384 | : min_bound(min_b), max_bound(max_b), literal(lit) {} |
385 | u32 min_bound; /**< min number of characters required before literal can |
386 | * start matching */ |
387 | u32 max_bound; /**< max number of characters allowed before literal can |
388 | * start matching */ |
389 | ue2_literal literal; |
390 | }; |
391 | |
392 | static really_inline |
393 | bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { |
394 | ORDER_CHECK(min_bound); |
395 | ORDER_CHECK(max_bound); |
396 | ORDER_CHECK(literal); |
397 | return 0; |
398 | } |
399 | |
400 | struct MpvProto { |
401 | bool empty() const { |
402 | return puffettes.empty() && triggered_puffettes.empty(); |
403 | } |
404 | void reset() { |
405 | puffettes.clear(); |
406 | triggered_puffettes.clear(); |
407 | } |
408 | std::vector<raw_puff> puffettes; |
409 | std::vector<raw_puff> triggered_puffettes; |
410 | }; |
411 | |
412 | struct OutfixInfo { |
413 | template<class T> |
414 | explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {} |
415 | |
416 | explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {} |
417 | |
418 | u32 get_queue(QueueIndexFactory &qif); |
419 | |
420 | u32 get_queue() const { |
421 | assert(queue != ~0U); |
422 | return queue; |
423 | } |
424 | |
425 | bool is_nonempty_mpv() const { |
426 | auto *m = boost::get<MpvProto>(&proto); |
427 | return m && !m->empty(); |
428 | } |
429 | |
430 | bool is_dead() const { |
431 | auto *m = boost::get<MpvProto>(&proto); |
432 | if (m) { |
433 | return m->empty(); |
434 | } |
435 | return boost::get<boost::blank>(&proto) != nullptr; |
436 | } |
437 | |
438 | void clear() { |
439 | proto = boost::blank(); |
440 | } |
441 | |
442 | // Convenience accessor functions. |
443 | |
444 | NGHolder *holder() { |
445 | auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); |
446 | return up ? up->get() : nullptr; |
447 | } |
448 | raw_dfa *rdfa() { |
449 | auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); |
450 | return up ? up->get() : nullptr; |
451 | } |
452 | raw_som_dfa *haig() { |
453 | auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); |
454 | return up ? up->get() : nullptr; |
455 | } |
456 | MpvProto *mpv() { |
457 | return boost::get<MpvProto>(&proto); |
458 | } |
459 | |
460 | // Convenience const accessor functions. |
461 | |
462 | const NGHolder *holder() const { |
463 | auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); |
464 | return up ? up->get() : nullptr; |
465 | } |
466 | const raw_dfa *rdfa() const { |
467 | auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); |
468 | return up ? up->get() : nullptr; |
469 | } |
470 | const raw_som_dfa *haig() const { |
471 | auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); |
472 | return up ? up->get() : nullptr; |
473 | } |
474 | const MpvProto *mpv() const { |
475 | return boost::get<MpvProto>(&proto); |
476 | } |
477 | |
478 | /** |
479 | * \brief Variant wrapping the various engine types. If this is |
480 | * boost::blank, it means that this outfix is unused (dead). |
481 | */ |
482 | boost::variant< |
483 | boost::blank, |
484 | std::unique_ptr<NGHolder>, |
485 | std::unique_ptr<raw_dfa>, |
486 | std::unique_ptr<raw_som_dfa>, |
487 | MpvProto> proto = boost::blank(); |
488 | |
489 | RevAccInfo rev_info; |
490 | u32 maxBAWidth = 0; //!< max bi-anchored width |
491 | depth minWidth{depth::infinity()}; |
492 | depth maxWidth{0}; |
493 | u64a maxOffset = 0; |
494 | bool in_sbmatcher = false; //!< handled by small-block matcher. |
495 | |
496 | private: |
497 | u32 queue = ~0U; |
498 | }; |
499 | |
500 | std::set<ReportID> all_reports(const OutfixInfo &outfix); |
501 | |
502 | // Concrete impl class |
503 | class RoseBuildImpl : public RoseBuild { |
504 | public: |
505 | RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr, |
506 | const CompileContext &cc, const BoundaryReports &boundary); |
507 | |
508 | ~RoseBuildImpl() override; |
509 | |
510 | // Adds a single literal. |
511 | void add(bool anchored, bool eod, const ue2_literal &lit, |
512 | const flat_set<ReportID> &ids) override; |
513 | |
514 | bool addRose(const RoseInGraph &ig, bool prefilter) override; |
515 | bool addSombeRose(const RoseInGraph &ig) override; |
516 | |
517 | bool addOutfix(const NGHolder &h) override; |
518 | bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override; |
519 | bool addOutfix(const raw_puff &rp) override; |
520 | |
521 | bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override; |
522 | |
523 | // Returns true if we were able to add it as a mask |
524 | bool add(bool anchored, const std::vector<CharReach> &mask, |
525 | const flat_set<ReportID> &reports) override; |
526 | |
527 | bool addAnchoredAcyclic(const NGHolder &graph) override; |
528 | |
529 | bool validateMask(const std::vector<CharReach> &mask, |
530 | const flat_set<ReportID> &reports, bool anchored, |
531 | bool eod) const override; |
532 | void addMask(const std::vector<CharReach> &mask, |
533 | const flat_set<ReportID> &reports, bool anchored, |
534 | bool eod) override; |
535 | |
536 | // Construct a runtime implementation. |
537 | bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override; |
538 | bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth); |
539 | |
540 | void setSom() override { hasSom = true; } |
541 | |
542 | std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override; |
543 | |
544 | // Find the maximum bound on the edges to this vertex's successors. |
545 | u32 calcSuccMaxBound(RoseVertex u) const; |
546 | |
547 | /* Returns the ID of the given literal in the literal map, adding it if |
548 | * necessary. */ |
549 | u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table); |
550 | |
551 | // Variant with msk/cmp. |
552 | u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk, |
553 | const std::vector<u8> &cmp, u32 delay, |
554 | rose_literal_table table); |
555 | |
556 | u32 getNewLiteralId(void); |
557 | |
558 | void removeVertices(const std::vector<RoseVertex> &dead); |
559 | |
560 | // Is the Rose anchored? |
561 | bool hasNoFloatingRoots() const; |
562 | |
563 | u32 calcHistoryRequired() const; |
564 | |
565 | rose_group getInitialGroups() const; |
566 | rose_group getSuccGroups(RoseVertex start) const; |
567 | rose_group getGroups(RoseVertex v) const; |
568 | |
569 | bool hasDelayedLiteral(RoseVertex v) const; |
570 | bool hasDelayPred(RoseVertex v) const; |
571 | bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const; |
572 | bool hasAnchoredTablePred(RoseVertex v) const; |
573 | |
574 | // Is the given vertex a successor of either root or anchored_root? |
575 | bool isRootSuccessor(const RoseVertex &v) const; |
576 | /* Is the given vertex a successor of something other than root or |
577 | * anchored_root? */ |
578 | bool isNonRootSuccessor(const RoseVertex &v) const; |
579 | |
580 | bool isDirectReport(u32 id) const; |
581 | bool isDelayed(u32 id) const; |
582 | |
583 | bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored |
584 | * table */ |
585 | bool isFloating(RoseVertex v) const; /* true iff has literal in floating |
586 | * table */ |
587 | bool isInETable(RoseVertex v) const; /* true iff has literal in eod |
588 | * table */ |
589 | |
590 | size_t maxLiteralLen(RoseVertex v) const; |
591 | size_t minLiteralLen(RoseVertex v) const; |
592 | |
593 | // max overlap considered for every pair (ulit, vlit). |
594 | size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; |
595 | |
596 | bool isPseudoStar(const RoseEdge &e) const; |
597 | bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; |
598 | bool hasOnlyPseudoStarInEdges(RoseVertex v) const; |
599 | |
600 | bool isAnyStart(const RoseVertex &v) const { |
601 | return v == root || v == anchored_root; |
602 | } |
603 | |
604 | bool isVirtualVertex(const RoseVertex &v) const { |
605 | return g[v].eod_accept || isAnyStart(v); |
606 | } |
607 | |
608 | void handleMixedSensitivity(void); |
609 | |
610 | void findTransientLeftfixes(void); |
611 | |
612 | const CompileContext &cc; |
613 | RoseGraph g; |
614 | const RoseVertex root; |
615 | const RoseVertex anchored_root; |
616 | RoseLiteralMap literals; |
617 | std::map<RoseVertex, RoseVertex> ghost; |
618 | ReportID getNewNfaReport() override { |
619 | return next_nfa_report++; |
620 | } |
621 | std::deque<rose_literal_info> literal_info; |
622 | bool hasSom; //!< at least one pattern requires SOM. |
623 | std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas; |
624 | std::map<simple_anchored_info, std::set<u32>> anchored_simple; |
625 | std::map<u32, std::set<u32> > group_to_literal; |
626 | u32 group_end; |
627 | |
628 | u32 ematcher_region_size; /**< number of bytes the eod table runs over */ |
629 | |
630 | /** \brief Mapping from anchored literal ID to the original literal suffix |
631 | * present when the literal was added to the literal matcher. Used for |
632 | * overlap calculation in history assignment. */ |
633 | std::map<u32, rose_literal_id> anchoredLitSuffix; |
634 | |
635 | ue2_unordered_set<left_id> transient; |
636 | ue2_unordered_map<left_id, rose_group> rose_squash_masks; |
637 | |
638 | std::vector<OutfixInfo> outfixes; |
639 | |
640 | /** \brief MPV outfix entry. Null if not used, and moved into the outfixes |
641 | * list before we start building the bytecode (at which point it is set to |
642 | * null again). */ |
643 | std::unique_ptr<OutfixInfo> mpv_outfix = nullptr; |
644 | |
645 | u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX. |
646 | |
647 | u32 max_rose_anchored_floating_overlap; |
648 | |
649 | rose_group boundary_group_mask = 0; |
650 | |
651 | QueueIndexFactory qif; |
652 | ReportManager &rm; |
653 | SomSlotManager &ssm; |
654 | SmallWriteBuild &smwr; |
655 | const BoundaryReports &boundary; |
656 | |
657 | private: |
658 | ReportID next_nfa_report; |
659 | }; |
660 | |
661 | size_t calcLongLitThreshold(const RoseBuildImpl &build, |
662 | const size_t historyRequired); |
663 | |
664 | // Free functions, in rose_build_misc.cpp |
665 | |
666 | bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); |
667 | bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); |
668 | |
669 | size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); |
670 | ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits, |
671 | const ue2_literal &s); |
672 | |
673 | #ifndef NDEBUG |
674 | bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); |
675 | bool hasOrphanedTops(const RoseBuildImpl &build); |
676 | #endif |
677 | |
678 | u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm); |
679 | |
680 | // Function that operates on a msk/cmp pair and a literal, as used in |
681 | // hwlmLiteral, and zeroes msk elements that don't add any power to the |
682 | // literal. |
683 | void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk, |
684 | std::vector<u8> &cmp); |
685 | |
686 | u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); |
687 | u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); |
688 | |
689 | bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); |
690 | |
691 | #ifndef NDEBUG |
692 | bool canImplementGraphs(const RoseBuildImpl &tbi); |
693 | #endif |
694 | |
695 | } // namespace ue2 |
696 | |
697 | namespace std { |
698 | |
699 | template<> |
700 | struct hash<ue2::left_id> { |
701 | size_t operator()(const ue2::left_id &l) const { |
702 | return l.hash(); |
703 | } |
704 | }; |
705 | |
706 | template<> |
707 | struct hash<ue2::suffix_id> { |
708 | size_t operator()(const ue2::suffix_id &s) const { |
709 | return s.hash(); |
710 | } |
711 | }; |
712 | |
713 | } // namespace std |
714 | |
715 | #endif /* ROSE_BUILD_IMPL_H */ |
716 | |