/*
 * Copyright (c) 2015-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Rose Build: functions for reducing the size of the Rose graph
 * through merging.
 */
#include "rose_build_merge.h"

#include "grey.h"
#include "rose_build.h"
#include "rose_build_impl.h"
#include "rose_build_util.h"
#include "ue2common.h"
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/limex_limits.h"
#include "nfa/mcclellancompile.h"
#include "nfa/nfa_build_util.h"
#include "nfa/rdfa_merge.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_haig.h"
#include "nfagraph/ng_is_equal.h"
#include "nfagraph/ng_lbr.h"
#include "nfagraph/ng_limex.h"
#include "nfagraph/ng_mcclellan.h"
#include "nfagraph/ng_puff.h"
#include "nfagraph/ng_redundancy.h"
#include "nfagraph/ng_repeat.h"
#include "nfagraph/ng_reports.h"
#include "nfagraph/ng_stop.h"
#include "nfagraph/ng_uncalc_components.h"
#include "nfagraph/ng_util.h"
#include "nfagraph/ng_width.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/hash.h"
#include "util/insertion_ordered.h"
#include "util/order_check.h"
#include "util/report_manager.h"
#include "util/ue2string.h"
#include "util/unordered.h"

#include <algorithm>
#include <functional>
#include <list>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <vector>
#include <utility>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_values;
using boost::adaptors::map_keys;

namespace ue2 {

static const size_t NARROW_START_MAX = 10;
static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
static const size_t MERGE_GROUP_SIZE_MAX = 200;
static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;

/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
static const size_t DFA_CHUNK_SIZE_MAX = 200;

/** \brief Max DFA states in a merged DFA. */
static const size_t DFA_MERGE_MAX_STATES = 8000;

/** \brief In block mode, merge two prefixes even if they don't have identical
 * literal sets if they have fewer than this many states and the merged graph
 * is also small. */
static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32;

static
size_t small_merge_max_vertices(const CompileContext &cc) {
    return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
                        : SMALL_MERGE_MAX_VERTICES_BLOCK;
}

static
size_t small_rose_threshold(const CompileContext &cc) {
    return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
                        : SMALL_ROSE_THRESHOLD_BLOCK;
}

/**
 * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
 * reports should not contribute to the hash.
 */
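/*
 * For instance (illustrative only): two castles that are identical apart
 * from their report IDs must hash equally here, so that dedupeLeftfixes
 * groups them together and lets the report-aware is_equal() comparison
 * identify the dupe.
 */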
static
size_t hashLeftfix(const left_id &left) {
    size_t val = 0;

    if (left.castle()) {
        hash_combine(val, left.castle()->reach());
        for (const auto &pr : left.castle()->repeats) {
            hash_combine(val, pr.first); // top
            hash_combine(val, pr.second.bounds);
        }
    } else if (left.graph()) {
        hash_combine(val, hash_holder(*left.graph()));
    }

    return val;
}

namespace {

/** Key used to group sets of leftfixes for the dedupeLeftfixes path. */
struct RoseGroup {
    RoseGroup(const RoseBuildImpl &build, RoseVertex v)
        : left_hash(hashLeftfix(build.g[v].left)),
          lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
        const RoseGraph &g = build.g;
        assert(in_degree(v, g) == 1);
        RoseVertex u = *inv_adjacent_vertices(v, g).first;
        parent = g[u].index;
    }

    bool operator<(const RoseGroup &b) const {
        const RoseGroup &a = *this;
        ORDER_CHECK(parent);
        ORDER_CHECK(left_hash);
        ORDER_CHECK(lag);
        ORDER_CHECK(eod_table);
        return false;
    }

private:
    /** Parent vertex index. We must use the index, rather than the
     * descriptor, for compile determinism. */
    size_t parent;

    /** Quick hash of the leftfix itself. Must be identical for a given pair
     * of graphs if is_equal would return true. */
    size_t left_hash;

    /** Leftfix lag value. */
    u32 lag;

    /** True if the associated vertex (successor) is in the EOD table. We
     * don't allow sharing of leftfix engines between "normal" and EOD
     * operation. */
    bool eod_table;
};

/**
 * Intended to find graphs that are identical except for their report
 * IDs. Relies on vertex and edge indices to pick up graphs that have been
 * messily put together in different orderings. Only implemented for castles
 * and holders.
 */
static
bool is_equal(const left_id &u_left, ReportID u_report,
              const left_id &v_left, ReportID v_report) {
    if (u_left.castle() && v_left.castle()) {
        return is_equal(*u_left.castle(), u_report, *v_left.castle(),
                        v_report);
    }

    if (!u_left.graph() || !v_left.graph()) {
        return false;
    }

    return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report);
}

} // namespace

/**
 * This pass performs work similar to \ref dedupeSuffixes - it removes
 * duplicate prefixes/infixes (that is, leftfixes) which are identical graphs
 * and share the same trigger vertex and lag. Leftfixes are first grouped by
 * parent role and lag to reduce the number of candidates to be inspected
 * for each leftfix. The graphs in each cluster are then compared with each
 * other and the graph is updated to only refer to a canonical version of each
 * graph.
 *
 * Note: only roles with a single predecessor vertex are considered for this
 * transform - it should probably be generalised to work for roles which share
 * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
 * it should be retired entirely.
 */
bool dedupeLeftfixes(RoseBuildImpl &tbi) {
    DEBUG_PRINTF("deduping leftfixes\n");
    map<RoseGroup, deque<RoseVertex>> roses;
    bool work_done = false;

    /* Note: a leftfix's transientness will not be altered by deduping */

    // Collect leftfixes into groups.
    RoseGraph &g = tbi.g;
    for (auto v : vertices_range(g)) {
        if (!g[v].left) {
            continue;
        }
        const left_id left(g[v].left);

        if (left.haig()) {
            /* TODO: allow merging of identical haigs */
            continue;
        }

        if (in_degree(v, g) != 1) {
            continue;
        }

        roses[RoseGroup(tbi, v)].push_back(v);
    }

    DEBUG_PRINTF("collected %zu rose groups\n", roses.size());

    // Walk groups and dedupe the roses therein.
    for (deque<RoseVertex> &verts : roses | map_values) {
        DEBUG_PRINTF("group has %zu vertices\n", verts.size());

        unordered_set<left_id> seen;

        for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
            RoseVertex v = *jt;
            left_id left(g[v].left);

            // Skip cases we've already handled, and mark as seen otherwise.
            if (!seen.insert(left).second) {
                continue;
            }

            // Scan the rest of the list for dupes.
            for (auto kt = std::next(jt); kt != jte; ++kt) {
                if (g[v].left == g[*kt].left
                    || !is_equal(g[v].left, g[v].left.leftfix_report,
                                 g[*kt].left, g[*kt].left.leftfix_report)) {
                    continue;
                }

                // Dupe found.
                DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
                             g[*kt].index, g[v].index);
                assert(g[v].left.lag == g[*kt].left.lag);
                g[*kt].left = g[v].left;
                work_done = true;
            }
        }
    }

    return work_done;
}

/**
 * \brief Returns a numeric key that can be used to group this suffix with
 * others that may be its duplicate.
 */
static
size_t suffix_size_key(const suffix_id &s) {
    if (s.graph()) {
        return num_vertices(*s.graph());
    }
    if (s.castle()) {
        return s.castle()->repeats.size();
    }
    return 0;
}
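
/*
 * For instance (illustrative only): a suffix holder with 12 vertices keys as
 * 12 and a castle with 3 repeats keys as 3. Only suffixes that agree on this
 * key (and on their report set) end up in the same candidate group below.
 */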

static
bool is_equal(const suffix_id &s1, const suffix_id &s2) {
    if (s1.graph() && s2.graph()) {
        return is_equal(*s1.graph(), *s2.graph());
    } else if (s1.castle() && s2.castle()) {
        return is_equal(*s1.castle(), *s2.castle());
    }
    return false;
}

/**
 * This function simply looks for suffix NGHolder graphs which are identical
 * and updates the roles in the RoseGraph to refer to only a single copy. This
 * obviously has benefits in terms of both performance (as we don't run
 * multiple engines doing the same work) and stream state. This function first
 * groups all suffixes by number of vertices and report set to restrict the
 * set of possible candidates. Each group is then walked to find duplicates
 * using the \ref is_equal comparator for NGHolders, updating the RoseGraph as
 * it goes.
 *
 * Note: does not dedupe suffixes of vertices in the EOD table.
 */
void dedupeSuffixes(RoseBuildImpl &tbi) {
    DEBUG_PRINTF("deduping suffixes\n");

    unordered_map<suffix_id, set<RoseVertex>> suffix_map;
    map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;

    // Collect suffixes into groups.
    RoseGraph &g = tbi.g;
    for (auto v : vertices_range(g)) {
        if (!g[v].suffix || tbi.isInETable(v)) {
            continue;
        }

        const suffix_id s(g[v].suffix);

        if (!(s.graph() || s.castle())) {
            continue; // e.g. Haig
        }

        set<RoseVertex> &verts = suffix_map[s];
        if (verts.empty()) {
            part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
        }
        verts.insert(v);
    }

    DEBUG_PRINTF("collected %zu groups\n", part.size());

    for (const auto &cand : part | map_values) {
        if (cand.size() <= 1) {
            continue;
        }
        DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());

        for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
            if (suffix_map[*jt].empty()) {
                continue;
            }
            for (auto kt = next(jt); kt != cand.end(); ++kt) {
                if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
                    continue;
                }
                DEBUG_PRINTF("found dupe\n");
                for (auto v : suffix_map[*kt]) {
                    RoseVertex dupe = *suffix_map[*jt].begin();
                    assert(dupe != v);
                    g[v].suffix.graph = g[dupe].suffix.graph;
                    g[v].suffix.castle = g[dupe].suffix.castle;
                    assert(suffix_id(g[v].suffix) ==
                           suffix_id(g[dupe].suffix));
                    suffix_map[*jt].insert(v);
                }
                suffix_map[*kt].clear();
            }
        }
    }
}

namespace {

/**
 * This class stores a mapping from an engine reference (left_id, suffix_id,
 * etc) to a list of vertices, and also allows us to iterate over the set of
 * engine references in insertion order -- we add to the mapping in vertex
 * iteration order, so this allows us to provide a consistent ordering. A
 * usage sketch follows the class definition.
 */
template<class EngineRef>
class Bouquet {
private:
    list<EngineRef> ordering; // Unique list in insert order.
    using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>;
    BouquetMap bouquet;
public:
    void insert(const EngineRef &h, RoseVertex v) {
        typename BouquetMap::iterator f = bouquet.find(h);
        if (f == bouquet.end()) {
            ordering.push_back(h);
            bouquet[h].push_back(v);
        } else {
            f->second.push_back(v);
        }
    }

    void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
        typename BouquetMap::iterator f = bouquet.find(h);
        if (f == bouquet.end()) {
            ordering.push_back(h);
            bouquet.insert(make_pair(h, verts));
        } else {
            f->second.insert(f->second.end(), verts.begin(), verts.end());
        }
    }

    const deque<RoseVertex> &vertices(const EngineRef &h) const {
        typename BouquetMap::const_iterator it = bouquet.find(h);
        assert(it != bouquet.end()); // must be present
        return it->second;
    }

    void erase(const EngineRef &h) {
        assert(bouquet.find(h) != bouquet.end());
        bouquet.erase(h);
        ordering.remove(h);
    }

    /** Remove all the elements in the given iterator range. */
    template <class Iter>
    void erase_all(Iter erase_begin, Iter erase_end) {
        for (Iter it = erase_begin; it != erase_end; ++it) {
            bouquet.erase(*it);
        }

        // Use a quick-lookup container so that we only have to traverse the
        // 'ordering' list once.
        const set<EngineRef> dead(erase_begin, erase_end);
        for (iterator it = begin(); it != end(); /* incremented inside */) {
            if (contains(dead, *it)) {
                ordering.erase(it++);
            } else {
                ++it;
            }
        }
    }

    void clear() {
        ordering.clear();
        bouquet.clear();
    }

    size_t size() const { return bouquet.size(); }

    // iterate over holders in insert order
    typedef typename list<EngineRef>::iterator iterator;
    iterator begin() { return ordering.begin(); }
    iterator end() { return ordering.end(); }

    // const iterate over holders in insert order
    typedef typename list<EngineRef>::const_iterator const_iterator;
    const_iterator begin() const { return ordering.begin(); }
    const_iterator end() const { return ordering.end(); }
};
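
/*
 * Usage sketch (illustrative only; 'g', 'v' and 'w' stand for an existing
 * RoseGraph and two of its vertices):
 *
 *     Bouquet<left_id> bouquet;
 *     bouquet.insert(left_id(g[v].left), v); // first insert fixes the order
 *     bouquet.insert(left_id(g[w].left), w); // dupes only extend the deque
 *     for (const left_id &left : bouquet) {  // insertion-order iteration
 *         const deque<RoseVertex> &verts = bouquet.vertices(left);
 *         // ... examine verts ...
 *     }
 */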

typedef Bouquet<left_id> LeftfixBouquet;
typedef Bouquet<suffix_id> SuffixBouquet;

} // namespace

/**
 * Split a \ref Bouquet of some type into several smaller ones.
 */
template <class EngineRef>
static void chunkBouquets(const Bouquet<EngineRef> &in,
                          deque<Bouquet<EngineRef>> &out,
                          const size_t chunk_size) {
    if (in.size() <= chunk_size) {
        out.push_back(in);
        return;
    }

    out.push_back(Bouquet<EngineRef>());
    for (const auto &engine : in) {
        if (out.back().size() >= chunk_size) {
            out.push_back(Bouquet<EngineRef>());
        }
        out.back().insert(engine, in.vertices(engine));
    }
}

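/*
 * For example (illustrative): with u = "bc" and v = "abc", the walk below
 * compares 'c' with 'c' and 'b' with 'b', then runs off the start of u, so
 * the two literals can finish at the same offset. With v = "ad", the first
 * comparison ('c' against 'd') fails and they cannot.
 */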
static
bool stringsCanFinishAtSameSpot(const ue2_literal &u,
                                ue2_literal::const_iterator v_b,
                                ue2_literal::const_iterator v_e) {
    ue2_literal::const_iterator u_e = u.end();
    ue2_literal::const_iterator u_b = u.begin();

    while (u_e != u_b && v_e != v_b) {
        --u_e;
        --v_e;

        if (!overlaps(*u_e, *v_e)) {
            return false;
        }
    }

    return true;
}

/**
 * Check that, once u has been seen, it is impossible for the arrival of v to
 * require the inspection of an engine earlier than u did.
 *
 * Let delta be the earliest that v can be seen after u (may be zero).
 *
 * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
 *            ==>        -ulag <= delta - vlag
 *            ==>  vlag - ulag <= delta
 */
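/*
 * Worked example (illustrative): u = "abcd" with ulag = 0 and v = "bcd" with
 * vlag = 2 give min_allowed_delta = 2, so we must show that v cannot finish
 * within 2 bytes of u finishing. But "bcd" can finish at the same spot as
 * "abcd" (delta = 0 < 2), so the pair is rejected.
 */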
static
bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
                 const rose_literal_id &vl, const u32 vlag) {
    DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
                 escapeString(vl.s).c_str(), vlag);

    if (vl.delay || ul.delay) {
        /* engine related literals should not be delayed anyway */
        return false;
    }

    if (ulag >= vlag) {
        assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
        return true;
    }

    size_t min_allowed_delta = vlag - ulag;
    DEBUG_PRINTF("min allowed distance %zu\n", min_allowed_delta);

    for (size_t i = 0; i < min_allowed_delta; i++) {
        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
            return false;
        }
    }

    DEBUG_PRINTF("OK\n");
    return true;
}

static
bool hasSameEngineType(const RoseVertexProps &u_prop,
                       const RoseVertexProps &v_prop) {
    const left_id u_left = u_prop.left;
    const left_id v_left = v_prop.left;

    return !u_left.haig() == !v_left.haig()
        && !u_left.dfa() == !v_left.dfa()
        && !u_left.castle() == !v_left.castle()
        && !u_left.graph() == !v_left.graph();
}

/**
 * Verifies that merging the leftfixes of the given vertices does not cause
 * conflicts due to the literals on the right.
 *
 * The main concern is that the lags of the literals and the overlap between
 * them could allow the engine check offset to regress.
 *
 * Parameters are vectors of literal + lag pairs.
 *
 * Note: if there were more constraints on when the leftfixes are checked
 * (mandatory lookarounds passing, offset checks), more merges could be
 * allowed.
 */
static
bool compatibleLiteralsForMerge(
        const vector<pair<const rose_literal_id *, u32>> &ulits,
        const vector<pair<const rose_literal_id *, u32>> &vlits) {
    assert(!ulits.empty());
    assert(!vlits.empty());

    // We cannot merge engines that prefix literals in different tables.
    if (ulits[0].first->table != vlits[0].first->table) {
        DEBUG_PRINTF("literals in different tables\n");
        return false;
    }

    // We don't handle delayed cases yet.
    for (const auto &ue : ulits) {
        const rose_literal_id &ul = *ue.first;
        if (ul.delay) {
            return false;
        }
    }

    for (const auto &ve : vlits) {
        const rose_literal_id &vl = *ve.first;
        if (vl.delay) {
            return false;
        }
    }

    /* An engine requires that all accesses to it are ordered by offsets.
       (i.e., we cannot check an engine's state at offset Y if we have
       already checked its status at offset X and X > Y.) If we cannot
       establish that the literals used for triggering will satisfy this
       property, then it is not safe to merge the engine. */
    for (const auto &ue : ulits) {
        const rose_literal_id &ul = *ue.first;
        u32 ulag = ue.second;

        for (const auto &ve : vlits) {
            const rose_literal_id &vl = *ve.first;
            u32 vlag = ve.second;

            if (!checkPrefix(ul, ulag, vl, vlag)
                || !checkPrefix(vl, vlag, ul, ulag)) {
                DEBUG_PRINTF("prefix check failed\n");
                return false;
            }
        }
    }

    return true;
}

/**
 * True if this graph has few enough accel states to be implemented as an NFA
 * with all of those states actually becoming accel schemes.
 */
static
bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) {
    u32 num = countAccelStates(g, &build.rm, build.cc);
    DEBUG_PRINTF("graph with %zu vertices has %u accel states\n",
                 num_vertices(g), num);
    return num <= NFA_MAX_ACCEL_STATES;
}

/**
 * In block mode, we want to be a little more selective -- we will only merge
 * prefix engines when the literal sets are the same or if the merged graph
 * has only grown by a small amount.
 */
static
bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u,
                        RoseVertex v) {
    assert(!build.cc.streaming);
    assert(build.isRootSuccessor(u) == build.isRootSuccessor(v));

    // Always merge infixes if we can (subject to the other criteria in
    // mergeableRoseVertices).
    if (!build.isRootSuccessor(u)) {
        return true;
    }

    const RoseGraph &g = build.g;

    // Merge prefixes with identical literal sets (as we'd have to run them
    // both when we see those literals anyway).
    if (g[u].literals == g[v].literals) {
        return true;
    }

    // The rest of this function only deals with the case when both vertices
    // have graph leftfixes.
    if (!g[u].left.graph || !g[v].left.graph) {
        return false;
    }

    const size_t u_count = num_vertices(*g[u].left.graph);
    const size_t v_count = num_vertices(*g[v].left.graph);
    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
                 u_count, v_count);
    if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
        v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
        DEBUG_PRINTF("prefixes too big already\n");
        return false;
    }

    DEBUG_PRINTF("trying merge\n");
    NGHolder h;
    cloneHolder(h, *g[v].left.graph);
    if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) {
        DEBUG_PRINTF("couldn't merge\n");
        return false;
    }

    const size_t merged_count = num_vertices(h);
    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
    if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
        DEBUG_PRINTF("exceeded limit\n");
        return false;
    }

    // We want to only perform merges that take advantage of some
    // commonality in the two input graphs, so we check that the number of
    // vertices has only grown a small amount: somewhere between the sum
    // (no commonality) and the max (no growth at all) of the vertex counts
    // of the input graphs.
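    // For example (illustrative): u_count = 20 and v_count = 24 give
    // max_size = 44, min_size = 24 and max_growth = 5, so any merged graph
    // with more than 29 vertices is rejected.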
    const size_t max_size = u_count + v_count;
    const size_t min_size = max(u_count, v_count);
    const size_t max_growth = ((max_size - min_size) * 25) / 100;
    if (merged_count > min_size + max_growth) {
        DEBUG_PRINTF("grew too much\n");
        return false;
    }

    // We don't want to squander any chances at accelerating.
    if (!isAccelerableLeftfix(build, h) &&
        (isAccelerableLeftfix(build, *g[u].left.graph) ||
         isAccelerableLeftfix(build, *g[v].left.graph))) {
        DEBUG_PRINTF("would lose accel property\n");
        return false;
    }

    DEBUG_PRINTF("safe to merge\n");
    return true;
}

bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
                           RoseVertex v) {
    assert(u != v);

    if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
        return false;
    }

    if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) {
        return false;
    }

    /* We cannot merge prefixes/vertices if they are successors of different
     * root vertices */
    if (tbi.isRootSuccessor(u)) {
        assert(tbi.isRootSuccessor(v));
        set<RoseVertex> u_preds;
        set<RoseVertex> v_preds;
        insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
        insert(&v_preds, inv_adjacent_vertices(v, tbi.g));

        if (u_preds != v_preds) {
            return false;
        }
    }

    u32 ulag = tbi.g[u].left.lag;
    vector<pair<const rose_literal_id *, u32>> ulits;
    ulits.reserve(tbi.g[u].literals.size());
    for (u32 id : tbi.g[u].literals) {
        ulits.emplace_back(&tbi.literals.at(id), ulag);
    }

    u32 vlag = tbi.g[v].left.lag;
    vector<pair<const rose_literal_id *, u32>> vlits;
    vlits.reserve(tbi.g[v].literals.size());
    for (u32 id : tbi.g[v].literals) {
        vlits.emplace_back(&tbi.literals.at(id), vlag);
    }

    if (!compatibleLiteralsForMerge(ulits, vlits)) {
        return false;
    }

    DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
                 tbi.g[v].index);
    return true;
}

/* We cannot merge an engine if a trigger literal and a post literal overlap
 * in such a way that the engine status would need to be checked at a point
 * before the engine's current location.
 *
 * i.e., for a trigger literal u and a post literal v,
 * where delta is the earliest v can appear after u,
 * we require that v_loc - v_lag >= u_loc
 *            ==>  u_loc + delta - v_lag >= u_loc
 *            ==>  delta >= v_lag
 *
 */
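/*
 * Worked example (illustrative): for a trigger literal u = "foo" and a post
 * literal v = "oo" with vlag = 1, v can finish at the same offset as u
 * (delta = 0 < vlag), so the merged engine could need checking before u's
 * own check location; the merge is rejected.
 */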
static
bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
                    u32 vlag) {
    DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
                 escapeString(vl.s).c_str(), vlag);

    for (size_t i = 0; i < vlag; i++) {
        if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
            return false;
        }
    }

    DEBUG_PRINTF("OK\n");
    return true;
}

template<typename VertexCont>
static never_inline
bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
                     const VertexCont &v2) {
    flat_set<RoseVertex> preds;
    for (auto v : v1) {
        insert(&preds, inv_adjacent_vertices(v, build.g));
    }

    flat_set<u32> pred_lits;

    /* No need to examine delays of a common pred - as it must already have
     * survived the delay checks.
     *
     * This is important when the pred is in the anchored table as
     * the literal is no longer available. */
    flat_set<RoseVertex> known_good_preds;
    for (auto v : v2) {
        insert(&known_good_preds, inv_adjacent_vertices(v, build.g));
    }

    for (auto u : preds) {
        if (!contains(known_good_preds, u)) {
            insert(&pred_lits, build.g[u].literals);
        }
    }

    vector<const rose_literal_id *> pred_rose_lits;
    pred_rose_lits.reserve(pred_lits.size());
    for (const auto &p : pred_lits) {
        pred_rose_lits.push_back(&build.literals.at(p));
    }

    for (auto v : v2) {
        u32 vlag = build.g[v].left.lag;
        if (!vlag) {
            continue;
        }

        for (const u32 vlit : build.g[v].literals) {
            const rose_literal_id &vl = build.literals.at(vlit);
            assert(!vl.delay); // this should never have got this far?
            for (const auto &ul : pred_rose_lits) {
                assert(!ul->delay); // this should never have got this far?

                if (!checkPredDelay(*ul, vl, vlag)) {
                    return false;
                }
            }
        }
    }

    return true;
}

static
bool mergeableRoseVertices(const RoseBuildImpl &tbi,
                           const deque<RoseVertex> &verts1,
                           const deque<RoseVertex> &verts2) {
    assert(!verts1.empty());
    assert(!verts2.empty());

    RoseVertex u_front = verts1.front();
    RoseVertex v_front = verts2.front();

    /* all vertices must have the same engine type: assume all verts in each
     * group are already of the same type */
    if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
        return false;
    }

    bool is_prefix = tbi.isRootSuccessor(u_front);

    /* We cannot merge prefixes/vertices if they are successors of different
     * root vertices: similarly, assume the grouped vertices are compatible */
    if (is_prefix) {
        assert(tbi.isRootSuccessor(v_front));
        set<RoseVertex> u_preds;
        set<RoseVertex> v_preds;
        insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
        insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));

        if (u_preds != v_preds) {
            return false;
        }
    }

    vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
    for (auto a : verts1) {
        if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) {
            return false;
        }

        u32 ulag = tbi.g[a].left.lag;
        for (u32 id : tbi.g[a].literals) {
            ulits.emplace_back(&tbi.literals.at(id), ulag);
        }
    }

    vector<pair<const rose_literal_id *, u32>> vlits;
    for (auto a : verts2) {
        if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) {
            return false;
        }

        u32 vlag = tbi.g[a].left.lag;
        for (u32 id : tbi.g[a].literals) {
            vlits.emplace_back(&tbi.literals.at(id), vlag);
        }
    }

    if (!compatibleLiteralsForMerge(ulits, vlits)) {
        return false;
    }

    // Check preds are compatible as well.
    if (!checkPredDelays(tbi, verts1, verts2)
        || !checkPredDelays(tbi, verts2, verts1)) {
        return false;
    }

    DEBUG_PRINTF("vertex sets are mergeable\n");
    return true;
}

bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
                           const set<RoseVertex> &v2) {
    const deque<RoseVertex> vv1(v1.begin(), v1.end());
    const deque<RoseVertex> vv2(v2.begin(), v2.end());
    return mergeableRoseVertices(tbi, vv1, vv2);
}

/** \brief Priority queue element for Rose merges. */
namespace {
struct RoseMergeCandidate {
    RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
                       u32 tb)
        : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
          tie_breaker(tb) {
        if (r1.graph() && r2.graph()) {
            const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
            /* som_none as haigs don't merge and just a guiding heuristic */
            CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
            CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
            stopxor = (stop1 ^ stop2).count();

            // We use the number of vertices as an approximation of the state
            // count here, as this is just feeding a comparison.
            u32 vertex_count = num_vertices(h1) + num_vertices(h2);
            states = vertex_count - min(vertex_count, cpl);
        } else if (r1.castle() && r2.castle()) {
            // FIXME
        }
    }

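    /* Note: std::priority_queue pops its largest element first, so "less
     * than" here means "try this merge later". The net effect is that we
     * prefer candidates with a smaller stop-alphabet difference, then a
     * longer common prefix, then a smaller estimated state count. */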
    bool operator<(const RoseMergeCandidate &a) const {
        if (stopxor != a.stopxor) {
            return stopxor > a.stopxor;
        }
        if (cpl != a.cpl) {
            return cpl < a.cpl;
        }
        if (states != a.states) {
            return states > a.states;
        }
        return tie_breaker < a.tie_breaker;
    }

    left_id r1;
    left_id r2;
    u32 stopxor;
    u32 cpl; //!< common prefix length
    u32 states;
    u32 tie_breaker; //!< determinism
};
}

static
bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
                      const vector<RoseVertex> &verts1,
                      const vector<RoseVertex> &verts2) {
    assert(!verts1.empty() && !verts2.empty());

    DEBUG_PRINTF("merging pair of leftfixes:\n");
    DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(),
                 as_string_list(all_tops(r1)).c_str());
    DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(),
                 as_string_list(all_tops(r2)).c_str());

    RoseGraph &g = build.g;

    if (r1.graph()) {
        assert(r2.graph());
        assert(r1.graph()->kind == r2.graph()->kind);
        if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) {
            DEBUG_PRINTF("nfa merge failed\n");
            return false;
        }

        /* The graph in r1 has been merged into the graph in r2. Update r1's
         * vertices with the new graph ptr. mergeNfaPair() does not alter the
         * tops from the input graph so no need to update top values.
         *
         * It is the responsibility of the caller to ensure that the tops are
         * distinct when they have different trigger conditions.
         * [Note: mergeLeftfixesVariableLag() should have a common parent set]
         */
        shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
        for (RoseVertex v : verts1) {
            g[v].left.graph = h;
        }

        return true;
    } else if (r1.castle()) {
        assert(r2.castle());
        assert(build.cc.grey.allowCastle);

        map<u32, u32> top_map;
        if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
            DEBUG_PRINTF("castle merge failed\n");
            return false;
        }

        // The castle in r1 has been merged into the castle in r2, with tops
        // remapped as per top_map.
        const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
        for (RoseVertex v : verts1) {
            g[v].left.castle = c;
            for (const auto &e : in_edges_range(v, g)) {
                g[e].rose_top = top_map.at(g[e].rose_top);
            }
        }
        return true;
    }

    assert(0);
    return false;
}

/**
 * Checks that there is no problem due to the involved vertices if we merge
 * two leftfix engines.
 *
 * This function takes the vertices on the right of the two engines.
 *
 * Unlike mergeableRoseVertices(), this does not:
 * - check that the engines themselves can be merged
 * - use heuristics to find out if merging the engines is wise.
 */
static
bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
                                    const vector<RoseVertex> &targets_1,
                                    const vector<RoseVertex> &targets_2) {
    assert(!targets_1.empty());
    assert(!targets_2.empty());

    vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
    for (auto a : targets_1) {
        u32 ulag = build.g[a].left.lag;
        for (u32 id : build.g[a].literals) {
            ulits.emplace_back(&build.literals.at(id), ulag);
        }
    }

    vector<pair<const rose_literal_id *, u32>> vlits;
    for (auto a : targets_2) {
        u32 vlag = build.g[a].left.lag;
        for (u32 id : build.g[a].literals) {
            vlits.emplace_back(&build.literals.at(id), vlag);
        }
    }

    if (!compatibleLiteralsForMerge(ulits, vlits)) {
        return false;
    }

    // Check preds are compatible as well.
    if (!checkPredDelays(build, targets_1, targets_2)
        || !checkPredDelays(build, targets_2, targets_1)) {
        return false;
    }

    DEBUG_PRINTF("vertex sets are mergeable\n");
    return true;
}

/**
 * In block mode, we want to be a little more selective -- we will only merge
 * prefix engines when the literal sets are the same or if the merged graph
 * has only grown by a small amount.
 */
static
bool goodBlockModeMerge(const RoseBuildImpl &build,
                        const vector<RoseVertex> &u_verts, const left_id &u_eng,
                        const vector<RoseVertex> &v_verts,
                        const left_id &v_eng) {
    assert(!build.cc.streaming);

    // Always merge infixes if we can (subject to the other criteria in
    // mergeableRoseVertices).
    if (!build.isRootSuccessor(u_verts.front())) {
        return true;
    }

    const RoseGraph &g = build.g;

    flat_set<u32> u_lits;
    for (RoseVertex u : u_verts) {
        insert(&u_lits, g[u].literals);
    }

    flat_set<u32> v_lits;
    for (RoseVertex v : v_verts) {
        insert(&v_lits, g[v].literals);
    }

    // Merge prefixes with identical literal sets (as we'd have to run them
    // both when we see those literals anyway).
    if (u_lits == v_lits) {
        return true;
    }

    // The rest of this function only deals with the case when both engines
    // have graph leftfixes.
    if (!u_eng.graph()) {
        return false;
    }
    assert(v_eng.graph());
    const NGHolder &ug = *u_eng.graph();
    const NGHolder &vg = *v_eng.graph();

    size_t u_count = num_vertices(ug);
    size_t v_count = num_vertices(vg);
    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
                 u_count, v_count);
    if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES ||
        v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
        DEBUG_PRINTF("prefixes too big already\n");
        return false;
    }

    DEBUG_PRINTF("trying merge\n");
    NGHolder h;
    cloneHolder(h, vg);
    if (!mergeNfaPair(ug, h, nullptr, build.cc)) {
        DEBUG_PRINTF("couldn't merge\n");
        return false;
    }

    const size_t merged_count = num_vertices(h);
    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
    if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) {
        DEBUG_PRINTF("exceeded limit\n");
        return false;
    }

    // We want to only perform merges that take advantage of some
    // commonality in the two input graphs, so we check that the number of
    // vertices has only grown a small amount: somewhere between the sum
    // (no commonality) and the max (no growth at all) of the vertex counts
    // of the input graphs.
    size_t max_size = u_count + v_count;
    size_t min_size = max(u_count, v_count);
    size_t max_growth = ((max_size - min_size) * 25) / 100;
    if (merged_count > min_size + max_growth) {
        DEBUG_PRINTF("grew too much\n");
        return false;
    }

    // We don't want to squander any chances at accelerating.
    if (!isAccelerableLeftfix(build, h)
        && (isAccelerableLeftfix(build, ug)
            || isAccelerableLeftfix(build, vg))) {
        DEBUG_PRINTF("would lose accel property\n");
        return false;
    }

    DEBUG_PRINTF("safe to merge\n");
    return true;
}

/**
 * Merge r1 into r2 if safe and appropriate. Returns true on success.
 */
static
bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
                                   const vector<RoseVertex> &targets_1,
                                   left_id &r2,
                                   const vector<RoseVertex> &targets_2) {
    if (targets_1.empty() || targets_2.empty()) {
        /* one of the engines has already been merged away */
        return false;
    }

    assert(!r1.graph() == !r2.graph());
    if (r1.graph()) {
        NGHolder *h1 = r1.graph();
        NGHolder *h2 = r2.graph();
        CharReach stop1 = findStopAlphabet(*h1, SOM_NONE);
        CharReach stop2 = findStopAlphabet(*h2, SOM_NONE);
        CharReach stopboth = stop1 & stop2;
        DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(),
                     stop2.count(), stopboth.count());
        if (stopboth.count() < 10
            && (stop1.count() > 10 || stop2.count() > 10)) {
            DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
            return false;
        }
        size_t maxstop = max(stop1.count(), stop2.count());
        if (maxstop > 200 && stopboth.count() < 200) {
            DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
            return false;
        }
    }

    /* Recheck that the targets are compatible, as we may have already merged
     * new states into r1 or r2 and we need to verify that this candidate is
     * still ok. */
    if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) {
        return false;
    }

    if (!build.cc.streaming
        && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) {
        return false;
    }

    return mergeLeftfixPair(build, r1, r2, targets_1, targets_2);
}

static
bool nfaHasNarrowStart(const NGHolder &g) {
    if (out_degree(g.startDs, g) > 1) {
        return false; // unanchored
    }

    CharReach cr;

    for (auto v : adjacent_vertices_range(g.start, g)) {
        if (v == g.startDs) {
            continue;
        }
        cr |= g[v].char_reach;
    }
    return cr.count() <= NARROW_START_MAX;
}

static
bool nfaHasFiniteMaxWidth(const NGHolder &g) {
    return findMaxWidth(g).is_finite();
}

static
bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
    if (!proper_out_degree(h.startDs, h)) {
        return false;
    }

    assert(!is_triggered(h));

    NGHolder h_temp;
    cloneHolder(h_temp, h);

    vector<BoundedRepeatData> repeats;
    bool suitable_for_sds_reforming = false;
    const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
    const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
    const bool simple_model_selection = true; // FIRST is considered simple
    analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
                   simple_model_selection, grey, &suitable_for_sds_reforming);

    return suitable_for_sds_reforming;
}

static
u32 commonPrefixLength(left_id &r1, left_id &r2) {
    if (r1.graph() && r2.graph()) {
        return commonPrefixLength(*r1.graph(), *r2.graph());
    } else if (r1.castle() && r2.castle()) {
        return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
    }
    return 0;
}

namespace {
struct MergeKey {
    MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) :
        parents(std::move(parents_in)) {

        // We want to distinguish prefixes (but not infixes) on whether they
        // have a narrow start or max width.
        if (left.graph() && !is_triggered(*left.graph())) {
            const NGHolder &h = *left.graph();
            narrowStart = nfaHasNarrowStart(h);
            hasMaxWidth = nfaHasFiniteMaxWidth(h);
        } else {
            narrowStart = false;
            hasMaxWidth = false;
        }

        if (left.castle()) {
            /* castles should have a non-empty reach */
            assert(left.castle()->reach().any());
            castle_cr = left.castle()->reach();
        } else {
            assert(left.graph());
        }
    }

    bool operator<(const MergeKey &b) const {
        const MergeKey &a = *this;
        ORDER_CHECK(narrowStart);
        ORDER_CHECK(hasMaxWidth);
        ORDER_CHECK(castle_cr);
        ORDER_CHECK(parents);
        return false;
    }

    // NOTE: these two bool discriminators are only used for prefixes, not
    // infixes.
    bool narrowStart;
    bool hasMaxWidth;
    CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */

    flat_set<RoseVertex> parents;
};
}

template <typename T>
static
void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) {
    if (in.size() <= chunk_size) {
        out->push_back(std::move(in));
        return;
    }

    out->push_back(vector<T>());
    out->back().reserve(chunk_size);
    for (auto &t : in) { // non-const ref so that std::move below can move
        if (out->back().size() >= chunk_size) {
            out->push_back(vector<T>());
            out->back().reserve(chunk_size);
        }
        out->back().push_back(std::move(t));
    }
}
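
/*
 * For example (illustrative): chunking 5 engines with chunk_size = 2
 * produces groups of sizes {2, 2, 1}.
 */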

static
insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) {
    insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
    for (auto v : vertices_range(g)) {
        const auto &left = g[v].left;
        if (!left) {
            continue;
        }
        assert(contains(all_reports(left), left.leftfix_report));
        eng_verts[left].push_back(v);
    }

    return eng_verts;
}

/**
 * This pass attempts to merge prefix/infix engines which share a common set
 * of parent vertices.
 *
 * Engines are greedily merged pairwise by this process based on a priority
 * queue keyed off the common prefix length.
 *
 * Engines are not merged if the lags are not compatible or if it would damage
 * the stop alphabet.
 *
 * Infixes:
 * - It is expected that when this is run all infixes are still at the single
 *   top stage as we have not yet merged unrelated infixes together. After
 *   execution, castles may have multiple (but equivalent) tops.
 *
 * Prefixes:
 * - transient prefixes are not considered.
 * - prefixes with a max width or a narrow start are kept segregated by
 *   this phase and can only be merged with similar prefixes.
 * - in block mode, merges are only performed if literal sets are the same.
 * - merges are not considered in cases where the dot star start state will
 *   be reformed to optimise a leading repeat.
 */
void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
    if (!build.cc.grey.mergeRose) {
        return;
    }
    assert(!hasOrphanedTops(build));

    RoseGraph &g = build.g;

    DEBUG_PRINTF("-----\n");
    DEBUG_PRINTF("entry\n");
    DEBUG_PRINTF("-----\n");

    auto eng_verts = get_eng_verts(g);

    map<MergeKey, vector<left_id>> engine_groups;
    for (const auto &e : eng_verts) {
        const left_id &left = e.first;
        const auto &verts = e.second;
        // Only non-transient for the moment.
        if (contains(build.transient, left)) {
            continue;
        }

        // No forced McClellan or Haig infix merges.
        if (left.dfa() || left.haig()) {
            continue;
        }
        assert(left.graph() || left.castle());

        if (left.graph()) {
            const NGHolder &h = *left.graph();
            /* we should not have merged yet */
            assert(!is_triggered(h) || onlyOneTop(h));

            if (hasReformedStartDotStar(h, build.cc.grey)) {
                continue; // preserve the optimisation of the leading repeat
            }
        } else {
            assert(left.castle());

            if (!build.cc.grey.allowCastle) {
                DEBUG_PRINTF("castle merging disallowed by greybox\n");
                continue;
            }
        }

        // We collapse the anchored root into the root vertex when calculating
        // parents, so that we can merge differently-anchored prefix roses
        // together. (Prompted by UE-2100)

        flat_set<RoseVertex> parents;
        for (RoseVertex v : verts) {
            insert(&parents, inv_adjacent_vertices_range(v, g));
        }

        if (contains(parents, build.anchored_root)) {
            parents.erase(build.anchored_root);
            parents.insert(build.root);
        }

        assert(!parents.empty());

#ifndef _WIN32
        engine_groups[MergeKey(left, parents)].push_back(left);
#else
        // On Windows, a MergeKey passed into the map 'engine_groups' is not
        // copied; it is freed along with engine_groups.clear(). If we
        // construct the MergeKey object on the stack, it is destroyed at the
        // end of its scope and then again by engine_groups.clear(), which
        // causes an is_block_type_valid() assertion error in the MergeKey
        // destructor. Allocate it on the heap instead.
        MergeKey *mk = new MergeKey(left, parents);
        engine_groups[*mk].push_back(left);
#endif
    }

    vector<vector<left_id>> chunks;
    for (auto &raw_group : engine_groups | map_values) {
        chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX);
    }
    engine_groups.clear();

    DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size());

    for (auto &roses : chunks) {
        if (roses.size() < 2) {
            continue;
        }
        // All pairs on the prio queue.
        u32 tie_breaker = 0;
        priority_queue<RoseMergeCandidate> pq;
        for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) {
            left_id r1 = *it;
            const vector<RoseVertex> &targets_1 = eng_verts[r1];

            for (auto jt = next(it); jt != ite; ++jt) {
                left_id r2 = *jt;

                /* we should have already split on engine types and reach */
                assert(!r1.castle() == !r2.castle());
                assert(!r1.graph() == !r2.graph());
                assert(!r1.castle()
                       || r1.castle()->reach() == r2.castle()->reach());

                const vector<RoseVertex> &targets_2 = eng_verts[r2];
                if (!checkVerticesOkForLeftfixMerge(build, targets_1,
                                                    targets_2)) {
                    continue; // No point queueing unmergeable cases.
                }

                u32 cpl = commonPrefixLength(r1, r2);
                pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++));
            }
        }

        DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());

        while (!pq.empty()) {
            left_id r1 = pq.top().r1;
            left_id r2 = pq.top().r2;
            DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n",
                         r1.graph(), r2.graph(), pq.top().cpl, pq.top().states);
            pq.pop();
            vector<RoseVertex> &targets_1 = eng_verts[r1];
            vector<RoseVertex> &targets_2 = eng_verts[r2];
            if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2,
                                              targets_2)) {
                insert(&targets_2, targets_2.end(), targets_1);
                targets_1.clear();
            }
        }
    }

    DEBUG_PRINTF("-----\n");
    DEBUG_PRINTF("exit\n");
    DEBUG_PRINTF("-----\n");
    assert(!hasOrphanedTops(build));
}

namespace {

/**
 * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag
 * path.
 */
struct DedupeLeftKey {
    DedupeLeftKey(const RoseBuildImpl &build,
                  flat_set<pair<size_t, u32>> preds_in, const left_id &left)
        : left_hash(hashLeftfix(left)), preds(move(preds_in)),
          transient(contains(build.transient, left)) {
    }

    bool operator<(const DedupeLeftKey &b) const {
        return tie(left_hash, preds, transient)
             < tie(b.left_hash, b.preds, b.transient);
    }

private:
    /** Quick hash of the leftfix itself. Must be identical for a given pair
     * of graphs if is_equal would return true. */
    size_t left_hash;

    /** For each in-edge, the pair of (parent index, edge top). */
    flat_set<pair<size_t, u32>> preds;

    /** We don't want to combine transient with non-transient. */
    bool transient;
};

} // namespace

static
flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
    flat_set<pair<size_t, u32>> preds;
    for (const auto &e : in_edges_range(v, g)) {
        preds.emplace(g[source(e, g)].index, g[e].rose_top);
    }
    return preds;
}

/**
 * This is a generalisation of \ref dedupeLeftfixes which relaxes two
 * restrictions: multiple predecessor roles are allowed and the delay used by
 * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes,
 * the leftfixes' successor vertices are first grouped to reduce the number of
 * potential candidates - the grouping in this case is by the set of
 * predecessor roles with their associated top events. For the dedupe to be
 * possible, it is required that:
 *
 * 1. the nfa graphs with respect to the relevant reports are identical
 * 2. the nfa graphs are triggered by the same roles with the same events
 *    (ensured by the initial grouping pass)
 * 3. all the successor roles of either graph can inspect the combined leftfix
 *    without advancing the state of the leftfix past the point that another
 *    successor may want to inspect it; the overlap relationships between the
 *    involved literals are examined to ensure that this property holds.
 *
 * Note: unlike \ref dedupeLeftfixes, this is unable to dedupe when delayed
 * literals are involved.
 */
void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
    DEBUG_PRINTF("entry\n");

    RoseGraph &g = build.g;
    auto eng_verts = get_eng_verts(g);

    map<DedupeLeftKey, vector<left_id>> engine_groups;
    for (const auto &e : eng_verts) {
        const left_id &left = e.first;
        const auto &verts = e.second;

        /* There should only be one report on an engine as no merges have
         * happened yet. (aside from eod prefixes) */
        if (all_reports(left).size() != 1) {
            assert(any_of_in(adjacent_vertices_range(verts.front(), g),
                             [&](RoseVertex w) { return g[w].eod_accept; }));
            continue;
        }

        if (left.haig()) {
            /* TODO: allow deduping of identical haigs */
            continue;
        }

        if (left.graph()) {
            /* we should not have merged yet */
            assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph()));
        }

        /* All of an engine's vertices must share the same set of (pred, top)
         * pairs, as the grouping key below is built from the first vertex's
         * preds. */
        auto preds = get_pred_tops(verts.front(), g);
        bool distinct_preds = false;
        for (RoseVertex v : verts) {
            if (preds != get_pred_tops(v, g)) {
                DEBUG_PRINTF("distinct pred sets\n");
                distinct_preds = true;
                break;
            }
        }
        if (distinct_preds) {
            continue;
        }
        engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
    }

    /* We don't bother chunking as we expect deduping to be successful if the
     * hashes match */

    for (auto &group : engine_groups | map_values) {
        DEBUG_PRINTF("group of %zu roses\n", group.size());

        if (group.size() < 2) {
            continue;
        }

        for (auto it = group.begin(); it != group.end(); ++it) {
            left_id r1 = *it;
            vector<RoseVertex> &verts1 = eng_verts[r1];
            assert(!verts1.empty()); /* cleared engines should be behind us */

            assert(all_reports(r1).size() == 1);
            ReportID r1_report = *all_reports(r1).begin();

            for (auto jt = next(it); jt != group.end(); ++jt) {
                left_id r2 = *jt;
                vector<RoseVertex> &verts2 = eng_verts[r2];
                assert(!verts2.empty());
                assert(all_reports(r2).size() == 1);
                ReportID r2_report = *all_reports(r2).begin();

                if (!is_equal(r1, r1_report, r2, r2_report)) {
                    continue;
                }

                if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) {
                    continue;
                }

                DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());

                // Replace r1 with r2.

                for (auto v : verts1) {
1655 DEBUG_PRINTF("replacing report %u with %u on %zu\n",
1656 r2_report, r1_report, g[v].index);
                    u32 orig_lag = g[v].left.lag;
                    g[v].left = g[verts2.front()].left;
                    g[v].left.lag = orig_lag;
                }

                insert(&verts2, verts2.end(), verts1);
                verts1.clear();

                /* remove stale entry from transient set, if present */
                build.transient.erase(r1);

                break;
            }
        }
    }
}

static
u32 findUnusedTop(const flat_set<u32> &tops) {
    u32 i = 0;
    while (contains(tops, i)) {
        i++;
    }
    return i;
}

// Remap the tops on each edge out of start according to the given mapping.
static
void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
    for (const auto &e : out_edges_range(h.start, h)) {
        NFAVertex v = target(e, h);
        if (v == h.startDs) {
            continue;
        }
        flat_set<u32> new_tops;
        for (u32 t : h[e].tops) {
            DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t);
            new_tops.insert(top_mapping.at(t));
        }
        h[e].tops = std::move(new_tops);
    }
}

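/*
 * Example (illustrative): if h1 and h2 both use tops {0, 1}, the loop below
 * remaps h1's tops via findUnusedTop(): 0 -> 2, then 1 -> 3 (each new top is
 * added to tops2 so it is not reused), leaving h1 with tops {2, 3} that are
 * disjoint from h2's {0, 1}.
 */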
1700static
1701bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
1702 map<u32, u32> &top_mapping) {
1703 flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2);
1704
1705 DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
1706 tops2.size());
1707
1708 // If our tops don't intersect, we're OK to merge with no changes.
1709 if (!has_intersection(tops1, tops2)) {
1710 DEBUG_PRINTF("tops don't intersect\n");
1711 return true;
1712 }
1713
1714 // Otherwise, we have to renumber the tops in h1 so that they don't overlap
1715 // with the tops in h2.
1716 top_mapping.clear();
1717 for (u32 t : tops1) {
1718 u32 u = findUnusedTop(tops2);
1719 DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
1720 top_mapping.insert(make_pair(t, u));
1721 assert(!contains(tops2, u));
1722 tops2.insert(u);
1723 }
1724
1725 replaceTops(h1, top_mapping);
1726 return true;
1727}
1728
1729bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
1730 const deque<RoseVertex> &verts1) {
1731 map<u32, u32> top_mapping;
1732 if (!setDistinctTops(h1, h2, top_mapping)) {
1733 return false;
1734 }
1735
1736 if (top_mapping.empty()) {
1737 return true; // No remapping necessary.
1738 }
1739
1740 for (auto v : verts1) {
1741 DEBUG_PRINTF("vertex %zu\n", g[v].index);
1742 assert(!g[v].left.haig);
1743 assert(!g[v].left.dfa);
1744 for (const auto &e : in_edges_range(v, g)) {
1745 u32 t = g[e].rose_top;
1746 DEBUG_PRINTF("t=%u\n", t);
1747 assert(contains(top_mapping, t));
1748 g[e].rose_top = top_mapping[t];
1749 DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
1750 g[source(e, g)].index, g[target(e, g)].index, t,
1751 top_mapping[t]);
1752 }
1753 }
1754
1755 return true;
1756}
1757
1758static
1759bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
1760 const deque<RoseVertex> &verts1) {
1761 map<u32, u32> top_mapping;
1762 if (!setDistinctTops(h1, h2, top_mapping)) {
1763 return false;
1764 }
1765
1766 if (top_mapping.empty()) {
1767 return true; // No remapping necessary.
1768 }
1769
1770 for (auto v : verts1) {
1771 DEBUG_PRINTF("vertex %zu\n", g[v].index);
1772 u32 t = g[v].suffix.top;
1773 assert(contains(top_mapping, t));
1774 g[v].suffix.top = top_mapping[t];
1775 }
1776
1777 return true;
1778}
1779
1780/** \brief Estimate the number of accel states in the given graph when built as
1781 * an NFA.
1782 *
1783 * (The easiest way to estimate something like this is to actually build it:
1784 * the criteria for NFA acceleration are quite complicated and buried in
1785 * limex_compile.)
1786 */
1787static
1788u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
1789 return countAccelStates(h, &tbi.rm, tbi.cc);
1790}
1791
1792static
1793void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
1794 RoseGraph &g = tbi.g;
1795 DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
1796
1797 // We track the number of accelerable states for each graph in a map and
1798 // only recompute them when the graph is modified.
1799 unordered_map<left_id, u32> accel_count;
1800 for (const auto &rose : roses) {
1801 assert(rose.graph()->kind == NFA_INFIX);
1802 accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
1803 }
1804
1805 for (auto it = roses.begin(); it != roses.end(); ++it) {
1806 left_id r1 = *it;
1807 const deque<RoseVertex> &verts1 = roses.vertices(r1);
1808
1809 deque<left_id> merged;
1810 for (auto jt = next(it); jt != roses.end(); ++jt) {
1811 left_id r2 = *jt;
1812 const deque<RoseVertex> &verts2 = roses.vertices(r2);
1813
1814 DEBUG_PRINTF("consider merging rose %p (%zu verts) "
1815 "with %p (%zu verts)\n",
1816 r1.graph(), verts1.size(), r2.graph(), verts2.size());
1817
1818 u32 accel1 = accel_count[r1];
1819 if (accel1 >= NFA_MAX_ACCEL_STATES) {
1820 DEBUG_PRINTF("h1 has hit max accel\n");
1821 break; // next h1
1822 }
1823
1824 u32 accel2 = accel_count[r2];
1825 if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
1826 DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
1827 "accel2=%u)\n",
1828 accel1, accel2);
1829 continue; // next h2
1830 }
1831
1832 if (!mergeableRoseVertices(tbi, verts1, verts2)) {
1833 DEBUG_PRINTF("not mergeable\n");
1834 continue; // next h2
1835 }
1836
1837 // Attempt to merge h2 into h1.
1838
1839 NGHolder victim;
1840 cloneHolder(victim, *r2.graph());
1841
1842 // Store a copy of the in-edge properties in case we have to roll
1843 // back.
1844 map<RoseEdge, RoseEdgeProps> edge_props;
1845 for (auto v : verts2) {
1846 for (const auto &e : in_edges_range(v, g)) {
1847 edge_props[e] = g[e];
1848 }
1849 }
1850
1851 if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
1852 DEBUG_PRINTF("can't set distinct tops\n");
1853 continue; // next h2
1854 }
1855
1856 assert(victim.kind == r1.graph()->kind);
1857 assert(!generates_callbacks(*r1.graph()));
1858 if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
1859 DEBUG_PRINTF("merge failed\n");
1860 // Roll back in-edge properties.
1861 for (const auto &m : edge_props) {
1862 g[m.first] = m.second;
1863 }
1864 continue; // next h2
1865 }
1866
1867 // Update h2's roses to point to h1 now
1868 shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
1869 for (auto v : verts2) {
1870 g[v].left.graph = winner;
1871 }
1872 roses.insert(r1, verts2);
1873
1874 merged.push_back(r2);
1875
1876 if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
1877 DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
1878 num_vertices(*winner));
1879 break; // next h1
1880 }
1881
1882 // Update h1's accel count estimate.
1883 accel_count[r1] = estimatedAccelStates(tbi, *winner);
1884 }
1885
1886 DEBUG_PRINTF("%zu roses merged\n", merged.size());
1887 roses.erase_all(merged.begin(), merged.end());
1888 }
1889}
1890
1891/**
1892 * This pass attempts to merge prefix/infix engines with a small number of
 * vertices together into larger engines. The engines must not have a
 * reformed start dot star (due to a leading repeat) and must not be an infix
 * LBR. Engines that have compatible lag are greedily grouped such that they
 * remain accelerable and only have a small number of states. Note: if a role
 * has an infix with multiple trigger vertices, the role will be left
 * unchanged by this pass and will continue to use its unmerged graph.
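 *
 * A minimal sketch of the accelerability budget applied to each candidate
 * pair, written here as a hypothetical helper (the real checks live inline
 * in mergeNfaLeftfixes, using counts from \ref estimatedAccelStates):
 *
 * \code
 *     // Hypothetical helper: true if merging a pair with these accel counts
 *     // keeps the result within the NFA acceleration budget.
 *     bool accelBudgetOk(u32 accel1, u32 accel2) {
 *         return accel1 < NFA_MAX_ACCEL_STATES &&
 *                accel1 + accel2 <= NFA_MAX_ACCEL_STATES;
 *     }
 * \endcode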
1899 */
1900void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
1901 DEBUG_PRINTF("entry\n");
1902
1903 if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
1904 return;
1905 }
1906
1907 RoseGraph &g = tbi.g;
1908
1909 LeftfixBouquet nfa_leftfixes;
1910
1911 for (auto v : vertices_range(g)) {
1912 if (!g[v].left) {
1913 continue;
1914 }
1915
        // Handle infixes only; prefixes are root successors.
1917 if (tbi.isRootSuccessor(v)) {
1918 continue;
1919 }
1920
1921 left_id left(g[v].left);
1922
1923 // Only non-transient for the moment.
1924 if (contains(tbi.transient, left)) {
1925 continue;
1926 }
1927
1928 // No DFAs or Haigs right now.
1929 if (left.dfa() || left.haig()) {
1930 continue;
1931 }
1932
1933 // Castles are handled by a different pass.
1934 if (left.castle()) {
1935 continue;
1936 }
1937
1938 assert(left.graph());
1939 NGHolder &h = *left.graph();
1940
1941 /* Ensure that kind on the graph is correct */
1942 assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
1943
1944 if (hasReformedStartDotStar(h, tbi.cc.grey)) {
1945 /* We would lose optimisations of the leading repeat by merging. */
1946 continue;
1947 }
1948
1949 // Small roses only.
1950 if (num_vertices(h) > small_rose_threshold(tbi.cc)) {
1951 continue;
1952 }
1953
1954 nfa_leftfixes.insert(left, v);
1955 }
1956
1957 deque<LeftfixBouquet> leftfix_groups;
1958 chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX);
1959 nfa_leftfixes.clear();
1960 DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n",
1961 leftfix_groups.size());
1962
1963 for (auto &group : leftfix_groups) {
1964 mergeNfaLeftfixes(tbi, group);
1965 }
1966}
1967
1968static
1969void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
1970 insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) {
1971 /* caller must have already ensured that candidates have the same reach */
1972 RoseGraph &g = build.g;
1973 DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size());
1974
1975 for (auto it = cands.begin(); it != cands.end(); ++it) {
1976 left_id &cand_1 = *it;
1977 vector<RoseVertex> &verts_1 = eng_verts[cand_1];
1978 if (verts_1.empty()) {
1979 continue;
1980 }
1981
1982 for (auto jt = next(it); jt != cands.end(); ++jt) {
1983 const left_id &cand_2 = *jt;
1984 vector<RoseVertex> &verts_2 = eng_verts[cand_2];
1985 if (verts_2.empty()) {
1986 continue;
1987 }
1988
1989 assert(cand_1.castle()->reach() == cand_2.castle()->reach());
1990
1991 if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) {
1992 DEBUG_PRINTF("not mergeable\n");
1993 continue; // next cand_2
1994 }
1995
1996 DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(),
1997 cand_1.castle()->repeats.size());
1998 DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(),
1999 cand_2.castle()->repeats.size());
2000
2001 map<u32, u32> top_map;
2002 if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) {
2003 DEBUG_PRINTF("couldn't merge\n");
2004 continue; // next cand_2
2005 }
2006
2007 // Update castle2's roses to point to castle1 now.
2008 shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle;
2009 for (auto v : verts_2) {
2010 assert(g[v].left.castle.get() == cand_2.castle());
2011 g[v].left.castle = winner;
2012 for (const auto &e : in_edges_range(v, g)) {
2013 g[e].rose_top = top_map.at(g[e].rose_top);
2014 }
2015 }
2016
2017 insert(&verts_1, verts_1.end(), verts_2);
2018 verts_2.clear();
2019 }
2020 }
2021}
2022
2023/**
2024 * Merges castles with the same reach together regardless of where in the rose
2025 * graph they are. Note: there is no requirement for the castles to have common
2026 * parent or target vertices.
2027 *
 * No heuristics are used to limit merges in block mode, as castle scanning
 * speed depends mainly on the reach being scanned.
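 *
 * A minimal sketch of the per-pair merge step performed by mergeCastleChunk,
 * where castle1 and castle2 stand for the two CastleProto candidates (already
 * known to share the same reach):
 *
 * \code
 *     map<u32, u32> top_map;
 *     if (mergeCastle(castle1, castle2, top_map)) {
 *         // castle2's repeats now live in castle1; every triggering edge of
 *         // castle2's vertices has its rose_top rewritten via top_map.
 *     }
 * \endcode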
2030 */
2031void mergeCastleLeftfixes(RoseBuildImpl &build) {
2032 DEBUG_PRINTF("entry\n");
2033
2034 if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses
2035 || !build.cc.grey.allowCastle) {
2036 return;
2037 }
2038
2039 RoseGraph &g = build.g;
2040
2041 insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
2042
2043 for (auto v : vertices_range(g)) {
2044 if (!g[v].left.castle) {
2045 continue;
2046 }
2047
2048 // Handle infixes only.
2049 if (build.isRootSuccessor(v)) {
2050 continue;
2051 }
2052
2053 eng_verts[g[v].left].push_back(v);
2054 }
2055
2056 map<CharReach, vector<left_id>> by_reach;
2057 for (const auto &left : eng_verts | map_keys) {
2058 by_reach[left.castle()->reach()].push_back(left);
2059 }
2060
2061 vector<vector<left_id>> chunks;
2062 for (auto &raw_group : by_reach | map_values) {
2063 chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX);
2064 }
2065 by_reach.clear();
2066
2067 DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size());
2068
2069 for (auto &chunk : chunks) {
2070 mergeCastleChunk(build, chunk, eng_verts);
2071 }
2072}
2073
2074static
2075void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes,
2076 const bool acyclic) {
2077 RoseGraph &g = tbi.g;
2078
2079 DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size());
2080
2081 // If this isn't an acyclic case, we track the number of accelerable states
2082 // for each graph in a map and only recompute them when the graph is
2083 // modified.
2084 unordered_map<suffix_id, u32> accel_count;
2085 if (!acyclic) {
2086 for (const auto &suffix : suffixes) {
2087 assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX);
2088 accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph());
2089 }
2090 }
2091
2092 for (auto it = suffixes.begin(); it != suffixes.end(); ++it) {
2093 suffix_id s1 = *it;
2094 const deque<RoseVertex> &verts1 = suffixes.vertices(s1);
2095 assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX);
2096
2097 // Caller should ensure that we don't propose merges of graphs that are
2098 // already too big.
2099 assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc));
2100
2101 deque<suffix_id> merged;
2102 for (auto jt = next(it); jt != suffixes.end(); ++jt) {
2103 suffix_id s2 = *jt;
2104 const deque<RoseVertex> &verts2 = suffixes.vertices(s2);
2105 assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX);
2106
2107 if (!acyclic) {
2108 u32 accel1 = accel_count[s1];
2109 if (accel1 >= NFA_MAX_ACCEL_STATES) {
2110 DEBUG_PRINTF("h1 has hit max accel\n");
2111 break; // next h1
2112 }
2113
2114 u32 accel2 = accel_count[s2];
2115 if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
2116 DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
2117 "accel2=%u)\n",
2118 accel1, accel2);
2119 continue; // next h2
2120 }
2121 }
2122
2123 // Attempt to merge h2 into h1.
2124
2125 NGHolder victim;
2126 cloneHolder(victim, *s2.graph());
2127
2128 // Store a copy of the suffix tops in case we have to roll back.
2129 map<RoseVertex, u32> old_tops;
2130 for (auto v : verts2) {
2131 old_tops[v] = g[v].suffix.top;
2132 }
2133
2134 if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) {
2135 DEBUG_PRINTF("can't set distinct tops\n");
2136 continue; // next h2
2137 }
2138
2139 if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) {
2140 DEBUG_PRINTF("merge failed\n");
2141 // Roll back in-edge properties.
2142 for (const auto &m : old_tops) {
2143 g[m.first].suffix.top = m.second;
2144 }
2145 continue; // next h2
2146 }
2147
2148 // Update h2's roses to point to h1 now
2149 shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph;
2150 for (auto v : verts2) {
2151 g[v].suffix.graph = winner;
2152 }
2153 suffixes.insert(s1, verts2);
2154 merged.push_back(s2);
2155
2156 if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) {
2157 DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
2158 num_vertices(*s1.graph()));
2159 break; // next h1
2160 }
2161
2162 if (!acyclic) {
2163 // Update h1's accel count estimate.
2164 accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
2165 }
2166 }
2167
2168 DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
2169 suffixes.erase_all(merged.begin(), merged.end());
2170 }
2171}
2172
2173/**
2174 * This merge pass combines suffixes from unrelated roles into a single
2175 * suffix with multiple top events in order to distinguish the triggers
2176 * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
2177 * while mergeSmallSuffixes only considers small suffixes. The merges will
2178 * group roles with suffixes in the graph into clusters of at most
2179 * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
 * suffixes and attempting to merge each pairwise with another member. Merges
 * will fail if the result is not implementable, requires too many distinct top
 * events, or loses the ability to be accelerated. A successful merge modifies
 * the existing suffix graph of one member (g1); the other member updates its
 * graph to refer to g1 instead of its previous graph (g2) and uses the newly
 * created tops. Other roles may have been sharing g1 - these are unaffected by
 * the change as the existing top events are left untouched. Other roles using
 * g2 are also unaffected, as g2 will continue to exist while it has any roles
 * triggering it.
2189 *
2190 * Note: suffixes destined for the LBR are not considered for these merges as
2191 * the LBR can only handle a single repeat and this type of repeat is ideally
2192 * handled outside of an NFA or DFA.
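 *
 * A minimal sketch of the per-pair step performed by mergeSuffixes, where s1
 * and s2 stand for the two candidate suffixes and verts2 for the roles using
 * s2:
 *
 * \code
 *     NGHolder victim;
 *     cloneHolder(victim, *s2.graph()); // work on a copy of s2's graph
 *     if (setDistinctSuffixTops(g, victim, *s1.graph(), verts2) &&
 *         mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) {
 *         // all roles using s2 may now point at s1's graph, using their
 *         // remapped tops
 *     }
 * \endcode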
2193 */
2194void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
2195 DEBUG_PRINTF("entry\n");
2196
2197 if (!tbi.cc.grey.mergeSuffixes) {
2198 return;
2199 }
2200
2201 SuffixBouquet suffixes;
2202
2203 RoseGraph &g = tbi.g;
2204
2205 for (auto v : vertices_range(g)) {
2206 shared_ptr<NGHolder> h = g[v].suffix.graph;
2207 if (!h || tbi.isInETable(v)) {
2208 continue;
2209 }
2210
2211 assert(!g[v].suffix.haig);
2212
2213 if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) {
2214 continue;
2215 }
2216
2217 if (!isAcyclic(*h)) {
2218 continue;
2219 }
2220
2221 suffixes.insert(g[v].suffix, v);
2222 }
2223
2224 deque<SuffixBouquet> suff_groups;
2225 chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
2226 DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
2227 suff_groups.size());
2228 suffixes.clear();
2229
2230 for (auto &group : suff_groups) {
2231 mergeSuffixes(tbi, group, true);
2232 }
2233}
2234
2235/**
2236 * This merge pass combines suffixes from unrelated roles into a single
2237 * suffix with multiple top events in order to distinguish the triggers
2238 * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
2239 * while mergeSmallSuffixes only considers small suffixes. The merges will
2240 * group roles with suffixes in the graph into clusters of at most
2241 * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
2242 * suffixes and attempting to pairwise merge it with another member. Merges
2243 * will fail if the result is not implementable, requires too many distinct top
2244 * events, or if it losses the ability to be accelerated. The merge will modify
2245 * the existing suffix graph of the one member (g1), the other member updates
2246 * it graph to refer to g1 instead of its previous graph (g2) and use the new
2247 * tops created. Other roles may have been sharing g1 - these are unaffected by
2248 * the change as the existing top events are left untouched. Other roles using
2249 * g2 are also unaffected as g2 will continue to exist until while it has any
2250 * roles triggering it.
2251 *
2252 * Note: suffixes destined for the LBR are not considered for these merges as
2253 * the LBR can only handle a single repeat and this type of repeat is ideally
2254 * handled outside of an NFA or DFA.
2255 */
2256void mergeSmallSuffixes(RoseBuildImpl &tbi) {
2257 DEBUG_PRINTF("entry\n");
2258
2259 if (!tbi.cc.grey.mergeSuffixes) {
2260 return;
2261 }
2262
2263 RoseGraph &g = tbi.g;
2264 SuffixBouquet suffixes;
2265
2266 for (auto v : vertices_range(g)) {
2267 shared_ptr<NGHolder> h = g[v].suffix.graph;
2268 if (!h || tbi.isInETable(v)) {
2269 continue;
2270 }
2271 assert(!g[v].suffix.haig);
2272
2273 // Leave acyclics out for the moment.
2274 if (isAcyclic(*h)) {
2275 continue;
2276 }
2277
2278 // Small-ish suffixes only.
2279 if (num_vertices(*h) > 32) {
2280 continue;
2281 }
2282
2283 suffixes.insert(g[v].suffix, v);
2284 }
2285
2286 deque<SuffixBouquet> suff_groups;
2287 chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
2288 DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
2289 suff_groups.size());
2290 suffixes.clear();
2291
2292 for (auto &group : suff_groups) {
2293 mergeSuffixes(tbi, group, false);
2294 }
2295}
2296
2297static
2298void removeDeadOutfixes(vector<OutfixInfo> &outfixes) {
2299 auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); };
2300 outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead),
2301 end(outfixes));
2302}
2303
2304static
2305void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) {
2306 assert(!winner.is_dead());
2307
2308 winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth);
2309 winner.minWidth = min(winner.minWidth, victim.minWidth);
2310 winner.maxWidth = max(winner.maxWidth, victim.maxWidth);
2311 winner.maxOffset = max(winner.maxOffset, victim.maxOffset);
2312 mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info);
2313
2314 // This outfix can be ignored in small block mode if both were. The dedupe
2315 // layer at runtime will protect us from extra matches if only one was in
2316 // the small block matcher.
2317 winner.in_sbmatcher &= victim.in_sbmatcher;
2318}
2319
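/**
 * Merges the given outfix holders in batches of at most
 * \ref MERGE_GROUP_SIZE_MAX, returning a map from each merged-away (victim)
 * holder to the holder that subsumed it.
 */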
2320static
2321map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build,
2322 const vector<NGHolder *> &nfas) {
2323 map<NGHolder *, NGHolder *> merged;
2324
2325 vector<NGHolder *> batch;
2326 for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) {
2327 batch.push_back(*it);
2328 assert((*it)->kind == NFA_OUTFIX);
2329 if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) {
2330 auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc);
2331 insert(&merged, batch_merged);
2332 batch.clear();
2333 }
2334 }
2335
2336 return merged;
2337}
2338
2339static
2340void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) {
2341 DEBUG_PRINTF("merging %zu nfas\n", nfas.size());
2342 if (nfas.size() < 2) {
2343 return;
2344 }
2345
2346 vector<OutfixInfo> &outfixes = tbi.outfixes;
2347
2348 map<NGHolder *, size_t> nfa_mapping;
2349 for (size_t i = 0; i < outfixes.size(); i++) {
2350 auto *holder = outfixes[i].holder();
2351 if (holder) {
2352 nfa_mapping[holder] = i;
2353 }
2354 }
2355
2356 map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas);
2357 if (merged.empty()) {
2358 return;
2359 }
2360
2361 DEBUG_PRINTF("%zu nfas merged\n", merged.size());
2362
2363 // Update the outfix info for merged holders.
2364 for (const auto &m : merged) {
2365 OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]);
2366 OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]);
2367 mergeOutfixInfo(winner, victim);
2368 victim.clear();
2369 }
2370
2371 removeDeadOutfixes(outfixes);
2372}
2373
2374namespace {
2375struct MergeMcClellan {
2376 MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in)
2377 : rm(rm_in), grey(grey_in) {}
2378
2379 unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const {
2380 assert(d1 && d2);
2381 return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey);
2382 }
2383
2384private:
2385 const ReportManager &rm;
2386 const Grey &grey;
2387};
2388
2389struct MergeHaig {
2390 explicit MergeHaig(u32 limit_in) : limit(limit_in) {}
2391
2392 unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1,
2393 const raw_som_dfa *d2) const {
2394 assert(d1 && d2);
2395 return attemptToMergeHaig({d1, d2}, limit);
2396 }
2397
2398private:
2399 const u32 limit; //!< state limit for merged result.
2400};
2401}
2402
2403/**
2404 * Generic pairwise merge algorithm that can be used for either McClellan
2405 * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge
2406 * operation to a merge functor, which allows the caller to set some policy
2407 * (state limits, etc).
2408 *
2409 * This is currently astonishingly simple and just considers every pair of
2410 * DFAs, slow and steady. We may wish to actually apply a merge ordering
2411 * strategy in the future.
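 *
 * For instance, the McClellan outfix path drives this (via chunkedDfaMerge)
 * with a functor carrying the state-limit policy:
 *
 * \code
 *     pairwiseDfaMerge(dfas, dfa_mapping, outfixes,
 *                      MergeMcClellan(tbi.rm, tbi.cc.grey));
 * \endcode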
2412 */
2413template<class RawDfa, class MergeFunctor>
2414static
2415void pairwiseDfaMerge(vector<RawDfa *> &dfas,
2416 unordered_map<RawDfa *, size_t> &dfa_mapping,
2417 vector<OutfixInfo> &outfixes,
2418 MergeFunctor merge_func) {
2419 DEBUG_PRINTF("merging group of size %zu\n", dfas.size());
2420
2421 for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) {
2422 if (!*it) {
2423 continue;
2424 }
2425 for (auto jt = next(it); jt != ite; ++jt) {
2426 if (!*jt) {
2427 continue;
2428 }
2429
2430 DEBUG_PRINTF("try merge %p and %p\n", *it, *jt);
2431 unique_ptr<RawDfa> rdfa = merge_func(*it, *jt);
2432 if (!rdfa) {
2433 continue; // Merge failed.
2434 }
2435
2436 DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get());
2437 OutfixInfo &winner = outfixes.at(dfa_mapping[*it]);
2438 OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]);
2439 assert(!winner.is_dead() && !victim.is_dead());
2440
2441 RawDfa *dfa_ptr = rdfa.get();
2442 dfa_mapping[dfa_ptr] = dfa_mapping[*it];
2443 dfa_mapping.erase(*it);
2444 winner.proto = move(rdfa);
2445
2446 mergeOutfixInfo(winner, victim);
2447
2448 victim.clear();
2449 *jt = nullptr; // to be deleted.
2450 *it = dfa_ptr;
2451 }
2452 }
2453}
2454
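/**
 * Splits the given DFAs into chunks of at most \ref DFA_CHUNK_SIZE_MAX
 * elements, runs pairwiseDfaMerge on each chunk, and rewrites \p dfas with
 * the surviving (merged) DFAs.
 */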
2455template<class RawDfa, class MergeFunctor>
2456static
2457void chunkedDfaMerge(vector<RawDfa *> &dfas,
2458 unordered_map<RawDfa *, size_t> &dfa_mapping,
2459 vector<OutfixInfo> &outfixes,
2460 MergeFunctor merge_func) {
2461 DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size());
2462
2463 vector<RawDfa *> out_dfas;
2464 vector<RawDfa *> chunk;
2465 for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) {
2466 chunk.push_back(*it);
2467 if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) {
2468 pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func);
2469 out_dfas.insert(end(out_dfas), begin(chunk), end(chunk));
2470 chunk.clear();
2471 }
2472 }
2473
2474 // Remove null (merged) DFAs and update vector for subsequent use.
2475 out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
2476 out_dfas.end());
2477 dfas.swap(out_dfas);
2478 DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
2479}
2480
2481static
2482void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
2483 DEBUG_PRINTF("merging %zu nfas\n", dfas.size());
2484 if (dfas.size() < 2) {
2485 return;
2486 }
2487
2488 vector<OutfixInfo> &outfixes = tbi.outfixes;
2489
    /* key is index into outfix array, as iterators etc. may be invalidated by
     * element addition. */
2492 unordered_map<raw_dfa *, size_t> dfa_mapping;
2493 for (size_t i = 0; i < outfixes.size(); i++) {
2494 auto *rdfa = outfixes[i].rdfa();
2495 if (rdfa) {
2496 dfa_mapping[rdfa] = i;
2497 }
2498 }
2499
2500 chunkedDfaMerge(dfas, dfa_mapping, outfixes,
2501 MergeMcClellan(tbi.rm, tbi.cc.grey));
2502 removeDeadOutfixes(outfixes);
2503}
2504
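/**
 * Attempts to convert outfix NFAs into McClellan DFAs and merge them with the
 * (already-merged) DFA outfixes. Only runs when the roseMcClellanOutfix grey
 * option is enabled.
 */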
2505static
2506void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
2507 const Grey &grey) {
2508 if (!grey.roseMcClellanOutfix) {
2509 return;
2510 }
2511
2512 DEBUG_PRINTF("merge combo\n");
2513
2514 bool seen_dfa = false;
2515 u32 nfa_count = 0;
2516 for (const auto &outfix : tbi.outfixes) {
2517 if (outfix.holder()) {
2518 DEBUG_PRINTF("nfa\n");
2519 nfa_count++;
2520 } else if (outfix.rdfa()) {
2521 DEBUG_PRINTF("dfa\n");
2522 seen_dfa = true;
2523 }
2524 }
2525
2526 DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count,
2527 (int)seen_dfa);
2528 if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
2529 DEBUG_PRINTF("no combo merges possible\n");
2530 return;
2531 }
2532
    /* key is index into outfix array, as iterators etc. may be invalidated by
     * element addition. */
2535 size_t new_dfas = 0;
2536 unordered_map<raw_dfa *, size_t> dfa_mapping;
2537 vector<raw_dfa *> dfas;
2538
2539 for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
2540 auto &outfix = *it;
2541 assert(!outfix.is_dead());
2542
2543 if (outfix.rdfa()) {
2544 auto *rdfa = outfix.rdfa();
2545 dfas.push_back(rdfa);
2546 dfa_mapping[rdfa] = it - tbi.outfixes.begin();
2547 continue;
2548 }
2549
2550 if (!outfix.holder()) {
2551 continue;
2552 }
2553
2554 NGHolder *h = outfix.holder();
2555 assert(h->kind == NFA_OUTFIX);
2556 auto rdfa = buildMcClellan(*h, &rm, grey);
2557 if (rdfa) {
2558 // Transform this outfix into a DFA and add it to the merge set.
2559 dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
2560 dfas.push_back(rdfa.get());
2561 outfix.proto = move(rdfa);
2562 new_dfas++;
2563 }
2564 }
2565
2566 DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
2567
2568 if (!new_dfas) {
2569 /* assumes normal dfas have already been fully merged */
2570 return;
2571 }
2572
2573 chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
2574 MergeMcClellan(tbi.rm, tbi.cc.grey));
2575 removeDeadOutfixes(tbi.outfixes);
2576}
2577
2578static
2579void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
2580 u32 limit) {
2581 if (dfas.size() < 2) {
2582 return;
2583 }
2584
2585 vector<OutfixInfo> &outfixes = tbi.outfixes;
2586
2587 unordered_map<raw_som_dfa *, size_t> dfa_mapping;
2588 for (size_t i = 0; i < outfixes.size(); i++) {
2589 auto *haig = outfixes[i].haig();
2590 if (haig) {
2591 dfa_mapping[haig] = i;
2592 }
2593 }
2594
2595 chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
2596 removeDeadOutfixes(outfixes);
2597}
2598
2599/**
2600 * This pass attempts to merge outfix engines together. At this point in time,
2601 * the engine type (NFA, DFA, Haig) has already been decided for each outfix
 * and outfixes can only be merged with others of the same type. NFAs are
 * merged in a priority order based on common prefix length. The other types are
2604 * merged blindly. Engines are merged to the extent that they can still be
2605 * implemented efficiently.
2606 */
2607void mergeOutfixes(RoseBuildImpl &tbi) {
2608 if (!tbi.cc.grey.mergeOutfixes) {
2609 return;
2610 }
2611
2612 vector<NGHolder *> nfas;
2613 vector<raw_dfa *> dfas;
2614 vector<raw_som_dfa *> som_dfas;
2615
2616 for (auto &outfix : tbi.outfixes) {
2617 if (outfix.rdfa()) {
2618 dfas.push_back(outfix.rdfa());
2619 } else if (outfix.holder()) {
2620 nfas.push_back(outfix.holder());
2621 } else if (outfix.haig()) {
2622 som_dfas.push_back(outfix.haig());
2623 }
2624 }
2625
2626 DEBUG_PRINTF("merging %zu dfas, %zu nfas\n",
2627 dfas.size(), nfas.size());
2628
2629 mergeOutfixNfas(tbi, nfas);
2630 mergeOutfixDfas(tbi, dfas);
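    // Haigs are merged in two rounds: first with a small state limit, then
    // again with a larger limit for any survivors.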
2631 mergeOutfixHaigs(tbi, som_dfas, 255);
2632 mergeOutfixHaigs(tbi, som_dfas, 8192);
2633 mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey);
2634}
2635
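/**
 * Computes the squash distance for an MPV trigger: \p min_width if the final
 * byte of every literal triggering \p tv falls outside the repeat's reach
 * \p cr (and so is guaranteed to kill the puffette), else 0.
 */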
2636static
2637u32 allowedSquashDistance(const CharReach &cr, u32 min_width,
2638 const RoseBuildImpl &tbi,
2639 RoseVertex tv) {
2640 CharReach accept_cr;
2641 DEBUG_PRINTF("hello |cr|=%zu\n", cr.count());
2642
2643 const RoseGraph &g = tbi.g;
2644
2645 /* TODO: inspect further back in the pattern */
2646 for (u32 lit_id : g[tv].literals) {
2647 const rose_literal_id &lit = tbi.literals.at(lit_id);
2648 if (lit.delay) {
2649 return 0; /* TODO: better */
2650 }
2651 if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) {
2652 return 0;
2653 }
2654 assert(!lit.s.empty());
2655 accept_cr |= *lit.s.rbegin();
2656 }
2657
2658 DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
2659
2660 if ((accept_cr & cr).any()) {
2661 DEBUG_PRINTF("no squash\n");
2662 return 0; /* the accept byte doesn't always kill the puffette. TODO:
2663 * maybe if we look further back we could find something that
2664 * would kill the puffette... */
2665 }
2666
2667 DEBUG_PRINTF("allowed to squash %u\n", min_width);
2668 return min_width;
2669}
2670
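/**
 * Replaces suffixes that are pure repeats (puffettes) with chain tails on the
 * MPV: the suffix graph is removed from the role and replaced with a report
 * that fires the corresponding MPV trigger event.
 */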
2671void mergePuffixes(RoseBuildImpl &tbi) {
2672 DEBUG_PRINTF("entry\n");
2673
2674 if (!tbi.cc.grey.mergeSuffixes) {
2675 return;
2676 }
2677
2678 RoseGraph &g = tbi.g;
2679
2680 for (auto v : vertices_range(g)) {
2681 shared_ptr<NGHolder> h = g[v].suffix.graph;
2682 if (!h) {
2683 continue;
2684 }
2685 assert(!g[v].suffix.haig);
2686 assert(!g[v].eod_accept);
2687
2688 assert(onlyOneTop(*h)); /* we should not have merged yet */
2689 bool fixed_depth = g[v].min_offset == g[v].max_offset;
2690
2691 if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) {
2692 continue;
2693 }
2694
2695 PureRepeat repeat;
2696 if (!isPureRepeat(*h, repeat)) {
2697 assert(0);
2698 continue;
2699 }
2700
2701 if (repeat.bounds.min == depth(0)) {
2702 assert(0); // No vacuous puffs allowed.
2703 continue;
2704 }
2705
2706 assert(repeat.bounds.min.is_finite() &&
2707 repeat.bounds.max.is_reachable());
2708 assert(repeat.bounds.max == repeat.bounds.min ||
2709 repeat.bounds.max.is_infinite());
2710
2711 const bool unbounded = repeat.bounds.max.is_infinite();
2712 const set<ReportID> reports = all_reports(*h);
2713 assert(reports.size() == 1);
2714 ReportID report = *reports.begin();
2715
2716 DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
2717 repeat.bounds.str().c_str());
2718
2719 raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
2720
2721 u32 queue;
2722 u32 event;
2723 tbi.addChainTail(rp, &queue, &event);
2724 u32 squashDistance =
2725 allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
2726
2727 Report ir = makeMpvTrigger(event, squashDistance);
2728 ReportID id = tbi.rm.getInternalId(ir);
2729
2730 DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
2731 g[v].suffix.reset();
2732 g[v].reports.insert(id);
2733 }
2734}
2735
2736static
2737void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
2738 u32 top, const vector<RoseVertex> &verts) {
2739 DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top,
2740 m.get(), verts.size());
2741
2742 for (auto v : verts) {
2743 assert(g[v].suffix.castle);
2744 g[v].suffix.castle = m;
2745 g[v].suffix.top = top;
2746 }
2747}
2748
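/**
 * Greedily merges the single-repeat castle suffixes in \p castles (which must
 * all share the same reach) into as few CastleProtos as possible, starting a
 * fresh target when the current one reaches max occupancy.
 */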
2749static
2750void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles,
2751 const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) {
2752 if (castles.size() <= 1) {
2753 return;
2754 }
2755
2756 DEBUG_PRINTF("merging reach %s, %zu elements\n",
2757 describeClass(castles[0]->reach()).c_str(), castles.size());
2758
2759 CastleProto *m = nullptr;
2760
2761 for (CastleProto *c : castles) {
2762 assert(c->repeats.size() == 1); // Not yet merged.
2763 assert(g[eng_verts.at(c).front()].suffix.castle.get() == c);
2764 if (!m) {
2765 m = c;
2766 continue;
2767 }
2768
2769 u32 top = m->merge(c->repeats[0]);
2770 if (top == CastleProto::max_occupancy) {
2771 // No room left to merge into 'm'. This one becomes the new 'm'.
2772 DEBUG_PRINTF("next mergee\n");
2773 m = c;
2774 continue;
2775 }
2776 updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top,
2777 eng_verts.at(c));
2778 DEBUG_PRINTF("added to %p, top %u\n", m, top);
2779 }
2780}
2781
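/**
 * Merges suffixes implemented as single-repeat castles together by reach, so
 * that roles with the same repeat reach share one CastleProto with multiple
 * tops.
 */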
2782void mergeCastleSuffixes(RoseBuildImpl &build) {
2783 DEBUG_PRINTF("entry\n");
2784
2785 if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) {
2786 return;
2787 }
2788
2789 unordered_map<CastleProto *, vector<RoseVertex>> eng_verts;
2790 map<CharReach, vector<CastleProto *>> by_reach;
2791
2792 RoseGraph &g = build.g;
2793
2794 for (auto v : vertices_range(g)) {
2795 if (!g[v].suffix.castle) {
2796 continue;
2797 }
2798
2799 CastleProto *c = g[v].suffix.castle.get();
2800
2801 if (c->repeats.size() != 1) {
2802 // This code assumes it's the only place merging is being done.
2803 assert(0);
2804 continue;
2805 }
2806
2807 if (!contains(eng_verts, c)) {
2808 by_reach[c->reach()].push_back(c);
2809 }
2810 eng_verts[c].push_back(v);
2811 }
2812
2813 for (auto &chunk : by_reach | map_values) {
2814 mergeCastleSuffixChunk(g, chunk, eng_verts);
2815 }
2816}
2817
2818} // namespace ue2
2819