1 | /* |
2 | * Copyright (c) 2015-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Rose Build: functions for reducing the size of the Rose graph |
31 | * through merging. |
32 | */ |
33 | #include "rose_build_merge.h" |
34 | |
35 | #include "grey.h" |
36 | #include "rose_build.h" |
37 | #include "rose_build_impl.h" |
38 | #include "rose_build_util.h" |
39 | #include "ue2common.h" |
40 | #include "nfa/castlecompile.h" |
41 | #include "nfa/goughcompile.h" |
42 | #include "nfa/limex_limits.h" |
43 | #include "nfa/mcclellancompile.h" |
44 | #include "nfa/nfa_build_util.h" |
45 | #include "nfa/rdfa_merge.h" |
46 | #include "nfagraph/ng_holder.h" |
47 | #include "nfagraph/ng_haig.h" |
48 | #include "nfagraph/ng_is_equal.h" |
49 | #include "nfagraph/ng_lbr.h" |
50 | #include "nfagraph/ng_limex.h" |
51 | #include "nfagraph/ng_mcclellan.h" |
52 | #include "nfagraph/ng_puff.h" |
53 | #include "nfagraph/ng_redundancy.h" |
54 | #include "nfagraph/ng_repeat.h" |
55 | #include "nfagraph/ng_reports.h" |
56 | #include "nfagraph/ng_stop.h" |
57 | #include "nfagraph/ng_uncalc_components.h" |
58 | #include "nfagraph/ng_util.h" |
59 | #include "nfagraph/ng_width.h" |
60 | #include "util/bitutils.h" |
61 | #include "util/charreach.h" |
62 | #include "util/compile_context.h" |
63 | #include "util/container.h" |
64 | #include "util/dump_charclass.h" |
65 | #include "util/graph_range.h" |
66 | #include "util/hash.h" |
67 | #include "util/insertion_ordered.h" |
68 | #include "util/order_check.h" |
69 | #include "util/report_manager.h" |
70 | #include "util/ue2string.h" |
71 | #include "util/unordered.h" |
72 | |
73 | #include <algorithm> |
74 | #include <functional> |
75 | #include <list> |
76 | #include <map> |
77 | #include <queue> |
78 | #include <set> |
79 | #include <string> |
80 | #include <vector> |
81 | #include <utility> |
82 | |
83 | #include <boost/range/adaptor/map.hpp> |
84 | |
85 | using namespace std; |
86 | using boost::adaptors::map_values; |
87 | using boost::adaptors::map_keys; |
88 | |
89 | namespace ue2 { |
90 | |
91 | static const size_t NARROW_START_MAX = 10; |
92 | static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128; |
93 | static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; |
94 | static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; |
95 | static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; |
96 | static const size_t MERGE_GROUP_SIZE_MAX = 200; |
97 | static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; |
98 | |
99 | /** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ |
100 | static const size_t DFA_CHUNK_SIZE_MAX = 200; |
101 | |
102 | /** \brief Max DFA states in a merged DFA. */ |
103 | static const size_t DFA_MERGE_MAX_STATES = 8000; |
104 | |
/** \brief In block mode, merge two prefixes even if they don't have identical
 * literal sets if they have fewer than this many vertices and the merged
 * graph is also small. */
108 | static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; |
109 | |
110 | static |
111 | size_t small_merge_max_vertices(const CompileContext &cc) { |
112 | return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM |
113 | : SMALL_MERGE_MAX_VERTICES_BLOCK; |
114 | } |
115 | |
116 | static |
117 | size_t small_rose_threshold(const CompileContext &cc) { |
118 | return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM |
119 | : SMALL_ROSE_THRESHOLD_BLOCK; |
120 | } |
121 | |
122 | /** |
123 | * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that |
124 | * reports should not contribute to the hash. |
125 | */ |
126 | static |
127 | size_t hashLeftfix(const left_id &left) { |
128 | size_t val = 0; |
129 | |
130 | if (left.castle()) { |
131 | hash_combine(val, left.castle()->reach()); |
132 | for (const auto &pr : left.castle()->repeats) { |
133 | hash_combine(val, pr.first); // top |
134 | hash_combine(val, pr.second.bounds); |
135 | } |
136 | } else if (left.graph()) { |
137 | hash_combine(val, hash_holder(*left.graph())); |
138 | } |
139 | |
140 | return val; |
141 | } |
142 | |
143 | namespace { |
144 | |
/** Key used to group sets of leftfixes for the dedupeLeftfixes path. */
146 | struct RoseGroup { |
147 | RoseGroup(const RoseBuildImpl &build, RoseVertex v) |
148 | : left_hash(hashLeftfix(build.g[v].left)), |
149 | lag(build.g[v].left.lag), eod_table(build.isInETable(v)) { |
150 | const RoseGraph &g = build.g; |
151 | assert(in_degree(v, g) == 1); |
152 | RoseVertex u = *inv_adjacent_vertices(v, g).first; |
153 | parent = g[u].index; |
154 | } |
155 | |
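    /* ORDER_CHECK(field) compares the field on both objects and returns from
     * this function as soon as they differ, so the sequence below yields a
     * lexicographic ordering over (parent, left_hash, lag, eod_table). */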
156 | bool operator<(const RoseGroup &b) const { |
157 | const RoseGroup &a = *this; |
158 | ORDER_CHECK(parent); |
159 | ORDER_CHECK(left_hash); |
160 | ORDER_CHECK(lag); |
161 | ORDER_CHECK(eod_table); |
162 | return false; |
163 | } |
164 | |
165 | private: |
166 | /** Parent vertex index. We must use the index, rather than the descriptor, |
167 | * for compile determinism. */ |
168 | size_t parent; |
169 | |
170 | /** Quick hash of the leftfix itself. Must be identical for a given pair of |
171 | * graphs if is_equal would return true. */ |
172 | size_t left_hash; |
173 | |
174 | /** Leftfix lag value. */ |
175 | u32 lag; |
176 | |
177 | /** True if associated vertex (successor) is in the EOD table. We don't |
178 | * allow sharing of leftfix engines between "normal" and EOD operation. */ |
179 | bool eod_table; |
180 | }; |
181 | |
182 | /** |
183 | * Intended to find graphs that are identical except for their report |
184 | * IDs. Relies on vertex and edge indices to pick up graphs that have been |
185 | * messily put together in different orderings. Only implemented for castles and |
186 | * holders. |
187 | */ |
188 | static |
189 | bool is_equal(const left_id &u_left, ReportID u_report, |
190 | const left_id &v_left, ReportID v_report) { |
191 | if (u_left.castle() && v_left.castle()) { |
192 | return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); |
193 | } |
194 | |
195 | if (!u_left.graph() || !v_left.graph()) { |
196 | return false; |
197 | } |
198 | |
199 | return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); |
200 | } |
201 | |
202 | } // namespace |
203 | |
/**
 * This pass performs work similar to \ref dedupeSuffixes - it removes
 * duplicate prefixes/infixes (that is, leftfixes) which are identical graphs
 * and share the same trigger vertex and lag. Leftfixes are first grouped by
 * parent role and lag to reduce the number of candidates to be inspected
 * for each leftfix. The graphs in each cluster are then compared with each
 * other and the Rose graph is updated to refer only to a canonical version
 * of each graph.
 *
 * Note: only roles with a single predecessor vertex are considered for this
 * transform - it should probably be generalised to work for roles which share
 * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
 * it should be retired entirely.
 */
218 | bool dedupeLeftfixes(RoseBuildImpl &tbi) { |
    DEBUG_PRINTF("deduping leftfixes\n");
220 | map<RoseGroup, deque<RoseVertex>> roses; |
221 | bool work_done = false; |
222 | |
223 | /* Note: a leftfix's transientness will not be altered by deduping */ |
224 | |
225 | // Collect leftfixes into groups. |
226 | RoseGraph &g = tbi.g; |
227 | for (auto v : vertices_range(g)) { |
228 | if (!g[v].left) { |
229 | continue; |
230 | } |
231 | const left_id left(g[v].left); |
232 | |
233 | if (left.haig()) { |
234 | /* TODO: allow merging of identical haigs */ |
235 | continue; |
236 | } |
237 | |
238 | if (in_degree(v, g) != 1) { |
239 | continue; |
240 | } |
241 | |
242 | roses[RoseGroup(tbi, v)].push_back(v); |
243 | } |
244 | |
    DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
246 | |
247 | // Walk groups and dedupe the roses therein. |
248 | for (deque<RoseVertex> &verts : roses | map_values) { |
        DEBUG_PRINTF("group has %zu vertices\n", verts.size());
250 | |
251 | unordered_set<left_id> seen; |
252 | |
253 | for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) { |
254 | RoseVertex v = *jt; |
255 | left_id left(g[v].left); |
256 | |
257 | // Skip cases we've already handled, and mark as seen otherwise. |
258 | if (!seen.insert(left).second) { |
259 | continue; |
260 | } |
261 | |
262 | // Scan the rest of the list for dupes. |
263 | for (auto kt = std::next(jt); kt != jte; ++kt) { |
264 | if (g[v].left == g[*kt].left |
265 | || !is_equal(g[v].left, g[v].left.leftfix_report, |
266 | g[*kt].left, g[*kt].left.leftfix_report)) { |
267 | continue; |
268 | } |
269 | |
270 | // Dupe found. |
                DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
                             g[*kt].index, g[v].index);
273 | assert(g[v].left.lag == g[*kt].left.lag); |
274 | g[*kt].left = g[v].left; |
275 | work_done = true; |
276 | } |
277 | } |
278 | } |
279 | |
280 | return work_done; |
281 | } |
282 | |
283 | /** |
284 | * \brief Returns a numeric key that can be used to group this suffix with |
285 | * others that may be its duplicate. |
286 | */ |
287 | static |
288 | size_t suffix_size_key(const suffix_id &s) { |
289 | if (s.graph()) { |
290 | return num_vertices(*s.graph()); |
291 | } |
292 | if (s.castle()) { |
293 | return s.castle()->repeats.size(); |
294 | } |
295 | return 0; |
296 | } |
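
/* For example (illustrative): a suffix holder with 12 vertices keys as 12 and
 * a castle with three repeats keys as 3. Key collisions are harmless, as
 * grouped candidates are still compared properly with is_equal() below. */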
297 | |
298 | static |
299 | bool is_equal(const suffix_id &s1, const suffix_id &s2) { |
300 | if (s1.graph() && s2.graph()) { |
301 | return is_equal(*s1.graph(), *s2.graph()); |
302 | } else if (s1.castle() && s2.castle()) { |
303 | return is_equal(*s1.castle(), *s2.castle()); |
304 | } |
305 | return false; |
306 | } |
307 | |
308 | /** |
309 | * This function simply looks for suffix NGHolder graphs which are identical |
310 | * and updates the roles in the RoseGraph to refer to only a single copy. This |
311 | * obviously has benefits in terms of both performance (as we don't run |
312 | * multiple engines doing the same work) and stream state. This function first |
313 | * groups all suffixes by number of vertices and report set to restrict the set |
314 | * of possible candidates. Each group is then walked to find duplicates using |
315 | * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it |
316 | * goes. |
317 | * |
318 | * Note: does not dedupe suffixes of vertices in the EOD table. |
319 | */ |
320 | void dedupeSuffixes(RoseBuildImpl &tbi) { |
    DEBUG_PRINTF("deduping suffixes\n");
322 | |
323 | unordered_map<suffix_id, set<RoseVertex>> suffix_map; |
324 | map<pair<size_t, set<ReportID>>, vector<suffix_id>> part; |
325 | |
326 | // Collect suffixes into groups. |
327 | RoseGraph &g = tbi.g; |
328 | for (auto v : vertices_range(g)) { |
329 | if (!g[v].suffix || tbi.isInETable(v)) { |
330 | continue; |
331 | } |
332 | |
333 | const suffix_id s(g[v].suffix); |
334 | |
335 | if (!(s.graph() || s.castle())) { |
336 | continue; // e.g. Haig |
337 | } |
338 | |
339 | set<RoseVertex> &verts = suffix_map[s]; |
340 | if (verts.empty()) { |
341 | part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s); |
342 | } |
343 | verts.insert(v); |
344 | } |
345 | |
    DEBUG_PRINTF("collected %zu groups\n", part.size());
347 | |
348 | for (const auto &cand : part | map_values) { |
349 | if (cand.size() <= 1) { |
350 | continue; |
351 | } |
        DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
353 | |
354 | for (auto jt = cand.begin(); jt != cand.end(); ++jt) { |
355 | if (suffix_map[*jt].empty()) { |
356 | continue; |
357 | } |
358 | for (auto kt = next(jt); kt != cand.end(); ++kt) { |
359 | if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) { |
360 | continue; |
361 | } |
                DEBUG_PRINTF("found dupe\n");
363 | for (auto v : suffix_map[*kt]) { |
364 | RoseVertex dupe = *suffix_map[*jt].begin(); |
365 | assert(dupe != v); |
366 | g[v].suffix.graph = g[dupe].suffix.graph; |
367 | g[v].suffix.castle = g[dupe].suffix.castle; |
368 | assert(suffix_id(g[v].suffix) == |
369 | suffix_id(g[dupe].suffix)); |
370 | suffix_map[*jt].insert(v); |
371 | } |
372 | suffix_map[*kt].clear(); |
373 | } |
374 | } |
375 | } |
376 | } |
377 | |
378 | namespace { |
379 | |
380 | /** |
381 | * This class stores a mapping from an engine reference (left_id, suffix_id, |
382 | * etc) to a list of vertices, and also allows us to iterate over the set of |
383 | * engine references in insertion order -- we add to the mapping in vertex |
384 | * iteration order, so this allows us to provide a consistent ordering. |
385 | */ |
386 | template<class EngineRef> |
387 | class Bouquet { |
388 | private: |
389 | list<EngineRef> ordering; // Unique list in insert order. |
390 | using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>; |
391 | BouquetMap bouquet; |
392 | public: |
393 | void insert(const EngineRef &h, RoseVertex v) { |
394 | typename BouquetMap::iterator f = bouquet.find(h); |
395 | if (f == bouquet.end()) { |
396 | ordering.push_back(h); |
397 | bouquet[h].push_back(v); |
398 | } else { |
399 | f->second.push_back(v); |
400 | } |
401 | } |
402 | |
403 | void insert(const EngineRef &h, const deque<RoseVertex> &verts) { |
404 | typename BouquetMap::iterator f = bouquet.find(h); |
405 | if (f == bouquet.end()) { |
406 | ordering.push_back(h); |
407 | bouquet.insert(make_pair(h, verts)); |
408 | } else { |
409 | f->second.insert(f->second.end(), verts.begin(), verts.end()); |
410 | } |
411 | } |
412 | |
413 | const deque<RoseVertex> &vertices(const EngineRef &h) const { |
414 | typename BouquetMap::const_iterator it = bouquet.find(h); |
415 | assert(it != bouquet.end()); // must be present |
416 | return it->second; |
417 | } |
418 | |
419 | void erase(const EngineRef &h) { |
420 | assert(bouquet.find(h) != bouquet.end()); |
421 | bouquet.erase(h); |
422 | ordering.remove(h); |
423 | } |
424 | |
425 | /** Remove all the elements in the given iterator range. */ |
426 | template <class Iter> |
427 | void erase_all(Iter erase_begin, Iter erase_end) { |
428 | for (Iter it = erase_begin; it != erase_end; ++it) { |
429 | bouquet.erase(*it); |
430 | } |
431 | |
432 | // Use a quick-lookup container so that we only have to traverse the |
433 | // 'ordering' list once. |
434 | const set<EngineRef> dead(erase_begin, erase_end); |
435 | for (iterator it = begin(); it != end(); /* incremented inside */) { |
436 | if (contains(dead, *it)) { |
437 | ordering.erase(it++); |
438 | } else { |
439 | ++it; |
440 | } |
441 | } |
442 | } |
443 | |
444 | void clear() { |
445 | ordering.clear(); |
446 | bouquet.clear(); |
447 | } |
448 | |
449 | size_t size() const { return bouquet.size(); } |
450 | |
451 | // iterate over holders in insert order |
452 | typedef typename list<EngineRef>::iterator iterator; |
453 | iterator begin() { return ordering.begin(); } |
454 | iterator end() { return ordering.end(); } |
455 | |
456 | // const iterate over holders in insert order |
457 | typedef typename list<EngineRef>::const_iterator const_iterator; |
458 | const_iterator begin() const { return ordering.begin(); } |
459 | const_iterator end() const { return ordering.end(); } |
460 | }; |
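
/*
 * Illustrative usage sketch (not part of the build): collect each distinct
 * leftfix engine with the vertices that use it, then visit the engines in
 * first-insertion order. Assumes `build` is a populated RoseBuildImpl.
 *
 *     Bouquet<left_id> lefts;
 *     for (auto v : vertices_range(build.g)) {
 *         if (build.g[v].left) {
 *             lefts.insert(left_id(build.g[v].left), v);
 *         }
 *     }
 *     for (const left_id &left : lefts) {
 *         DEBUG_PRINTF("engine has %zu vertices\n",
 *                      lefts.vertices(left).size());
 *     }
 */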
461 | |
462 | typedef Bouquet<left_id> LeftfixBouquet; |
463 | typedef Bouquet<suffix_id> SuffixBouquet; |
464 | |
465 | } // namespace |
466 | |
467 | /** |
468 | * Split a \ref Bouquet of some type into several smaller ones. |
469 | */ |
470 | template <class EngineRef> |
471 | static void chunkBouquets(const Bouquet<EngineRef> &in, |
472 | deque<Bouquet<EngineRef>> &out, |
473 | const size_t chunk_size) { |
474 | if (in.size() <= chunk_size) { |
475 | out.push_back(in); |
476 | return; |
477 | } |
478 | |
479 | out.push_back(Bouquet<EngineRef>()); |
480 | for (const auto &engine : in) { |
481 | if (out.back().size() >= chunk_size) { |
482 | out.push_back(Bouquet<EngineRef>()); |
483 | } |
484 | out.back().insert(engine, in.vertices(engine)); |
485 | } |
486 | } |
487 | |
488 | static |
489 | bool stringsCanFinishAtSameSpot(const ue2_literal &u, |
490 | ue2_literal::const_iterator v_b, |
491 | ue2_literal::const_iterator v_e) { |
492 | ue2_literal::const_iterator u_e = u.end(); |
493 | ue2_literal::const_iterator u_b = u.begin(); |
494 | |
495 | while (u_e != u_b && v_e != v_b) { |
496 | --u_e; |
497 | --v_e; |
498 | |
499 | if (!overlaps(*u_e, *v_e)) { |
500 | return false; |
501 | } |
502 | } |
503 | |
504 | return true; |
505 | } |
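
/* For example (illustrative): with u = "abc" and v = "bc", the suffixes agree
 * character-by-character from the end, so the two strings can finish at the
 * same spot; with v = "dc" they cannot, as 'b' and 'd' do not overlap. */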
506 | |
/**
 * Check that, once u has been seen, it is impossible for the arrival of v to
 * require the inspection of an engine at a point earlier than u did.
 *
 * Let delta be the earliest that v can be seen after u (may be zero).
 *
 * That is, we require u_loc - ulag <= v_loc - vlag (with v_loc = u_loc + delta)
 *     ==>        - ulag <= delta - vlag
 *     ==>   vlag - ulag <= delta
 */
517 | static |
518 | bool checkPrefix(const rose_literal_id &ul, const u32 ulag, |
519 | const rose_literal_id &vl, const u32 vlag) { |
    DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
                 escapeString(vl.s).c_str(), vlag);
522 | |
523 | if (vl.delay || ul.delay) { |
524 | /* engine related literals should not be delayed anyway */ |
525 | return false; |
526 | } |
527 | |
528 | if (ulag >= vlag) { |
529 | assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); |
530 | return true; |
531 | } |
532 | |
533 | size_t min_allowed_delta = vlag - ulag; |
    DEBUG_PRINTF("min allowed distance %zu\n", min_allowed_delta);
535 | |
536 | for (size_t i = 0; i < min_allowed_delta; i++) { |
537 | if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { |
            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
539 | return false; |
540 | } |
541 | } |
542 | |
    DEBUG_PRINTF("OK\n");
544 | return true; |
545 | } |
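
/*
 * Worked example (illustrative): suppose ul.s = "abc" with ulag = 0 and
 * vl.s = "bc" with vlag = 1, giving min_allowed_delta = 1. The i = 0
 * iteration asks whether "bc" can end at the same offset as "abc" -- it can,
 * so v's engine could need checking at offset X - 1 after u's engine was
 * already checked at offset X, and the merge is rejected.
 */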
546 | |
547 | static |
548 | bool hasSameEngineType(const RoseVertexProps &u_prop, |
549 | const RoseVertexProps &v_prop) { |
550 | const left_id u_left = u_prop.left; |
551 | const left_id v_left = v_prop.left; |
552 | |
553 | return !u_left.haig() == !v_left.haig() |
554 | && !u_left.dfa() == !v_left.dfa() |
555 | && !u_left.castle() == !v_left.castle() |
556 | && !u_left.graph() == !v_left.graph(); |
557 | } |
558 | |
559 | /** |
560 | * Verifies that merging the leftfix of vertices does not cause conflicts due |
561 | * to the literals on the right. |
562 | * |
 * The main concern is that the lags of the literals and the overlap between
 * them may allow the engine check offset to regress.
565 | * |
566 | * Parameters are vectors of literals + lag pairs. |
567 | * |
 * Note: if more constraints on when the leftfixes will be checked were
 * exploited (mandatory lookarounds passing, offset checks), more merges
 * might be allowed.
570 | */ |
571 | static |
572 | bool compatibleLiteralsForMerge( |
573 | const vector<pair<const rose_literal_id *, u32>> &ulits, |
574 | const vector<pair<const rose_literal_id *, u32>> &vlits) { |
575 | assert(!ulits.empty()); |
576 | assert(!vlits.empty()); |
577 | |
    // We cannot merge engines whose trigger literals are in different tables.
579 | if (ulits[0].first->table != vlits[0].first->table) { |
        DEBUG_PRINTF("literals in different tables\n");
581 | return false; |
582 | } |
583 | |
584 | // We don't handle delayed cases yet. |
585 | for (const auto &ue : ulits) { |
586 | const rose_literal_id &ul = *ue.first; |
587 | if (ul.delay) { |
588 | return false; |
589 | } |
590 | } |
591 | |
592 | for (const auto &ve : vlits) { |
593 | const rose_literal_id &vl = *ve.first; |
594 | if (vl.delay) { |
595 | return false; |
596 | } |
597 | } |
598 | |
    /* An engine requires that all accesses to it are ordered by offsets. (i.e.,
       we cannot check an engine's state at offset Y if we have already
       checked its status at offset X and X > Y). If we cannot establish that
       the literals used for triggering will satisfy this property, then it is
       not safe to merge the engine. */
604 | for (const auto &ue : ulits) { |
605 | const rose_literal_id &ul = *ue.first; |
606 | u32 ulag = ue.second; |
607 | |
608 | for (const auto &ve : vlits) { |
609 | const rose_literal_id &vl = *ve.first; |
610 | u32 vlag = ve.second; |
611 | |
612 | if (!checkPrefix(ul, ulag, vl, vlag) |
613 | || !checkPrefix(vl, vlag, ul, ulag)) { |
                DEBUG_PRINTF("prefix check failed\n");
615 | return false; |
616 | } |
617 | } |
618 | } |
619 | |
620 | return true; |
621 | } |
622 | |
623 | /** |
624 | * True if this graph has few enough accel states to be implemented as an NFA |
625 | * with all of those states actually becoming accel schemes. |
626 | */ |
627 | static |
628 | bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { |
629 | u32 num = countAccelStates(g, &build.rm, build.cc); |
    DEBUG_PRINTF("graph with %zu vertices has %u accel states\n",
                 num_vertices(g), num);
632 | return num <= NFA_MAX_ACCEL_STATES; |
633 | } |
634 | |
635 | /** |
 * In block mode, we want to be a little more selective -- we will only merge
637 | * prefix engines when the literal sets are the same or if the merged graph |
638 | * has only grown by a small amount. |
639 | */ |
640 | static |
641 | bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, |
642 | RoseVertex v) { |
643 | assert(!build.cc.streaming); |
644 | assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); |
645 | |
646 | // Always merge infixes if we can (subject to the other criteria in |
647 | // mergeableRoseVertices). |
648 | if (!build.isRootSuccessor(u)) { |
649 | return true; |
650 | } |
651 | |
652 | const RoseGraph &g = build.g; |
653 | |
654 | // Merge prefixes with identical literal sets (as we'd have to run them |
655 | // both when we see those literals anyway). |
656 | if (g[u].literals == g[v].literals) { |
657 | return true; |
658 | } |
659 | |
660 | // The rest of this function only deals with the case when both vertices |
661 | // have graph leftfixes. |
662 | if (!g[u].left.graph || !g[v].left.graph) { |
663 | return false; |
664 | } |
665 | |
666 | const size_t u_count = num_vertices(*g[u].left.graph); |
667 | const size_t v_count = num_vertices(*g[v].left.graph); |
    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
                 u_count, v_count);
670 | if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || |
671 | v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("prefixes too big already\n");
673 | return false; |
674 | } |
675 | |
    DEBUG_PRINTF("trying merge\n");
677 | NGHolder h; |
678 | cloneHolder(h, *g[v].left.graph); |
679 | if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { |
        DEBUG_PRINTF("couldn't merge\n");
681 | return false; |
682 | } |
683 | |
684 | const size_t merged_count = num_vertices(h); |
    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
686 | if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("exceeded limit\n");
688 | return false; |
689 | } |
690 | |
691 | // We want to only perform merges that take advantage of some |
692 | // commonality in the two input graphs, so we check that the number of |
693 | // vertices has only grown a small amount: somewhere between the sum |
694 | // (no commonality) and the max (no growth at all) of the vertex counts |
695 | // of the input graphs. |
696 | const size_t max_size = u_count + v_count; |
697 | const size_t min_size = max(u_count, v_count); |
698 | const size_t max_growth = ((max_size - min_size) * 25) / 100; |
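    // For example (illustrative): u_count = 20 and v_count = 24 give
    // max_size = 44, min_size = 24 and max_growth = 5, so a merged graph of
    // more than 29 vertices is rejected as showing too little commonality.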
699 | if (merged_count > min_size + max_growth) { |
        DEBUG_PRINTF("grew too much\n");
701 | return false; |
702 | } |
703 | |
704 | // We don't want to squander any chances at accelerating. |
705 | if (!isAccelerableLeftfix(build, h) && |
706 | (isAccelerableLeftfix(build, *g[u].left.graph) || |
707 | isAccelerableLeftfix(build, *g[v].left.graph))) { |
        DEBUG_PRINTF("would lose accel property\n");
709 | return false; |
710 | } |
711 | |
    DEBUG_PRINTF("safe to merge\n");
713 | return true; |
714 | } |
715 | |
716 | bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, |
717 | RoseVertex v) { |
718 | assert(u != v); |
719 | |
720 | if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { |
721 | return false; |
722 | } |
723 | |
724 | if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { |
725 | return false; |
726 | } |
727 | |
728 | /* We cannot merge prefixes/vertices if they are successors of different |
729 | * root vertices */ |
730 | if (tbi.isRootSuccessor(u)) { |
731 | assert(tbi.isRootSuccessor(v)); |
732 | set<RoseVertex> u_preds; |
733 | set<RoseVertex> v_preds; |
734 | insert(&u_preds, inv_adjacent_vertices(u, tbi.g)); |
735 | insert(&v_preds, inv_adjacent_vertices(v, tbi.g)); |
736 | |
737 | if (u_preds != v_preds) { |
738 | return false; |
739 | } |
740 | } |
741 | |
742 | u32 ulag = tbi.g[u].left.lag; |
743 | vector<pair<const rose_literal_id *, u32>> ulits; |
744 | ulits.reserve(tbi.g[u].literals.size()); |
745 | for (u32 id : tbi.g[u].literals) { |
746 | ulits.emplace_back(&tbi.literals.at(id), ulag); |
747 | } |
748 | |
749 | u32 vlag = tbi.g[v].left.lag; |
750 | vector<pair<const rose_literal_id *, u32>> vlits; |
751 | vlits.reserve(tbi.g[v].literals.size()); |
752 | for (u32 id : tbi.g[v].literals) { |
753 | vlits.emplace_back(&tbi.literals.at(id), vlag); |
754 | } |
755 | |
756 | if (!compatibleLiteralsForMerge(ulits, vlits)) { |
757 | return false; |
758 | } |
759 | |
    DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
                 tbi.g[v].index);
762 | return true; |
763 | } |
764 | |
/* We cannot merge an engine if a trigger literal and a post literal overlap
 * in such a way that the engine's status would need to be checked at a point
 * before the engine's current location.
 *
 * That is, for a trigger literal u and a post literal v,
 * where delta is the earliest v can appear after u,
 * we require that v_loc - v_lag >= u_loc
 *            ==>  u_loc + delta - v_lag >= u_loc
 *            ==>  delta >= v_lag
 */
776 | static |
777 | bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, |
778 | u32 vlag) { |
    DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
                 escapeString(vl.s).c_str(), vlag);
781 | |
782 | for (size_t i = 0; i < vlag; i++) { |
783 | if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { |
            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
785 | return false; |
786 | } |
787 | } |
788 | |
    DEBUG_PRINTF("OK\n");
790 | return true; |
791 | } |
792 | |
793 | template<typename VertexCont> |
794 | static never_inline |
795 | bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, |
796 | const VertexCont &v2) { |
797 | flat_set<RoseVertex> preds; |
798 | for (auto v : v1) { |
799 | insert(&preds, inv_adjacent_vertices(v, build.g)); |
800 | } |
801 | |
802 | flat_set<u32> pred_lits; |
803 | |
804 | /* No need to examine delays of a common pred - as it must already have |
805 | * survived the delay checks. |
806 | * |
807 | * This is important when the pred is in the anchored table as |
808 | * the literal is no longer available. */ |
809 | flat_set<RoseVertex> known_good_preds; |
810 | for (auto v : v2) { |
811 | insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); |
812 | } |
813 | |
814 | for (auto u : preds) { |
815 | if (!contains(known_good_preds, u)) { |
816 | insert(&pred_lits, build.g[u].literals); |
817 | } |
818 | } |
819 | |
820 | vector<const rose_literal_id *> pred_rose_lits; |
821 | pred_rose_lits.reserve(pred_lits.size()); |
822 | for (const auto &p : pred_lits) { |
823 | pred_rose_lits.push_back(&build.literals.at(p)); |
824 | } |
825 | |
826 | for (auto v : v2) { |
827 | u32 vlag = build.g[v].left.lag; |
828 | if (!vlag) { |
829 | continue; |
830 | } |
831 | |
832 | for (const u32 vlit : build.g[v].literals) { |
833 | const rose_literal_id &vl = build.literals.at(vlit); |
834 | assert(!vl.delay); // this should never have got this far? |
835 | for (const auto &ul : pred_rose_lits) { |
836 | assert(!ul->delay); // this should never have got this far? |
837 | |
838 | if (!checkPredDelay(*ul, vl, vlag)) { |
839 | return false; |
840 | } |
841 | } |
842 | } |
843 | } |
844 | |
845 | return true; |
846 | } |
847 | |
848 | static |
849 | bool mergeableRoseVertices(const RoseBuildImpl &tbi, |
850 | const deque<RoseVertex> &verts1, |
851 | const deque<RoseVertex> &verts2) { |
852 | assert(!verts1.empty()); |
853 | assert(!verts2.empty()); |
854 | |
855 | RoseVertex u_front = verts1.front(); |
856 | RoseVertex v_front = verts2.front(); |
857 | |
858 | /* all vertices must have the same engine type: assume all verts in each |
859 | * group are already of the same type */ |
860 | if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { |
861 | return false; |
862 | } |
863 | |
864 | bool is_prefix = tbi.isRootSuccessor(u_front); |
865 | |
866 | /* We cannot merge prefixes/vertices if they are successors of different |
867 | * root vertices: similarly, assume the grouped vertices are compatible */ |
868 | if (is_prefix) { |
869 | assert(tbi.isRootSuccessor(v_front)); |
870 | set<RoseVertex> u_preds; |
871 | set<RoseVertex> v_preds; |
872 | insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); |
873 | insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); |
874 | |
875 | if (u_preds != v_preds) { |
876 | return false; |
877 | } |
878 | } |
879 | |
880 | vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ |
881 | for (auto a : verts1) { |
882 | if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { |
883 | return false; |
884 | } |
885 | |
886 | u32 ulag = tbi.g[a].left.lag; |
887 | for (u32 id : tbi.g[a].literals) { |
888 | ulits.emplace_back(&tbi.literals.at(id), ulag); |
889 | } |
890 | } |
891 | |
892 | vector<pair<const rose_literal_id *, u32>> vlits; |
893 | for (auto a : verts2) { |
894 | if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { |
895 | return false; |
896 | } |
897 | |
898 | u32 vlag = tbi.g[a].left.lag; |
899 | for (u32 id : tbi.g[a].literals) { |
900 | vlits.emplace_back(&tbi.literals.at(id), vlag); |
901 | } |
902 | } |
903 | |
904 | if (!compatibleLiteralsForMerge(ulits, vlits)) { |
905 | return false; |
906 | } |
907 | |
908 | // Check preds are compatible as well. |
909 | if (!checkPredDelays(tbi, verts1, verts2) |
910 | || !checkPredDelays(tbi, verts2, verts1)) { |
911 | return false; |
912 | } |
913 | |
    DEBUG_PRINTF("vertex sets are mergeable\n");
915 | return true; |
916 | } |
917 | |
918 | bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1, |
919 | const set<RoseVertex> &v2) { |
920 | const deque<RoseVertex> vv1(v1.begin(), v1.end()); |
921 | const deque<RoseVertex> vv2(v2.begin(), v2.end()); |
922 | return mergeableRoseVertices(tbi, vv1, vv2); |
923 | } |
924 | |
925 | /** \brief Priority queue element for Rose merges. */ |
926 | namespace { |
927 | struct RoseMergeCandidate { |
928 | RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in, |
929 | u32 tb) |
930 | : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0), |
931 | tie_breaker(tb) { |
932 | if (r1.graph() && r2.graph()) { |
933 | const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph(); |
            /* SOM_NONE as Haigs don't merge; this is just a guiding heuristic */
935 | CharReach stop1 = findStopAlphabet(h1, SOM_NONE); |
936 | CharReach stop2 = findStopAlphabet(h2, SOM_NONE); |
937 | stopxor = (stop1 ^ stop2).count(); |
938 | |
939 | // We use the number of vertices as an approximation of the state |
940 | // count here, as this is just feeding a comparison. |
941 | u32 vertex_count = num_vertices(h1) + num_vertices(h2); |
942 | states = vertex_count - min(vertex_count, cpl); |
943 | } else if (r1.castle() && r2.castle()) { |
944 | // FIXME |
945 | } |
946 | } |
947 | |
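    /* Note: std::priority_queue pops its greatest element, so the ordering
     * below makes the most attractive candidate compare greatest: smallest
     * stop-alphabet difference first, then longest common prefix, then
     * smallest estimated merged state count. */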
948 | bool operator<(const RoseMergeCandidate &a) const { |
949 | if (stopxor != a.stopxor) { |
950 | return stopxor > a.stopxor; |
951 | } |
952 | if (cpl != a.cpl) { |
953 | return cpl < a.cpl; |
954 | } |
955 | if (states != a.states) { |
956 | return states > a.states; |
957 | } |
958 | return tie_breaker < a.tie_breaker; |
959 | } |
960 | |
961 | left_id r1; |
962 | left_id r2; |
963 | u32 stopxor; |
964 | u32 cpl; //!< common prefix length |
965 | u32 states; |
966 | u32 tie_breaker; //!< determinism |
967 | }; |
968 | } |
969 | |
970 | static |
971 | bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, |
972 | const vector<RoseVertex> &verts1, |
973 | const vector<RoseVertex> &verts2) { |
974 | assert(!verts1.empty() && !verts2.empty()); |
975 | |
    DEBUG_PRINTF("merging pair of leftfixes:\n");
    DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(),
                 as_string_list(all_tops(r1)).c_str());
    DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(),
                 as_string_list(all_tops(r2)).c_str());
981 | |
982 | RoseGraph &g = build.g; |
983 | |
984 | if (r1.graph()) { |
985 | assert(r2.graph()); |
986 | assert(r1.graph()->kind == r2.graph()->kind); |
987 | if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { |
            DEBUG_PRINTF("nfa merge failed\n");
989 | return false; |
990 | } |
991 | |
992 | /* The graph in r1 has been merged into the graph in r2. Update r1's |
993 | * vertices with the new graph ptr. mergeNfaPair() does not alter the |
994 | * tops from the input graph so no need to update top values. |
995 | * |
996 | * It is the responsibility of the caller to ensure that the tops are |
997 | * distinct when they have different trigger conditions. |
998 | * [Note: mergeLeftfixesVariableLag() should have a common parent set] |
999 | */ |
1000 | shared_ptr<NGHolder> &h = g[verts2.front()].left.graph; |
1001 | for (RoseVertex v : verts1) { |
1002 | g[v].left.graph = h; |
1003 | } |
1004 | |
1005 | return true; |
1006 | } else if (r1.castle()) { |
1007 | assert(r2.castle()); |
1008 | assert(build.cc.grey.allowCastle); |
1009 | |
1010 | map<u32, u32> top_map; |
1011 | if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) { |
            DEBUG_PRINTF("castle merge failed\n");
1013 | return false; |
1014 | } |
1015 | |
1016 | // The castle in r1 has been merged into the castle in r2, with tops |
1017 | // remapped as per top_map. |
1018 | const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle; |
1019 | for (RoseVertex v : verts1) { |
1020 | g[v].left.castle = c; |
1021 | for (const auto &e : in_edges_range(v, g)) { |
1022 | g[e].rose_top = top_map.at(g[e].rose_top); |
1023 | } |
1024 | } |
1025 | return true; |
1026 | } |
1027 | |
1028 | assert(0); |
1029 | return false; |
1030 | } |
1031 | |
1032 | /** |
 * Checks that there is no problem due to the involved vertices if we merge
 * two leftfix engines.
 *
 * This function takes the vertices on the right of the two engines.
1037 | * |
1038 | * Unlike mergeableRoseVertices(), this does not: |
1039 | * - check that engines themselves can be merged |
1040 | * - use heuristics to find out if merging the engines is wise. |
1041 | */ |
1042 | static |
1043 | bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, |
1044 | const vector<RoseVertex> &targets_1, |
1045 | const vector<RoseVertex> &targets_2) { |
1046 | assert(!targets_1.empty()); |
1047 | assert(!targets_2.empty()); |
1048 | |
1049 | vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ |
1050 | for (auto a : targets_1) { |
1051 | u32 ulag = build.g[a].left.lag; |
1052 | for (u32 id : build.g[a].literals) { |
1053 | ulits.emplace_back(&build.literals.at(id), ulag); |
1054 | } |
1055 | } |
1056 | |
1057 | vector<pair<const rose_literal_id *, u32>> vlits; |
1058 | for (auto a : targets_2) { |
1059 | u32 vlag = build.g[a].left.lag; |
1060 | for (u32 id : build.g[a].literals) { |
1061 | vlits.emplace_back(&build.literals.at(id), vlag); |
1062 | } |
1063 | } |
1064 | |
1065 | if (!compatibleLiteralsForMerge(ulits, vlits)) { |
1066 | return false; |
1067 | } |
1068 | |
1069 | // Check preds are compatible as well. |
1070 | if (!checkPredDelays(build, targets_1, targets_2) |
1071 | || !checkPredDelays(build, targets_2, targets_1)) { |
1072 | return false; |
1073 | } |
1074 | |
    DEBUG_PRINTF("vertex sets are mergeable\n");
1076 | return true; |
1077 | } |
1078 | |
1079 | /** |
1080 | * In block mode, we want to be a little more selective -- we will only merge |
1081 | * prefix engines when the literal sets are the same or if the merged graph |
1082 | * has only grown by a small amount. |
1083 | */ |
1084 | static |
1085 | bool goodBlockModeMerge(const RoseBuildImpl &build, |
1086 | const vector<RoseVertex> &u_verts, const left_id &u_eng, |
1087 | const vector<RoseVertex> &v_verts, |
1088 | const left_id &v_eng) { |
1089 | assert(!build.cc.streaming); |
1090 | |
1091 | // Always merge infixes if we can (subject to the other criteria in |
1092 | // mergeableRoseVertices). |
1093 | if (!build.isRootSuccessor(u_verts.front())) { |
1094 | return true; |
1095 | } |
1096 | |
1097 | const RoseGraph &g = build.g; |
1098 | |
1099 | flat_set<u32> u_lits; |
1100 | for (RoseVertex u : u_verts) { |
1101 | insert(&u_lits, g[u].literals); |
1102 | } |
1103 | |
1104 | flat_set<u32> v_lits; |
1105 | for (RoseVertex v : v_verts) { |
1106 | insert(&v_lits, g[v].literals); |
1107 | } |
1108 | |
1109 | // Merge prefixes with identical literal sets (as we'd have to run them |
1110 | // both when we see those literals anyway). |
1111 | if (u_lits == v_lits) { |
1112 | return true; |
1113 | } |
1114 | |
    // The rest of this function only deals with the case when both engines
    // have graph leftfixes.
1117 | if (!u_eng.graph()) { |
1118 | return false; |
1119 | } |
1120 | assert(v_eng.graph()); |
1121 | const NGHolder &ug = *u_eng.graph(); |
1122 | const NGHolder &vg = *v_eng.graph(); |
1123 | |
1124 | size_t u_count = num_vertices(ug); |
1125 | size_t v_count = num_vertices(vg); |
    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
                 u_count, v_count);
1128 | if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || |
1129 | v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("prefixes too big already\n");
1131 | return false; |
1132 | } |
1133 | |
    DEBUG_PRINTF("trying merge\n");
1135 | NGHolder h; |
1136 | cloneHolder(h, vg); |
1137 | if (!mergeNfaPair(ug, h, nullptr, build.cc)) { |
        DEBUG_PRINTF("couldn't merge\n");
1139 | return false; |
1140 | } |
1141 | |
1142 | const size_t merged_count = num_vertices(h); |
    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
1144 | if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("exceeded limit\n");
1146 | return false; |
1147 | } |
1148 | |
1149 | // We want to only perform merges that take advantage of some |
1150 | // commonality in the two input graphs, so we check that the number of |
1151 | // vertices has only grown a small amount: somewhere between the sum |
1152 | // (no commonality) and the max (no growth at all) of the vertex counts |
1153 | // of the input graphs. |
1154 | size_t max_size = u_count + v_count; |
1155 | size_t min_size = max(u_count, v_count); |
1156 | size_t max_growth = ((max_size - min_size) * 25) / 100; |
1157 | if (merged_count > min_size + max_growth) { |
        DEBUG_PRINTF("grew too much\n");
1159 | return false; |
1160 | } |
1161 | |
1162 | // We don't want to squander any chances at accelerating. |
1163 | if (!isAccelerableLeftfix(build, h) |
1164 | && (isAccelerableLeftfix(build, ug) |
1165 | || isAccelerableLeftfix(build, vg))) { |
        DEBUG_PRINTF("would lose accel property\n");
1167 | return false; |
1168 | } |
1169 | |
    DEBUG_PRINTF("safe to merge\n");
1171 | return true; |
1172 | } |
1173 | |
1174 | /** |
1175 | * Merge r1 into r2 if safe and appropriate. Returns true on success. |
1176 | */ |
1177 | static |
1178 | bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, |
1179 | const vector<RoseVertex> &targets_1, |
1180 | left_id &r2, |
1181 | const vector<RoseVertex> &targets_2) { |
1182 | if (targets_1.empty() || targets_2.empty()) { |
1183 | /* one of the engines has already been merged away */ |
1184 | return false; |
1185 | } |
1186 | |
1187 | assert(!r1.graph() == !r2.graph()); |
1188 | if (r1.graph()) { |
1189 | NGHolder *h1 = r1.graph(); |
1190 | NGHolder *h2 = r2.graph(); |
1191 | CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); |
1192 | CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); |
1193 | CharReach stopboth = stop1 & stop2; |
        DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(),
                     stop2.count(), stopboth.count());
1196 | if (stopboth.count() < 10 |
1197 | && (stop1.count() > 10 || stop2.count() > 10)) { |
            DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
1199 | return false; |
1200 | } |
1201 | size_t maxstop = max(stop1.count(), stop2.count()); |
1202 | if (maxstop > 200 && stopboth.count() < 200) { |
            DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
1204 | return false; |
1205 | } |
1206 | } |
1207 | |
1208 | /* Rechecking that the targets are compatible, as we may have already |
1209 | * merged new states into r1 or r2 and we need to verify that this |
1210 | * candidate is still ok. */ |
1211 | if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { |
1212 | return false; |
1213 | } |
1214 | |
1215 | if (!build.cc.streaming |
1216 | && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { |
1217 | return false; |
1218 | } |
1219 | |
1220 | return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); |
1221 | } |
1222 | |
1223 | static |
1224 | bool nfaHasNarrowStart(const NGHolder &g) { |
1225 | if (out_degree(g.startDs, g) > 1) { |
1226 | return false; // unanchored |
1227 | } |
1228 | |
1229 | CharReach cr; |
1230 | |
1231 | for (auto v : adjacent_vertices_range(g.start, g)) { |
1232 | if (v == g.startDs) { |
1233 | continue; |
1234 | } |
1235 | cr |= g[v].char_reach; |
1236 | } |
1237 | return cr.count() <= NARROW_START_MAX; |
1238 | } |
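
/* For example (illustrative): in an anchored prefix such as /^[0-9]foo/, the
 * only successor of start reaches on ten characters, which is within
 * NARROW_START_MAX; an unanchored pattern instead fails the startDs
 * out-degree test above. */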
1239 | |
1240 | static |
1241 | bool nfaHasFiniteMaxWidth(const NGHolder &g) { |
1242 | return findMaxWidth(g).is_finite(); |
1243 | } |
1244 | |
1245 | static |
1246 | bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { |
1247 | if (!proper_out_degree(h.startDs, h)) { |
1248 | return false; |
1249 | } |
1250 | |
1251 | assert(!is_triggered(h)); |
1252 | |
1253 | NGHolder h_temp; |
1254 | cloneHolder(h_temp, h); |
1255 | |
1256 | vector<BoundedRepeatData> repeats; |
1257 | bool suitable_for_sds_reforming = false; |
1258 | const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */ |
1259 | const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */ |
1260 | const bool simple_model_selection = true; // FIRST is considered simple |
1261 | analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true, |
1262 | simple_model_selection, grey, &suitable_for_sds_reforming); |
1263 | |
1264 | return suitable_for_sds_reforming; |
1265 | } |
1266 | |
1267 | static |
1268 | u32 commonPrefixLength(left_id &r1, left_id &r2) { |
1269 | if (r1.graph() && r2.graph()) { |
1270 | return commonPrefixLength(*r1.graph(), *r2.graph()); |
1271 | } else if (r1.castle() && r2.castle()) { |
1272 | return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); |
1273 | } |
1274 | return 0; |
1275 | } |
1276 | |
1277 | namespace { |
1278 | struct MergeKey { |
1279 | MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) : |
1280 | parents(std::move(parents_in)) { |
1281 | |
        // We want to distinguish prefixes (but not infixes) by whether they
        // have a narrow start or a finite max width.
1284 | if (left.graph() && !is_triggered(*left.graph())) { |
1285 | const NGHolder &h = *left.graph(); |
1286 | narrowStart = nfaHasNarrowStart(h); |
1287 | hasMaxWidth = nfaHasFiniteMaxWidth(h); |
1288 | } else { |
1289 | narrowStart = false; |
1290 | hasMaxWidth = false; |
1291 | } |
1292 | |
1293 | if (left.castle()) { |
1294 | /* castles should have a non-empty reach */ |
1295 | assert(left.castle()->reach().any()); |
1296 | castle_cr = left.castle()->reach(); |
1297 | } else { |
1298 | assert(left.graph()); |
1299 | } |
1300 | } |
1301 | |
1302 | bool operator<(const MergeKey &b) const { |
1303 | const MergeKey &a = *this; |
1304 | ORDER_CHECK(narrowStart); |
1305 | ORDER_CHECK(hasMaxWidth); |
1306 | ORDER_CHECK(castle_cr); |
1307 | ORDER_CHECK(parents); |
1308 | return false; |
1309 | } |
1310 | |
1311 | // NOTE: these two bool discriminators are only used for prefixes, not |
1312 | // infixes. |
1313 | bool narrowStart; |
1314 | bool hasMaxWidth; |
1315 | CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */ |
1316 | |
1317 | flat_set<RoseVertex> parents; |
1318 | }; |
1319 | } |
1320 | |
1321 | template <typename T> |
1322 | static |
1323 | void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) { |
1324 | if (in.size() <= chunk_size) { |
1325 | out->push_back(std::move(in)); |
1326 | return; |
1327 | } |
1328 | |
1329 | out->push_back(vector<T>()); |
1330 | out->back().reserve(chunk_size); |
    for (auto &t : in) { // non-const ref so that std::move below actually moves
1332 | if (out->back().size() >= chunk_size) { |
1333 | out->push_back(vector<T>()); |
1334 | out->back().reserve(chunk_size); |
1335 | } |
1336 | out->back().push_back(std::move(t)); |
1337 | } |
1338 | } |
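
/*
 * Illustrative sketch (not part of the build): chunking seven elements with
 * chunk_size = 3 produces groups of sizes 3, 3 and 1.
 *
 *     vector<vector<int>> out;
 *     chunk(vector<int>{1, 2, 3, 4, 5, 6, 7}, &out, 3);
 *     assert(out.size() == 3 && out[2].size() == 1);
 */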
1339 | |
1340 | static |
1341 | insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { |
1342 | insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; |
1343 | for (auto v : vertices_range(g)) { |
1344 | const auto &left = g[v].left; |
1345 | if (!left) { |
1346 | continue; |
1347 | } |
1348 | assert(contains(all_reports(left), left.leftfix_report)); |
1349 | eng_verts[left].push_back(v); |
1350 | } |
1351 | |
1352 | return eng_verts; |
1353 | } |
1354 | |
1355 | /** |
1356 | * This pass attempts to merge prefix/infix engines which share a common set of |
1357 | * parent vertices. |
1358 | * |
1359 | * Engines are greedily merged pairwise by this process based on a priority |
1360 | * queue keyed off the common prefix length. |
1361 | * |
1362 | * Engines are not merged if the lags are not compatible or if it would damage |
1363 | * the stop alphabet. |
1364 | * |
1365 | * Infixes: |
1366 | * - It is expected that when this is run all infixes are still at the single |
1367 | * top stage as we have not yet merged unrelated infixes together. After |
1368 | * execution, castles may have multiple (but equivalent) tops. |
1369 | * |
1370 | * Prefixes: |
1371 | * - transient prefixes are not considered. |
 * - prefixes with a max width or a narrow start are kept segregated by
 *   this phase and can only be merged with similar prefixes.
1374 | * - in block mode, merges are only performed if literal sets are the same. |
1375 | * - merges are not considered in cases where dot star start state will be |
1376 | * reformed to optimise a leading repeat. |
1377 | */ |
1378 | void mergeLeftfixesVariableLag(RoseBuildImpl &build) { |
1379 | if (!build.cc.grey.mergeRose) { |
1380 | return; |
1381 | } |
1382 | assert(!hasOrphanedTops(build)); |
1383 | |
1384 | RoseGraph &g = build.g; |
1385 | |
    DEBUG_PRINTF("-----\n");
    DEBUG_PRINTF("entry\n");
    DEBUG_PRINTF("-----\n");
1389 | |
1390 | auto eng_verts = get_eng_verts(g); |
1391 | |
1392 | map<MergeKey, vector<left_id>> engine_groups; |
1393 | for (const auto &e : eng_verts) { |
1394 | const left_id &left = e.first; |
1395 | const auto &verts = e.second; |
1396 | // Only non-transient for the moment. |
1397 | if (contains(build.transient, left)) { |
1398 | continue; |
1399 | } |
1400 | |
1401 | // No forced McClellan or Haig infix merges. |
1402 | if (left.dfa() || left.haig()) { |
1403 | continue; |
1404 | } |
1405 | assert(left.graph() || left.castle()); |
1406 | |
1407 | if (left.graph()) { |
1408 | const NGHolder &h = *left.graph(); |
1409 | /* we should not have merged yet */ |
1410 | assert(!is_triggered(h) || onlyOneTop(h)); |
1411 | |
1412 | if (hasReformedStartDotStar(h, build.cc.grey)) { |
1413 | continue; // preserve the optimisation of the leading repeat |
1414 | } |
1415 | } else { |
1416 | assert(left.castle()); |
1417 | |
1418 | if (!build.cc.grey.allowCastle) { |
                DEBUG_PRINTF("castle merging disallowed by greybox\n");
1420 | continue; |
1421 | } |
1422 | } |
1423 | |
1424 | // We collapse the anchored root into the root vertex when calculating |
1425 | // parents, so that we can merge differently-anchored prefix roses |
1426 | // together. (Prompted by UE-2100) |
1427 | |
1428 | flat_set<RoseVertex> parents; |
1429 | for (RoseVertex v : verts) { |
1430 | insert(&parents, inv_adjacent_vertices_range(v, g)); |
1431 | } |
1432 | |
1433 | if (contains(parents, build.anchored_root)) { |
1434 | parents.erase(build.anchored_root); |
1435 | parents.insert(build.root); |
1436 | } |
1437 | |
1438 | assert(!parents.empty()); |
1439 | |
1440 | #ifndef _WIN32 |
1441 | engine_groups[MergeKey(left, parents)].push_back(left); |
1442 | #else |
    // On Windows, when a stack-constructed MergeKey is passed into the map
    // 'engine_groups', the same object can end up destroyed twice: once when
    // it goes out of scope and again via engine_groups.clear(), which
    // triggers an is_block_type_valid() assertion failure in the MergeKey
    // destructor. Constructing the key on the heap avoids the double
    // destruction.
1450 | MergeKey *mk = new MergeKey(left, parents); |
1451 | engine_groups[*mk].push_back(left); |
1452 | #endif |
1453 | } |
1454 | |
1455 | vector<vector<left_id>> chunks; |
1456 | for (auto &raw_group : engine_groups | map_values) { |
1457 | chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); |
1458 | } |
1459 | engine_groups.clear(); |
1460 | |
    DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size());
1462 | |
1463 | for (auto &roses : chunks) { |
1464 | if (roses.size() < 2) { |
1465 | continue; |
1466 | } |
1467 | // All pairs on the prio queue. |
1468 | u32 tie_breaker = 0; |
1469 | priority_queue<RoseMergeCandidate> pq; |
1470 | for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { |
1471 | left_id r1 = *it; |
1472 | const vector<RoseVertex> &targets_1 = eng_verts[r1]; |
1473 | |
1474 | for (auto jt = next(it); jt != ite; ++jt) { |
1475 | left_id r2 = *jt; |
1476 | |
1477 | /* we should have already split on engine types and reach */ |
1478 | assert(!r1.castle() == !r2.castle()); |
1479 | assert(!r1.graph() == !r2.graph()); |
1480 | assert(!r1.castle() |
1481 | || r1.castle()->reach() == r2.castle()->reach()); |
1482 | |
1483 | const vector<RoseVertex> &targets_2 = eng_verts[r2]; |
1484 | if (!checkVerticesOkForLeftfixMerge(build, targets_1, |
1485 | targets_2)) { |
1486 | continue; // No point queueing unmergeable cases. |
1487 | } |
1488 | |
1489 | u32 cpl = commonPrefixLength(r1, r2); |
1490 | pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); |
1491 | } |
1492 | } |
1493 | |
        DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());
1495 | |
1496 | while (!pq.empty()) { |
1497 | left_id r1 = pq.top().r1; |
1498 | left_id r2 = pq.top().r2; |
            DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n",
                         r1.graph(), r2.graph(), pq.top().cpl, pq.top().states);
1501 | pq.pop(); |
1502 | vector<RoseVertex> &targets_1 = eng_verts[r1]; |
1503 | vector<RoseVertex> &targets_2 = eng_verts[r2]; |
1504 | if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2, |
1505 | targets_2)) { |
1506 | insert(&targets_2, targets_2.end(), targets_1); |
1507 | targets_1.clear(); |
1508 | } |
1509 | } |
1510 | } |
1511 | |
    DEBUG_PRINTF("-----\n");
    DEBUG_PRINTF("exit\n");
    DEBUG_PRINTF("-----\n");
1515 | assert(!hasOrphanedTops(build)); |
1516 | } |
1517 | |
1518 | namespace { |
1519 | |
1520 | /** |
1521 | * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path. |
1522 | */ |
1523 | struct DedupeLeftKey { |
1524 | DedupeLeftKey(const RoseBuildImpl &build, |
1525 | flat_set<pair<size_t, u32>> preds_in, const left_id &left) |
1526 | : left_hash(hashLeftfix(left)), preds(move(preds_in)), |
1527 | transient(contains(build.transient, left)) { |
1528 | } |
1529 | |
1530 | bool operator<(const DedupeLeftKey &b) const { |
1531 | return tie(left_hash, preds, transient) |
1532 | < tie(b.left_hash, b.preds, b.transient); |
1533 | } |
1534 | |
1535 | private: |
1536 | /** Quick hash of the leftfix itself. Must be identical for a given pair of |
1537 | * graphs if is_equal would return true. */ |
1538 | size_t left_hash; |
1539 | |
1540 | /** For each in-edge, the pair of (parent index, edge top). */ |
1541 | flat_set<pair<size_t, u32>> preds; |
1542 | |
1543 | /** We don't want to combine transient with non-transient. */ |
1544 | bool transient; |
1545 | }; |
1546 | |
1547 | } // namespace |
1548 | |
1549 | static |
1550 | flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { |
1551 | flat_set<pair<size_t, u32>> preds; |
1552 | for (const auto &e : in_edges_range(v, g)) { |
1553 | preds.emplace(g[source(e, g)].index, g[e].rose_top); |
1554 | } |
1555 | return preds; |
1556 | } |
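
/* For example, a vertex with in-edges from parents with indices 3 and 7,
 * fired with tops 0 and 1 respectively, yields the set {(3, 0), (7, 1)}. */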
1557 | |
1558 | /** |
 * This is a generalisation of \ref dedupeLeftfixes which relaxes two
 * restrictions: multiple predecessor roles are allowed and the lag used by
 * each vertex may differ from vertex to vertex. Like \ref dedupeLeftfixes,
1562 | * the leftfixes' successor vertices are first grouped to reduce the number of |
1563 | * potential candidates - the grouping in this case is by the set of |
1564 | * predecessor roles with their associated top events. For the dedupe to be |
1565 | * possible, it is required that: |
1566 | * |
1567 | * 1. the nfa graphs with respect to the relevant reports are identical |
1568 | * 2. the nfa graphs are triggered by the same roles with same events (ensured |
1569 | * by the initial grouping pass) |
1570 | * 3. all the successor roles of either graph can inspect the combined leftfix |
1571 | * without advancing the state of the leftfix past the point that another |
1572 | * successor may want to inspect it; the overlap relationships between the |
1573 | * involved literals are examined to ensure that this property holds. |
1574 | * |
1575 | * Note: this is unable to dedupe when delayed literals are involved unlike |
1576 | * dedupeLeftfixes. |
1577 | */ |
1578 | void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { |
    DEBUG_PRINTF("entry\n");
1580 | |
1581 | RoseGraph &g = build.g; |
1582 | auto eng_verts = get_eng_verts(g); |
1583 | |
1584 | map<DedupeLeftKey, vector<left_id>> engine_groups; |
1585 | for (const auto &e : eng_verts) { |
1586 | const left_id &left = e.first; |
1587 | const auto &verts = e.second; |
1588 | |
        /* There should only be one report on an engine as no merges have
         * happened yet, aside from EOD prefixes. */
1591 | if (all_reports(left).size() != 1) { |
1592 | assert(any_of_in(adjacent_vertices_range(verts.front(), g), |
1593 | [&](RoseVertex w) { return g[w].eod_accept; })); |
1594 | continue; |
1595 | } |
1596 | |
1597 | if (left.haig()) { |
1598 | /* TODO: allow deduping of identical haigs */ |
1599 | continue; |
1600 | } |
1601 | |
1602 | if (left.graph()) { |
1603 | /* we should not have merged yet */ |
1604 | assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); |
1605 | } |
1606 | |
        auto preds = get_pred_tops(verts.front(), g);
        bool distinct_preds = false;
        for (RoseVertex v : verts) {
            if (preds != get_pred_tops(v, g)) {
                DEBUG_PRINTF("distinct pred sets\n");
                distinct_preds = true;
                break;
            }
        }
        if (distinct_preds) {
            /* Vertices disagree on their predecessors, so a key built from
             * the first vertex would not describe them all; don't group this
             * engine. */
            continue;
        }
        engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
1615 | } |
1616 | |
1617 | /* We don't bother chunking as we expect deduping to be successful if the |
1618 | * hashes match */ |
1619 | |
1620 | for (auto &group : engine_groups | map_values) { |
1621 | DEBUG_PRINTF("group of %zu roses\n" , group.size()); |
1622 | |
1623 | if (group.size() < 2) { |
1624 | continue; |
1625 | } |
1626 | |
1627 | for (auto it = group.begin(); it != group.end(); ++it) { |
1628 | left_id r1 = *it; |
1629 | vector<RoseVertex> &verts1 = eng_verts[r1]; |
1630 | assert(!verts1.empty()); /* cleared engines should be behind us */ |
1631 | |
1632 | assert(all_reports(r1).size() == 1); |
1633 | ReportID r1_report = *all_reports(r1).begin(); |
1634 | |
1635 | for (auto jt = next(it); jt != group.end(); ++jt) { |
1636 | left_id r2 = *jt; |
1637 | vector<RoseVertex> &verts2 = eng_verts[r2]; |
1638 | assert(!verts2.empty()); |
1639 | assert(all_reports(r2).size() == 1); |
1640 | ReportID r2_report = *all_reports(r2).begin(); |
1641 | |
1642 | if (!is_equal(r1, r1_report, r2, r2_report)) { |
1643 | continue; |
1644 | } |
1645 | |
1646 | if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { |
1647 | continue; |
1648 | } |
1649 | |
1650 | DEBUG_PRINTF("%p and %p are dupes\n" , r1.graph(), r2.graph()); |
1651 | |
1652 | // Replace r1 with r2. |
1653 | |
1654 | for (auto v : verts1) { |
1655 | DEBUG_PRINTF("replacing report %u with %u on %zu\n" , |
1656 | r2_report, r1_report, g[v].index); |
1657 | u32 orig_lag = g[v].left.lag; |
1658 | g[v].left = g[verts2.front()].left; |
1659 | g[v].left.lag = orig_lag; |
1660 | } |
1661 | |
1662 | insert(&verts2, verts2.end(), verts1); |
1663 | verts1.clear(); |
1664 | |
1665 | /* remove stale entry from transient set, if present */ |
1666 | build.transient.erase(r1); |
1667 | |
1668 | break; |
1669 | } |
1670 | } |
1671 | } |
1672 | } |
1673 | |
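/** \brief Return the lowest top value not present in \p tops.
 *
 * For example, tops = {0, 1, 3} yields 2, and an empty set yields 0. */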
1674 | static |
1675 | u32 findUnusedTop(const flat_set<u32> &tops) { |
1676 | u32 i = 0; |
1677 | while (contains(tops, i)) { |
1678 | i++; |
1679 | } |
1680 | return i; |
1681 | } |
1682 | |
// Renumber the tops on edges out of start according to the given mapping.
1684 | static |
1685 | void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) { |
1686 | for (const auto &e : out_edges_range(h.start, h)) { |
1687 | NFAVertex v = target(e, h); |
1688 | if (v == h.startDs) { |
1689 | continue; |
1690 | } |
1691 | flat_set<u32> new_tops; |
1692 | for (u32 t : h[e].tops) { |
1693 | DEBUG_PRINTF("vertex %zu has top %u\n" , h[v].index, t); |
1694 | new_tops.insert(top_mapping.at(t)); |
1695 | } |
1696 | h[e].tops = std::move(new_tops); |
1697 | } |
1698 | } |
1699 | |
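/** \brief Ensure that the tops of h1 do not collide with those of h2,
 * renumbering h1's tops if necessary and recording the old -> new assignment
 * in \p top_mapping (left empty if no renumbering was needed).
 *
 * Worked example: with h1 tops {0, 1} and h2 tops {1, 2}, top 0 is unused in
 * h2 and maps to itself, while top 1 collides and is remapped to 3 (0, 1 and
 * 2 all being taken by that point), so top_mapping = {0 -> 0, 1 -> 3}. */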
1700 | static |
1701 | bool setDistinctTops(NGHolder &h1, const NGHolder &h2, |
1702 | map<u32, u32> &top_mapping) { |
1703 | flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2); |
1704 | |
1705 | DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n" , tops1.size(), |
1706 | tops2.size()); |
1707 | |
1708 | // If our tops don't intersect, we're OK to merge with no changes. |
1709 | if (!has_intersection(tops1, tops2)) { |
1710 | DEBUG_PRINTF("tops don't intersect\n" ); |
1711 | return true; |
1712 | } |
1713 | |
1714 | // Otherwise, we have to renumber the tops in h1 so that they don't overlap |
1715 | // with the tops in h2. |
1716 | top_mapping.clear(); |
1717 | for (u32 t : tops1) { |
1718 | u32 u = findUnusedTop(tops2); |
1719 | DEBUG_PRINTF("replacing top %u with %u in h1\n" , t, u); |
1720 | top_mapping.insert(make_pair(t, u)); |
1721 | assert(!contains(tops2, u)); |
1722 | tops2.insert(u); |
1723 | } |
1724 | |
1725 | replaceTops(h1, top_mapping); |
1726 | return true; |
1727 | } |
1728 | |
1729 | bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, |
1730 | const deque<RoseVertex> &verts1) { |
1731 | map<u32, u32> top_mapping; |
1732 | if (!setDistinctTops(h1, h2, top_mapping)) { |
1733 | return false; |
1734 | } |
1735 | |
1736 | if (top_mapping.empty()) { |
1737 | return true; // No remapping necessary. |
1738 | } |
1739 | |
1740 | for (auto v : verts1) { |
1741 | DEBUG_PRINTF("vertex %zu\n" , g[v].index); |
1742 | assert(!g[v].left.haig); |
1743 | assert(!g[v].left.dfa); |
1744 | for (const auto &e : in_edges_range(v, g)) { |
1745 | u32 t = g[e].rose_top; |
1746 | DEBUG_PRINTF("t=%u\n" , t); |
1747 | assert(contains(top_mapping, t)); |
1748 | g[e].rose_top = top_mapping[t]; |
1749 | DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n" , |
1750 | g[source(e, g)].index, g[target(e, g)].index, t, |
1751 | top_mapping[t]); |
1752 | } |
1753 | } |
1754 | |
1755 | return true; |
1756 | } |
1757 | |
1758 | static |
1759 | bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, |
1760 | const deque<RoseVertex> &verts1) { |
1761 | map<u32, u32> top_mapping; |
1762 | if (!setDistinctTops(h1, h2, top_mapping)) { |
1763 | return false; |
1764 | } |
1765 | |
1766 | if (top_mapping.empty()) { |
1767 | return true; // No remapping necessary. |
1768 | } |
1769 | |
1770 | for (auto v : verts1) { |
1771 | DEBUG_PRINTF("vertex %zu\n" , g[v].index); |
1772 | u32 t = g[v].suffix.top; |
1773 | assert(contains(top_mapping, t)); |
1774 | g[v].suffix.top = top_mapping[t]; |
1775 | } |
1776 | |
1777 | return true; |
1778 | } |
1779 | |
1780 | /** \brief Estimate the number of accel states in the given graph when built as |
1781 | * an NFA. |
1782 | * |
1783 | * (The easiest way to estimate something like this is to actually build it: |
1784 | * the criteria for NFA acceleration are quite complicated and buried in |
1785 | * limex_compile.) |
1786 | */ |
1787 | static |
1788 | u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) { |
1789 | return countAccelStates(h, &tbi.rm, tbi.cc); |
1790 | } |
1791 | |
1792 | static |
1793 | void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) { |
1794 | RoseGraph &g = tbi.g; |
1795 | DEBUG_PRINTF("%zu nfa rose merge candidates\n" , roses.size()); |
1796 | |
1797 | // We track the number of accelerable states for each graph in a map and |
1798 | // only recompute them when the graph is modified. |
1799 | unordered_map<left_id, u32> accel_count; |
1800 | for (const auto &rose : roses) { |
1801 | assert(rose.graph()->kind == NFA_INFIX); |
1802 | accel_count[rose] = estimatedAccelStates(tbi, *rose.graph()); |
1803 | } |
1804 | |
1805 | for (auto it = roses.begin(); it != roses.end(); ++it) { |
1806 | left_id r1 = *it; |
1807 | const deque<RoseVertex> &verts1 = roses.vertices(r1); |
1808 | |
1809 | deque<left_id> merged; |
1810 | for (auto jt = next(it); jt != roses.end(); ++jt) { |
1811 | left_id r2 = *jt; |
1812 | const deque<RoseVertex> &verts2 = roses.vertices(r2); |
1813 | |
1814 | DEBUG_PRINTF("consider merging rose %p (%zu verts) " |
1815 | "with %p (%zu verts)\n" , |
1816 | r1.graph(), verts1.size(), r2.graph(), verts2.size()); |
1817 | |
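            /* Merging must preserve acceleration: as an illustration, if
             * NFA_MAX_ACCEL_STATES were 8, candidates estimated at 5 and 4
             * accel states would be skipped (5 + 4 > 8), while 5 and 3 could
             * proceed, on the basis that the merged graph is assumed to need
             * at most the sum of the two estimates. */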
1818 | u32 accel1 = accel_count[r1]; |
1819 | if (accel1 >= NFA_MAX_ACCEL_STATES) { |
1820 | DEBUG_PRINTF("h1 has hit max accel\n" ); |
1821 | break; // next h1 |
1822 | } |
1823 | |
1824 | u32 accel2 = accel_count[r2]; |
1825 | if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { |
1826 | DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " |
1827 | "accel2=%u)\n" , |
1828 | accel1, accel2); |
1829 | continue; // next h2 |
1830 | } |
1831 | |
1832 | if (!mergeableRoseVertices(tbi, verts1, verts2)) { |
1833 | DEBUG_PRINTF("not mergeable\n" ); |
1834 | continue; // next h2 |
1835 | } |
1836 | |
1837 | // Attempt to merge h2 into h1. |
1838 | |
1839 | NGHolder victim; |
1840 | cloneHolder(victim, *r2.graph()); |
1841 | |
1842 | // Store a copy of the in-edge properties in case we have to roll |
1843 | // back. |
1844 | map<RoseEdge, RoseEdgeProps> edge_props; |
1845 | for (auto v : verts2) { |
1846 | for (const auto &e : in_edges_range(v, g)) { |
1847 | edge_props[e] = g[e]; |
1848 | } |
1849 | } |
1850 | |
1851 | if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) { |
1852 | DEBUG_PRINTF("can't set distinct tops\n" ); |
1853 | continue; // next h2 |
1854 | } |
1855 | |
1856 | assert(victim.kind == r1.graph()->kind); |
1857 | assert(!generates_callbacks(*r1.graph())); |
1858 | if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) { |
1859 | DEBUG_PRINTF("merge failed\n" ); |
1860 | // Roll back in-edge properties. |
1861 | for (const auto &m : edge_props) { |
1862 | g[m.first] = m.second; |
1863 | } |
1864 | continue; // next h2 |
1865 | } |
1866 | |
1867 | // Update h2's roses to point to h1 now |
1868 | shared_ptr<NGHolder> winner = g[verts1.front()].left.graph; |
1869 | for (auto v : verts2) { |
1870 | g[v].left.graph = winner; |
1871 | } |
1872 | roses.insert(r1, verts2); |
1873 | |
1874 | merged.push_back(r2); |
1875 | |
1876 | if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) { |
1877 | DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n" , |
1878 | num_vertices(*winner)); |
1879 | break; // next h1 |
1880 | } |
1881 | |
1882 | // Update h1's accel count estimate. |
1883 | accel_count[r1] = estimatedAccelStates(tbi, *winner); |
1884 | } |
1885 | |
1886 | DEBUG_PRINTF("%zu roses merged\n" , merged.size()); |
1887 | roses.erase_all(merged.begin(), merged.end()); |
1888 | } |
1889 | } |
1890 | |
1891 | /** |
1892 | * This pass attempts to merge prefix/infix engines with a small number of |
 * vertices together into larger engines. The engines must not have a
1894 | * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines |
1895 | * that have compatible lag are greedily grouped such that they remain |
1896 | * accelerable and only have a small number of states. Note: if a role has an |
1897 | * infix with multiple trigger vertices, the role will be left unchanged by this |
1898 | * pass and will remain using an unmerged graph. |
1899 | */ |
1900 | void mergeSmallLeftfixes(RoseBuildImpl &tbi) { |
1901 | DEBUG_PRINTF("entry\n" ); |
1902 | |
1903 | if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) { |
1904 | return; |
1905 | } |
1906 | |
1907 | RoseGraph &g = tbi.g; |
1908 | |
1909 | LeftfixBouquet nfa_leftfixes; |
1910 | |
1911 | for (auto v : vertices_range(g)) { |
1912 | if (!g[v].left) { |
1913 | continue; |
1914 | } |
1915 | |
1916 | // Handle single-parent infixes only. |
1917 | if (tbi.isRootSuccessor(v)) { |
1918 | continue; |
1919 | } |
1920 | |
1921 | left_id left(g[v].left); |
1922 | |
1923 | // Only non-transient for the moment. |
1924 | if (contains(tbi.transient, left)) { |
1925 | continue; |
1926 | } |
1927 | |
1928 | // No DFAs or Haigs right now. |
1929 | if (left.dfa() || left.haig()) { |
1930 | continue; |
1931 | } |
1932 | |
1933 | // Castles are handled by a different pass. |
1934 | if (left.castle()) { |
1935 | continue; |
1936 | } |
1937 | |
1938 | assert(left.graph()); |
1939 | NGHolder &h = *left.graph(); |
1940 | |
1941 | /* Ensure that kind on the graph is correct */ |
1942 | assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); |
1943 | |
1944 | if (hasReformedStartDotStar(h, tbi.cc.grey)) { |
1945 | /* We would lose optimisations of the leading repeat by merging. */ |
1946 | continue; |
1947 | } |
1948 | |
1949 | // Small roses only. |
1950 | if (num_vertices(h) > small_rose_threshold(tbi.cc)) { |
1951 | continue; |
1952 | } |
1953 | |
1954 | nfa_leftfixes.insert(left, v); |
1955 | } |
1956 | |
1957 | deque<LeftfixBouquet> leftfix_groups; |
1958 | chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX); |
1959 | nfa_leftfixes.clear(); |
1960 | DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n" , |
1961 | leftfix_groups.size()); |
1962 | |
1963 | for (auto &group : leftfix_groups) { |
1964 | mergeNfaLeftfixes(tbi, group); |
1965 | } |
1966 | } |
1967 | |
1968 | static |
1969 | void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, |
1970 | insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { |
1971 | /* caller must have already ensured that candidates have the same reach */ |
1972 | RoseGraph &g = build.g; |
1973 | DEBUG_PRINTF("%zu castle leftfix merge candidates\n" , cands.size()); |
1974 | |
1975 | for (auto it = cands.begin(); it != cands.end(); ++it) { |
1976 | left_id &cand_1 = *it; |
1977 | vector<RoseVertex> &verts_1 = eng_verts[cand_1]; |
1978 | if (verts_1.empty()) { |
1979 | continue; |
1980 | } |
1981 | |
1982 | for (auto jt = next(it); jt != cands.end(); ++jt) { |
1983 | const left_id &cand_2 = *jt; |
1984 | vector<RoseVertex> &verts_2 = eng_verts[cand_2]; |
1985 | if (verts_2.empty()) { |
1986 | continue; |
1987 | } |
1988 | |
1989 | assert(cand_1.castle()->reach() == cand_2.castle()->reach()); |
1990 | |
1991 | if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { |
1992 | DEBUG_PRINTF("not mergeable\n" ); |
1993 | continue; // next cand_2 |
1994 | } |
1995 | |
1996 | DEBUG_PRINTF("castle1=%p (size %zu)\n" , cand_1.castle(), |
1997 | cand_1.castle()->repeats.size()); |
1998 | DEBUG_PRINTF("castle2=%p (size %zu)\n" , cand_2.castle(), |
1999 | cand_2.castle()->repeats.size()); |
2000 | |
2001 | map<u32, u32> top_map; |
2002 | if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { |
2003 | DEBUG_PRINTF("couldn't merge\n" ); |
2004 | continue; // next cand_2 |
2005 | } |
2006 | |
2007 | // Update castle2's roses to point to castle1 now. |
2008 | shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; |
2009 | for (auto v : verts_2) { |
2010 | assert(g[v].left.castle.get() == cand_2.castle()); |
2011 | g[v].left.castle = winner; |
2012 | for (const auto &e : in_edges_range(v, g)) { |
2013 | g[e].rose_top = top_map.at(g[e].rose_top); |
2014 | } |
2015 | } |
2016 | |
2017 | insert(&verts_1, verts_1.end(), verts_2); |
2018 | verts_2.clear(); |
2019 | } |
2020 | } |
2021 | } |
2022 | |
2023 | /** |
2024 | * Merges castles with the same reach together regardless of where in the rose |
2025 | * graph they are. Note: there is no requirement for the castles to have common |
2026 | * parent or target vertices. |
2027 | * |
2028 | * There are no heuristics for reducing block mode merges as castle speed |
2029 | * mainly depends on the reach being scanned. |
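 *
 * For example, an infix castle holding the single repeat {2,5} and another
 * over the same reach holding {3,} can be combined into one castle holding
 * both repeats, distinguished by separate top events.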
2030 | */ |
2031 | void mergeCastleLeftfixes(RoseBuildImpl &build) { |
2032 | DEBUG_PRINTF("entry\n" ); |
2033 | |
2034 | if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses |
2035 | || !build.cc.grey.allowCastle) { |
2036 | return; |
2037 | } |
2038 | |
2039 | RoseGraph &g = build.g; |
2040 | |
2041 | insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; |
2042 | |
2043 | for (auto v : vertices_range(g)) { |
2044 | if (!g[v].left.castle) { |
2045 | continue; |
2046 | } |
2047 | |
2048 | // Handle infixes only. |
2049 | if (build.isRootSuccessor(v)) { |
2050 | continue; |
2051 | } |
2052 | |
2053 | eng_verts[g[v].left].push_back(v); |
2054 | } |
2055 | |
2056 | map<CharReach, vector<left_id>> by_reach; |
2057 | for (const auto &left : eng_verts | map_keys) { |
2058 | by_reach[left.castle()->reach()].push_back(left); |
2059 | } |
2060 | |
2061 | vector<vector<left_id>> chunks; |
2062 | for (auto &raw_group : by_reach | map_values) { |
2063 | chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); |
2064 | } |
2065 | by_reach.clear(); |
2066 | |
2067 | DEBUG_PRINTF("chunked castles into %zu groups\n" , chunks.size()); |
2068 | |
2069 | for (auto &chunk : chunks) { |
2070 | mergeCastleChunk(build, chunk, eng_verts); |
2071 | } |
2072 | } |
2073 | |
2074 | static |
2075 | void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, |
2076 | const bool acyclic) { |
2077 | RoseGraph &g = tbi.g; |
2078 | |
2079 | DEBUG_PRINTF("group has %zu suffixes\n" , suffixes.size()); |
2080 | |
2081 | // If this isn't an acyclic case, we track the number of accelerable states |
2082 | // for each graph in a map and only recompute them when the graph is |
2083 | // modified. |
2084 | unordered_map<suffix_id, u32> accel_count; |
2085 | if (!acyclic) { |
2086 | for (const auto &suffix : suffixes) { |
2087 | assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); |
2088 | accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph()); |
2089 | } |
2090 | } |
2091 | |
2092 | for (auto it = suffixes.begin(); it != suffixes.end(); ++it) { |
2093 | suffix_id s1 = *it; |
2094 | const deque<RoseVertex> &verts1 = suffixes.vertices(s1); |
2095 | assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); |
2096 | |
2097 | // Caller should ensure that we don't propose merges of graphs that are |
2098 | // already too big. |
2099 | assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); |
2100 | |
2101 | deque<suffix_id> merged; |
2102 | for (auto jt = next(it); jt != suffixes.end(); ++jt) { |
2103 | suffix_id s2 = *jt; |
2104 | const deque<RoseVertex> &verts2 = suffixes.vertices(s2); |
2105 | assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); |
2106 | |
2107 | if (!acyclic) { |
2108 | u32 accel1 = accel_count[s1]; |
2109 | if (accel1 >= NFA_MAX_ACCEL_STATES) { |
2110 | DEBUG_PRINTF("h1 has hit max accel\n" ); |
2111 | break; // next h1 |
2112 | } |
2113 | |
2114 | u32 accel2 = accel_count[s2]; |
2115 | if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { |
2116 | DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " |
2117 | "accel2=%u)\n" , |
2118 | accel1, accel2); |
2119 | continue; // next h2 |
2120 | } |
2121 | } |
2122 | |
2123 | // Attempt to merge h2 into h1. |
2124 | |
2125 | NGHolder victim; |
2126 | cloneHolder(victim, *s2.graph()); |
2127 | |
2128 | // Store a copy of the suffix tops in case we have to roll back. |
2129 | map<RoseVertex, u32> old_tops; |
2130 | for (auto v : verts2) { |
2131 | old_tops[v] = g[v].suffix.top; |
2132 | } |
2133 | |
2134 | if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) { |
2135 | DEBUG_PRINTF("can't set distinct tops\n" ); |
2136 | continue; // next h2 |
2137 | } |
2138 | |
2139 | if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) { |
2140 | DEBUG_PRINTF("merge failed\n" ); |
2141 | // Roll back in-edge properties. |
2142 | for (const auto &m : old_tops) { |
2143 | g[m.first].suffix.top = m.second; |
2144 | } |
2145 | continue; // next h2 |
2146 | } |
2147 | |
2148 | // Update h2's roses to point to h1 now |
2149 | shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph; |
2150 | for (auto v : verts2) { |
2151 | g[v].suffix.graph = winner; |
2152 | } |
2153 | suffixes.insert(s1, verts2); |
2154 | merged.push_back(s2); |
2155 | |
2156 | if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) { |
2157 | DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n" , |
2158 | num_vertices(*s1.graph())); |
2159 | break; // next h1 |
2160 | } |
2161 | |
2162 | if (!acyclic) { |
2163 | // Update h1's accel count estimate. |
2164 | accel_count[s1] = estimatedAccelStates(tbi, *s1.graph()); |
2165 | } |
2166 | } |
2167 | |
2168 | DEBUG_PRINTF("%zu suffixes merged\n" , merged.size()); |
2169 | suffixes.erase_all(merged.begin(), merged.end()); |
2170 | } |
2171 | } |
2172 | |
2173 | /** |
2174 | * This merge pass combines suffixes from unrelated roles into a single |
2175 | * suffix with multiple top events in order to distinguish the triggers |
2176 | * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes |
2177 | * while mergeSmallSuffixes only considers small suffixes. The merges will |
2178 | * group roles with suffixes in the graph into clusters of at most |
2179 | * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the |
 * suffixes and attempting to pairwise merge each with another member. Merges
 * will fail if the result is not implementable, requires too many distinct top
 * events, or loses the ability to be accelerated. A successful merge modifies
 * the existing suffix graph of one member (g1); the other member updates its
 * graph to refer to g1 instead of its previous graph (g2) and uses the newly
 * created tops. Other roles may have been sharing g1 - these are unaffected by
 * the change as the existing top events are left untouched. Other roles using
 * g2 are also unaffected, as g2 will continue to exist while it has any roles
 * triggering it.
2189 | * |
2190 | * Note: suffixes destined for the LBR are not considered for these merges as |
2191 | * the LBR can only handle a single repeat and this type of repeat is ideally |
2192 | * handled outside of an NFA or DFA. |
2193 | */ |
2194 | void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { |
2195 | DEBUG_PRINTF("entry\n" ); |
2196 | |
2197 | if (!tbi.cc.grey.mergeSuffixes) { |
2198 | return; |
2199 | } |
2200 | |
2201 | SuffixBouquet suffixes; |
2202 | |
2203 | RoseGraph &g = tbi.g; |
2204 | |
2205 | for (auto v : vertices_range(g)) { |
2206 | shared_ptr<NGHolder> h = g[v].suffix.graph; |
2207 | if (!h || tbi.isInETable(v)) { |
2208 | continue; |
2209 | } |
2210 | |
2211 | assert(!g[v].suffix.haig); |
2212 | |
2213 | if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { |
2214 | continue; |
2215 | } |
2216 | |
2217 | if (!isAcyclic(*h)) { |
2218 | continue; |
2219 | } |
2220 | |
2221 | suffixes.insert(g[v].suffix, v); |
2222 | } |
2223 | |
2224 | deque<SuffixBouquet> suff_groups; |
2225 | chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); |
2226 | DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n" , suffixes.size(), |
2227 | suff_groups.size()); |
2228 | suffixes.clear(); |
2229 | |
2230 | for (auto &group : suff_groups) { |
2231 | mergeSuffixes(tbi, group, true); |
2232 | } |
2233 | } |
2234 | |
2235 | /** |
2236 | * This merge pass combines suffixes from unrelated roles into a single |
2237 | * suffix with multiple top events in order to distinguish the triggers |
2238 | * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes |
2239 | * while mergeSmallSuffixes only considers small suffixes. The merges will |
2240 | * group roles with suffixes in the graph into clusters of at most |
2241 | * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the |
2242 | * suffixes and attempting to pairwise merge it with another member. Merges |
2243 | * will fail if the result is not implementable, requires too many distinct top |
2244 | * events, or if it losses the ability to be accelerated. The merge will modify |
2245 | * the existing suffix graph of the one member (g1), the other member updates |
2246 | * it graph to refer to g1 instead of its previous graph (g2) and use the new |
2247 | * tops created. Other roles may have been sharing g1 - these are unaffected by |
2248 | * the change as the existing top events are left untouched. Other roles using |
2249 | * g2 are also unaffected as g2 will continue to exist until while it has any |
2250 | * roles triggering it. |
2251 | * |
2252 | * Note: suffixes destined for the LBR are not considered for these merges as |
2253 | * the LBR can only handle a single repeat and this type of repeat is ideally |
2254 | * handled outside of an NFA or DFA. |
2255 | */ |
2256 | void mergeSmallSuffixes(RoseBuildImpl &tbi) { |
2257 | DEBUG_PRINTF("entry\n" ); |
2258 | |
2259 | if (!tbi.cc.grey.mergeSuffixes) { |
2260 | return; |
2261 | } |
2262 | |
2263 | RoseGraph &g = tbi.g; |
2264 | SuffixBouquet suffixes; |
2265 | |
2266 | for (auto v : vertices_range(g)) { |
2267 | shared_ptr<NGHolder> h = g[v].suffix.graph; |
2268 | if (!h || tbi.isInETable(v)) { |
2269 | continue; |
2270 | } |
2271 | assert(!g[v].suffix.haig); |
2272 | |
2273 | // Leave acyclics out for the moment. |
2274 | if (isAcyclic(*h)) { |
2275 | continue; |
2276 | } |
2277 | |
2278 | // Small-ish suffixes only. |
2279 | if (num_vertices(*h) > 32) { |
2280 | continue; |
2281 | } |
2282 | |
2283 | suffixes.insert(g[v].suffix, v); |
2284 | } |
2285 | |
2286 | deque<SuffixBouquet> suff_groups; |
2287 | chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); |
2288 | DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n" , suffixes.size(), |
2289 | suff_groups.size()); |
2290 | suffixes.clear(); |
2291 | |
2292 | for (auto &group : suff_groups) { |
2293 | mergeSuffixes(tbi, group, false); |
2294 | } |
2295 | } |
2296 | |
2297 | static |
2298 | void removeDeadOutfixes(vector<OutfixInfo> &outfixes) { |
2299 | auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); }; |
2300 | outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead), |
2301 | end(outfixes)); |
2302 | } |
2303 | |
2304 | static |
2305 | void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { |
2306 | assert(!winner.is_dead()); |
2307 | |
2308 | winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth); |
2309 | winner.minWidth = min(winner.minWidth, victim.minWidth); |
2310 | winner.maxWidth = max(winner.maxWidth, victim.maxWidth); |
2311 | winner.maxOffset = max(winner.maxOffset, victim.maxOffset); |
2312 | mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info); |
2313 | |
2314 | // This outfix can be ignored in small block mode if both were. The dedupe |
2315 | // layer at runtime will protect us from extra matches if only one was in |
2316 | // the small block matcher. |
2317 | winner.in_sbmatcher &= victim.in_sbmatcher; |
2318 | } |
2319 | |
2320 | static |
2321 | map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, |
2322 | const vector<NGHolder *> &nfas) { |
2323 | map<NGHolder *, NGHolder *> merged; |
2324 | |
2325 | vector<NGHolder *> batch; |
2326 | for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) { |
2327 | batch.push_back(*it); |
2328 | assert((*it)->kind == NFA_OUTFIX); |
2329 | if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { |
2330 | auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); |
2331 | insert(&merged, batch_merged); |
2332 | batch.clear(); |
2333 | } |
2334 | } |
2335 | |
2336 | return merged; |
2337 | } |
2338 | |
2339 | static |
2340 | void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { |
2341 | DEBUG_PRINTF("merging %zu nfas\n" , nfas.size()); |
2342 | if (nfas.size() < 2) { |
2343 | return; |
2344 | } |
2345 | |
2346 | vector<OutfixInfo> &outfixes = tbi.outfixes; |
2347 | |
2348 | map<NGHolder *, size_t> nfa_mapping; |
2349 | for (size_t i = 0; i < outfixes.size(); i++) { |
2350 | auto *holder = outfixes[i].holder(); |
2351 | if (holder) { |
2352 | nfa_mapping[holder] = i; |
2353 | } |
2354 | } |
2355 | |
2356 | map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas); |
2357 | if (merged.empty()) { |
2358 | return; |
2359 | } |
2360 | |
2361 | DEBUG_PRINTF("%zu nfas merged\n" , merged.size()); |
2362 | |
2363 | // Update the outfix info for merged holders. |
2364 | for (const auto &m : merged) { |
2365 | OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]); |
2366 | OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]); |
2367 | mergeOutfixInfo(winner, victim); |
2368 | victim.clear(); |
2369 | } |
2370 | |
2371 | removeDeadOutfixes(outfixes); |
2372 | } |
2373 | |
2374 | namespace { |
2375 | struct MergeMcClellan { |
2376 | MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in) |
2377 | : rm(rm_in), grey(grey_in) {} |
2378 | |
2379 | unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const { |
2380 | assert(d1 && d2); |
2381 | return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey); |
2382 | } |
2383 | |
2384 | private: |
2385 | const ReportManager &rm; |
2386 | const Grey &grey; |
2387 | }; |
2388 | |
2389 | struct MergeHaig { |
2390 | explicit MergeHaig(u32 limit_in) : limit(limit_in) {} |
2391 | |
2392 | unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1, |
2393 | const raw_som_dfa *d2) const { |
2394 | assert(d1 && d2); |
2395 | return attemptToMergeHaig({d1, d2}, limit); |
2396 | } |
2397 | |
2398 | private: |
2399 | const u32 limit; //!< state limit for merged result. |
2400 | }; |
2401 | } |
2402 | |
2403 | /** |
2404 | * Generic pairwise merge algorithm that can be used for either McClellan |
2405 | * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge |
2406 | * operation to a merge functor, which allows the caller to set some policy |
2407 | * (state limits, etc). |
2408 | * |
2409 | * This is currently astonishingly simple and just considers every pair of |
2410 | * DFAs, slow and steady. We may wish to actually apply a merge ordering |
2411 | * strategy in the future. |
2412 | */ |
2413 | template<class RawDfa, class MergeFunctor> |
2414 | static |
2415 | void pairwiseDfaMerge(vector<RawDfa *> &dfas, |
2416 | unordered_map<RawDfa *, size_t> &dfa_mapping, |
2417 | vector<OutfixInfo> &outfixes, |
2418 | MergeFunctor merge_func) { |
2419 | DEBUG_PRINTF("merging group of size %zu\n" , dfas.size()); |
2420 | |
2421 | for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) { |
2422 | if (!*it) { |
2423 | continue; |
2424 | } |
2425 | for (auto jt = next(it); jt != ite; ++jt) { |
2426 | if (!*jt) { |
2427 | continue; |
2428 | } |
2429 | |
2430 | DEBUG_PRINTF("try merge %p and %p\n" , *it, *jt); |
2431 | unique_ptr<RawDfa> rdfa = merge_func(*it, *jt); |
2432 | if (!rdfa) { |
2433 | continue; // Merge failed. |
2434 | } |
2435 | |
2436 | DEBUG_PRINTF("merge succeeded, built %p\n" , rdfa.get()); |
2437 | OutfixInfo &winner = outfixes.at(dfa_mapping[*it]); |
2438 | OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]); |
2439 | assert(!winner.is_dead() && !victim.is_dead()); |
2440 | |
2441 | RawDfa *dfa_ptr = rdfa.get(); |
2442 | dfa_mapping[dfa_ptr] = dfa_mapping[*it]; |
2443 | dfa_mapping.erase(*it); |
2444 | winner.proto = move(rdfa); |
2445 | |
2446 | mergeOutfixInfo(winner, victim); |
2447 | |
2448 | victim.clear(); |
2449 | *jt = nullptr; // to be deleted. |
2450 | *it = dfa_ptr; |
2451 | } |
2452 | } |
2453 | } |
2454 | |
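/**
 * Chunked wrapper around \ref pairwiseDfaMerge: the DFAs are split into
 * groups of at most DFA_CHUNK_SIZE_MAX and the quadratic pairwise merge runs
 * within each group, bounding the total number of merge attempts. As an
 * illustration, 200 DFAs in chunks of 50 cost at most 4 * (50 * 49) / 2
 * attempts rather than (200 * 199) / 2 for a single group.
 */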
2455 | template<class RawDfa, class MergeFunctor> |
2456 | static |
2457 | void chunkedDfaMerge(vector<RawDfa *> &dfas, |
2458 | unordered_map<RawDfa *, size_t> &dfa_mapping, |
2459 | vector<OutfixInfo> &outfixes, |
2460 | MergeFunctor merge_func) { |
2461 | DEBUG_PRINTF("begin merge of %zu dfas\n" , dfas.size()); |
2462 | |
2463 | vector<RawDfa *> out_dfas; |
2464 | vector<RawDfa *> chunk; |
2465 | for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) { |
2466 | chunk.push_back(*it); |
2467 | if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { |
2468 | pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func); |
2469 | out_dfas.insert(end(out_dfas), begin(chunk), end(chunk)); |
2470 | chunk.clear(); |
2471 | } |
2472 | } |
2473 | |
2474 | // Remove null (merged) DFAs and update vector for subsequent use. |
2475 | out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr), |
2476 | out_dfas.end()); |
2477 | dfas.swap(out_dfas); |
2478 | DEBUG_PRINTF("after merge there are %zu dfas\n" , dfas.size()); |
2479 | } |
2480 | |
2481 | static |
2482 | void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) { |
2483 | DEBUG_PRINTF("merging %zu nfas\n" , dfas.size()); |
2484 | if (dfas.size() < 2) { |
2485 | return; |
2486 | } |
2487 | |
2488 | vector<OutfixInfo> &outfixes = tbi.outfixes; |
2489 | |
2490 | /* key is index into outfix array as iterators, etc may be invalidated by |
2491 | * element addition. */ |
2492 | unordered_map<raw_dfa *, size_t> dfa_mapping; |
2493 | for (size_t i = 0; i < outfixes.size(); i++) { |
2494 | auto *rdfa = outfixes[i].rdfa(); |
2495 | if (rdfa) { |
2496 | dfa_mapping[rdfa] = i; |
2497 | } |
2498 | } |
2499 | |
2500 | chunkedDfaMerge(dfas, dfa_mapping, outfixes, |
2501 | MergeMcClellan(tbi.rm, tbi.cc.grey)); |
2502 | removeDeadOutfixes(outfixes); |
2503 | } |
2504 | |
2505 | static |
2506 | void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, |
2507 | const Grey &grey) { |
2508 | if (!grey.roseMcClellanOutfix) { |
2509 | return; |
2510 | } |
2511 | |
2512 | DEBUG_PRINTF("merge combo\n" ); |
2513 | |
2514 | bool seen_dfa = false; |
2515 | u32 nfa_count = 0; |
2516 | for (const auto &outfix : tbi.outfixes) { |
2517 | if (outfix.holder()) { |
2518 | DEBUG_PRINTF("nfa\n" ); |
2519 | nfa_count++; |
2520 | } else if (outfix.rdfa()) { |
2521 | DEBUG_PRINTF("dfa\n" ); |
2522 | seen_dfa = true; |
2523 | } |
2524 | } |
2525 | |
2526 | DEBUG_PRINTF("nfa %u dfas present %d\n" , nfa_count, |
2527 | (int)seen_dfa); |
2528 | if (!nfa_count || (nfa_count == 1 && !seen_dfa)) { |
2529 | DEBUG_PRINTF("no combo merges possible\n" ); |
2530 | return; |
2531 | } |
2532 | |
2533 | /* key is index into outfix array as iterators, etc may be invalidated by |
2534 | * element addition. */ |
2535 | size_t new_dfas = 0; |
2536 | unordered_map<raw_dfa *, size_t> dfa_mapping; |
2537 | vector<raw_dfa *> dfas; |
2538 | |
2539 | for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { |
2540 | auto &outfix = *it; |
2541 | assert(!outfix.is_dead()); |
2542 | |
2543 | if (outfix.rdfa()) { |
2544 | auto *rdfa = outfix.rdfa(); |
2545 | dfas.push_back(rdfa); |
2546 | dfa_mapping[rdfa] = it - tbi.outfixes.begin(); |
2547 | continue; |
2548 | } |
2549 | |
2550 | if (!outfix.holder()) { |
2551 | continue; |
2552 | } |
2553 | |
2554 | NGHolder *h = outfix.holder(); |
2555 | assert(h->kind == NFA_OUTFIX); |
2556 | auto rdfa = buildMcClellan(*h, &rm, grey); |
2557 | if (rdfa) { |
2558 | // Transform this outfix into a DFA and add it to the merge set. |
2559 | dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin(); |
2560 | dfas.push_back(rdfa.get()); |
2561 | outfix.proto = move(rdfa); |
2562 | new_dfas++; |
2563 | } |
2564 | } |
2565 | |
2566 | DEBUG_PRINTF("constructed %zu new dfas\n" , new_dfas); |
2567 | |
2568 | if (!new_dfas) { |
2569 | /* assumes normal dfas have already been fully merged */ |
2570 | return; |
2571 | } |
2572 | |
2573 | chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes, |
2574 | MergeMcClellan(tbi.rm, tbi.cc.grey)); |
2575 | removeDeadOutfixes(tbi.outfixes); |
2576 | } |
2577 | |
2578 | static |
2579 | void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas, |
2580 | u32 limit) { |
2581 | if (dfas.size() < 2) { |
2582 | return; |
2583 | } |
2584 | |
2585 | vector<OutfixInfo> &outfixes = tbi.outfixes; |
2586 | |
2587 | unordered_map<raw_som_dfa *, size_t> dfa_mapping; |
2588 | for (size_t i = 0; i < outfixes.size(); i++) { |
2589 | auto *haig = outfixes[i].haig(); |
2590 | if (haig) { |
2591 | dfa_mapping[haig] = i; |
2592 | } |
2593 | } |
2594 | |
2595 | chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit)); |
2596 | removeDeadOutfixes(outfixes); |
2597 | } |
2598 | |
2599 | /** |
2600 | * This pass attempts to merge outfix engines together. At this point in time, |
2601 | * the engine type (NFA, DFA, Haig) has already been decided for each outfix |
 * and outfixes can only be merged with others of the same type. NFAs are merged
2603 | * in a priority order based on common prefix length. The other types are |
2604 | * merged blindly. Engines are merged to the extent that they can still be |
2605 | * implemented efficiently. |
2606 | */ |
2607 | void mergeOutfixes(RoseBuildImpl &tbi) { |
2608 | if (!tbi.cc.grey.mergeOutfixes) { |
2609 | return; |
2610 | } |
2611 | |
2612 | vector<NGHolder *> nfas; |
2613 | vector<raw_dfa *> dfas; |
2614 | vector<raw_som_dfa *> som_dfas; |
2615 | |
2616 | for (auto &outfix : tbi.outfixes) { |
2617 | if (outfix.rdfa()) { |
2618 | dfas.push_back(outfix.rdfa()); |
2619 | } else if (outfix.holder()) { |
2620 | nfas.push_back(outfix.holder()); |
2621 | } else if (outfix.haig()) { |
2622 | som_dfas.push_back(outfix.haig()); |
2623 | } |
2624 | } |
2625 | |
2626 | DEBUG_PRINTF("merging %zu dfas, %zu nfas\n" , |
2627 | dfas.size(), nfas.size()); |
2628 | |
2629 | mergeOutfixNfas(tbi, nfas); |
2630 | mergeOutfixDfas(tbi, dfas); |
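    /* Haig merging runs twice with increasing state limits; the second, more
     * permissive pass retries whatever the first pass could not combine. */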
2631 | mergeOutfixHaigs(tbi, som_dfas, 255); |
2632 | mergeOutfixHaigs(tbi, som_dfas, 8192); |
2633 | mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey); |
2634 | } |
2635 | |
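/**
 * \brief Work out how many bytes the MPV trigger for this puffette may
 * squash.
 *
 * Illustrative example: for a puffette over reach [a-z] whose triggering
 * literals all end in a digit, the trigger byte always interrupts a run of
 * [a-z], so the full min_width bytes may be squashed; if any literal instead
 * ends in a byte inside the reach, no squashing is allowed and 0 is
 * returned.
 */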
2636 | static |
2637 | u32 allowedSquashDistance(const CharReach &cr, u32 min_width, |
2638 | const RoseBuildImpl &tbi, |
2639 | RoseVertex tv) { |
2640 | CharReach accept_cr; |
2641 | DEBUG_PRINTF("hello |cr|=%zu\n" , cr.count()); |
2642 | |
2643 | const RoseGraph &g = tbi.g; |
2644 | |
2645 | /* TODO: inspect further back in the pattern */ |
2646 | for (u32 lit_id : g[tv].literals) { |
2647 | const rose_literal_id &lit = tbi.literals.at(lit_id); |
2648 | if (lit.delay) { |
2649 | return 0; /* TODO: better */ |
2650 | } |
2651 | if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) { |
2652 | return 0; |
2653 | } |
2654 | assert(!lit.s.empty()); |
2655 | accept_cr |= *lit.s.rbegin(); |
2656 | } |
2657 | |
2658 | DEBUG_PRINTF("|accept_cr|=%zu\n" , accept_cr.count()); |
2659 | |
2660 | if ((accept_cr & cr).any()) { |
2661 | DEBUG_PRINTF("no squash\n" ); |
2662 | return 0; /* the accept byte doesn't always kill the puffette. TODO: |
2663 | * maybe if we look further back we could find something that |
2664 | * would kill the puffette... */ |
2665 | } |
2666 | |
2667 | DEBUG_PRINTF("allowed to squash %u\n" , min_width); |
2668 | return min_width; |
2669 | } |
2670 | |
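/**
 * \brief Replace pure-repeat suffixes ("puffettes") with puffs chained onto
 * the MPV.
 *
 * For instance, a suffix equivalent to /[^x]{5,}/ with a single report can
 * be implemented as a raw_puff on the MPV chain rather than occupying a
 * suffix engine of its own; the role then reports via an MPV trigger event.
 */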
2671 | void mergePuffixes(RoseBuildImpl &tbi) { |
2672 | DEBUG_PRINTF("entry\n" ); |
2673 | |
2674 | if (!tbi.cc.grey.mergeSuffixes) { |
2675 | return; |
2676 | } |
2677 | |
2678 | RoseGraph &g = tbi.g; |
2679 | |
2680 | for (auto v : vertices_range(g)) { |
2681 | shared_ptr<NGHolder> h = g[v].suffix.graph; |
2682 | if (!h) { |
2683 | continue; |
2684 | } |
2685 | assert(!g[v].suffix.haig); |
2686 | assert(!g[v].eod_accept); |
2687 | |
2688 | assert(onlyOneTop(*h)); /* we should not have merged yet */ |
2689 | bool fixed_depth = g[v].min_offset == g[v].max_offset; |
2690 | |
2691 | if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) { |
2692 | continue; |
2693 | } |
2694 | |
2695 | PureRepeat repeat; |
2696 | if (!isPureRepeat(*h, repeat)) { |
2697 | assert(0); |
2698 | continue; |
2699 | } |
2700 | |
2701 | if (repeat.bounds.min == depth(0)) { |
2702 | assert(0); // No vacuous puffs allowed. |
2703 | continue; |
2704 | } |
2705 | |
2706 | assert(repeat.bounds.min.is_finite() && |
2707 | repeat.bounds.max.is_reachable()); |
2708 | assert(repeat.bounds.max == repeat.bounds.min || |
2709 | repeat.bounds.max.is_infinite()); |
2710 | |
2711 | const bool unbounded = repeat.bounds.max.is_infinite(); |
2712 | const set<ReportID> reports = all_reports(*h); |
2713 | assert(reports.size() == 1); |
2714 | ReportID report = *reports.begin(); |
2715 | |
2716 | DEBUG_PRINTF("got puffette candidate %u:%s\n" , report, |
2717 | repeat.bounds.str().c_str()); |
2718 | |
2719 | raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach); |
2720 | |
2721 | u32 queue; |
2722 | u32 event; |
2723 | tbi.addChainTail(rp, &queue, &event); |
2724 | u32 squashDistance = |
2725 | allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v); |
2726 | |
2727 | Report ir = makeMpvTrigger(event, squashDistance); |
2728 | ReportID id = tbi.rm.getInternalId(ir); |
2729 | |
2730 | DEBUG_PRINTF("puffette event q%u t%u\n" , queue, event); |
2731 | g[v].suffix.reset(); |
2732 | g[v].reports.insert(id); |
2733 | } |
2734 | } |
2735 | |
2736 | static |
2737 | void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, |
2738 | u32 top, const vector<RoseVertex> &verts) { |
2739 | DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n" , top, |
2740 | m.get(), verts.size()); |
2741 | |
2742 | for (auto v : verts) { |
2743 | assert(g[v].suffix.castle); |
2744 | g[v].suffix.castle = m; |
2745 | g[v].suffix.top = top; |
2746 | } |
2747 | } |
2748 | |
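/**
 * Greedily folds the single-repeat castles in \p castles into an accumulator
 * castle until it is full, at which point the castle that failed to fit
 * becomes the new accumulator. As an illustration, were max_occupancy 4, six
 * candidate castles would end up as two protos: the first four sharing one
 * and the last two sharing the other.
 */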
2749 | static |
2750 | void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles, |
2751 | const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) { |
2752 | if (castles.size() <= 1) { |
2753 | return; |
2754 | } |
2755 | |
2756 | DEBUG_PRINTF("merging reach %s, %zu elements\n" , |
2757 | describeClass(castles[0]->reach()).c_str(), castles.size()); |
2758 | |
2759 | CastleProto *m = nullptr; |
2760 | |
2761 | for (CastleProto *c : castles) { |
2762 | assert(c->repeats.size() == 1); // Not yet merged. |
2763 | assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); |
2764 | if (!m) { |
2765 | m = c; |
2766 | continue; |
2767 | } |
2768 | |
2769 | u32 top = m->merge(c->repeats[0]); |
2770 | if (top == CastleProto::max_occupancy) { |
2771 | // No room left to merge into 'm'. This one becomes the new 'm'. |
2772 | DEBUG_PRINTF("next mergee\n" ); |
2773 | m = c; |
2774 | continue; |
2775 | } |
2776 | updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, |
2777 | eng_verts.at(c)); |
2778 | DEBUG_PRINTF("added to %p, top %u\n" , m, top); |
2779 | } |
2780 | } |
2781 | |
2782 | void mergeCastleSuffixes(RoseBuildImpl &build) { |
2783 | DEBUG_PRINTF("entry\n" ); |
2784 | |
2785 | if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { |
2786 | return; |
2787 | } |
2788 | |
2789 | unordered_map<CastleProto *, vector<RoseVertex>> eng_verts; |
2790 | map<CharReach, vector<CastleProto *>> by_reach; |
2791 | |
2792 | RoseGraph &g = build.g; |
2793 | |
2794 | for (auto v : vertices_range(g)) { |
2795 | if (!g[v].suffix.castle) { |
2796 | continue; |
2797 | } |
2798 | |
2799 | CastleProto *c = g[v].suffix.castle.get(); |
2800 | |
2801 | if (c->repeats.size() != 1) { |
2802 | // This code assumes it's the only place merging is being done. |
2803 | assert(0); |
2804 | continue; |
2805 | } |
2806 | |
2807 | if (!contains(eng_verts, c)) { |
2808 | by_reach[c->reach()].push_back(c); |
2809 | } |
2810 | eng_verts[c].push_back(v); |
2811 | } |
2812 | |
2813 | for (auto &chunk : by_reach | map_values) { |
2814 | mergeCastleSuffixChunk(g, chunk, eng_verts); |
2815 | } |
2816 | } |
2817 | |
2818 | } // namespace ue2 |
2819 | |