1 | /* |
2 | * Copyright (c) 2015-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Rose Build: functions for reducing the size of the Rose graph |
31 | * through merging. |
32 | */ |
33 | #include "rose_build_merge.h" |
34 | |
35 | #include "grey.h" |
36 | #include "rose_build.h" |
37 | #include "rose_build_impl.h" |
38 | #include "rose_build_util.h" |
39 | #include "ue2common.h" |
40 | #include "nfa/castlecompile.h" |
41 | #include "nfa/goughcompile.h" |
42 | #include "nfa/limex_limits.h" |
43 | #include "nfa/mcclellancompile.h" |
44 | #include "nfa/nfa_build_util.h" |
45 | #include "nfa/rdfa_merge.h" |
46 | #include "nfagraph/ng_holder.h" |
47 | #include "nfagraph/ng_haig.h" |
48 | #include "nfagraph/ng_is_equal.h" |
49 | #include "nfagraph/ng_lbr.h" |
50 | #include "nfagraph/ng_limex.h" |
51 | #include "nfagraph/ng_mcclellan.h" |
52 | #include "nfagraph/ng_puff.h" |
53 | #include "nfagraph/ng_redundancy.h" |
54 | #include "nfagraph/ng_repeat.h" |
55 | #include "nfagraph/ng_reports.h" |
56 | #include "nfagraph/ng_stop.h" |
57 | #include "nfagraph/ng_uncalc_components.h" |
58 | #include "nfagraph/ng_util.h" |
59 | #include "nfagraph/ng_width.h" |
60 | #include "util/bitutils.h" |
61 | #include "util/charreach.h" |
62 | #include "util/compile_context.h" |
63 | #include "util/container.h" |
64 | #include "util/dump_charclass.h" |
65 | #include "util/graph_range.h" |
66 | #include "util/hash.h" |
67 | #include "util/insertion_ordered.h" |
68 | #include "util/order_check.h" |
69 | #include "util/report_manager.h" |
70 | #include "util/ue2string.h" |
71 | #include "util/unordered.h" |
72 | |
73 | #include <algorithm> |
74 | #include <functional> |
75 | #include <list> |
76 | #include <map> |
77 | #include <queue> |
78 | #include <set> |
79 | #include <string> |
80 | #include <vector> |
81 | #include <utility> |
82 | |
83 | #include <boost/range/adaptor/map.hpp> |
84 | |
85 | using namespace std; |
86 | using boost::adaptors::map_values; |
87 | using boost::adaptors::map_keys; |
88 | |
89 | namespace ue2 { |
90 | |
91 | static const size_t NARROW_START_MAX = 10; |
92 | static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128; |
93 | static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; |
94 | static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; |
95 | static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; |
96 | static const size_t MERGE_GROUP_SIZE_MAX = 200; |
97 | static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; |
98 | |
99 | /** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ |
100 | static const size_t DFA_CHUNK_SIZE_MAX = 200; |
101 | |
102 | /** \brief Max DFA states in a merged DFA. */ |
103 | static const size_t DFA_MERGE_MAX_STATES = 8000; |
104 | |
/** \brief In block mode, merge two prefixes even if they don't have identical
 * literal sets if they have fewer than this many vertices and the merged
 * graph is also small. */
108 | static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; |
109 | |
110 | static |
111 | size_t small_merge_max_vertices(const CompileContext &cc) { |
112 | return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM |
113 | : SMALL_MERGE_MAX_VERTICES_BLOCK; |
114 | } |
115 | |
116 | static |
117 | size_t small_rose_threshold(const CompileContext &cc) { |
118 | return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM |
119 | : SMALL_ROSE_THRESHOLD_BLOCK; |
120 | } |
121 | |
122 | /** |
123 | * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that |
124 | * reports should not contribute to the hash. |
125 | */ |
126 | static |
127 | size_t hashLeftfix(const left_id &left) { |
128 | size_t val = 0; |
129 | |
130 | if (left.castle()) { |
131 | hash_combine(val, left.castle()->reach()); |
132 | for (const auto &pr : left.castle()->repeats) { |
133 | hash_combine(val, pr.first); // top |
134 | hash_combine(val, pr.second.bounds); |
135 | } |
136 | } else if (left.graph()) { |
137 | hash_combine(val, hash_holder(*left.graph())); |
138 | } |
139 | |
140 | return val; |
141 | } |
142 | |
143 | namespace { |
144 | |
/** Key used to group sets of leftfixes for the dedupeLeftfixes path. */
146 | struct RoseGroup { |
147 | RoseGroup(const RoseBuildImpl &build, RoseVertex v) |
148 | : left_hash(hashLeftfix(build.g[v].left)), |
149 | lag(build.g[v].left.lag), eod_table(build.isInETable(v)) { |
150 | const RoseGraph &g = build.g; |
151 | assert(in_degree(v, g) == 1); |
152 | RoseVertex u = *inv_adjacent_vertices(v, g).first; |
153 | parent = g[u].index; |
154 | } |
155 | |
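    /* ORDER_CHECK(field) compares the field on both objects and returns from
     * this function as soon as they differ, so the sequence below yields a
     * lexicographic ordering over (parent, left_hash, lag, eod_table). */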
156 | bool operator<(const RoseGroup &b) const { |
157 | const RoseGroup &a = *this; |
158 | ORDER_CHECK(parent); |
159 | ORDER_CHECK(left_hash); |
160 | ORDER_CHECK(lag); |
161 | ORDER_CHECK(eod_table); |
162 | return false; |
163 | } |
164 | |
165 | private: |
166 | /** Parent vertex index. We must use the index, rather than the descriptor, |
167 | * for compile determinism. */ |
168 | size_t parent; |
169 | |
170 | /** Quick hash of the leftfix itself. Must be identical for a given pair of |
171 | * graphs if is_equal would return true. */ |
172 | size_t left_hash; |
173 | |
174 | /** Leftfix lag value. */ |
175 | u32 lag; |
176 | |
177 | /** True if associated vertex (successor) is in the EOD table. We don't |
178 | * allow sharing of leftfix engines between "normal" and EOD operation. */ |
179 | bool eod_table; |
180 | }; |
181 | |
182 | /** |
183 | * Intended to find graphs that are identical except for their report |
184 | * IDs. Relies on vertex and edge indices to pick up graphs that have been |
185 | * messily put together in different orderings. Only implemented for castles and |
186 | * holders. |
187 | */ |
188 | static |
189 | bool is_equal(const left_id &u_left, ReportID u_report, |
190 | const left_id &v_left, ReportID v_report) { |
191 | if (u_left.castle() && v_left.castle()) { |
192 | return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); |
193 | } |
194 | |
195 | if (!u_left.graph() || !v_left.graph()) { |
196 | return false; |
197 | } |
198 | |
199 | return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); |
200 | } |
201 | |
202 | } // namespace |
203 | |
/**
 * This pass performs work similar to \ref dedupeSuffixes - it removes
 * duplicate prefixes/infixes (that is, leftfixes) which are identical graphs
 * and share the same trigger vertex and lag. Leftfixes are first grouped by
 * parent role and lag to reduce the number of candidates to be inspected
 * for each leftfix. The graphs in each cluster are then compared with each
 * other and the Rose graph is updated to refer only to a canonical version
 * of each graph.
 *
 * Note: only roles with a single predecessor vertex are considered for this
 * transform - it should probably be generalised to work for roles which share
 * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
 * it should be retired entirely.
 */
218 | bool dedupeLeftfixes(RoseBuildImpl &tbi) { |
    DEBUG_PRINTF("deduping leftfixes\n");
220 | map<RoseGroup, deque<RoseVertex>> roses; |
221 | bool work_done = false; |
222 | |
223 | /* Note: a leftfix's transientness will not be altered by deduping */ |
224 | |
225 | // Collect leftfixes into groups. |
226 | RoseGraph &g = tbi.g; |
227 | for (auto v : vertices_range(g)) { |
228 | if (!g[v].left) { |
229 | continue; |
230 | } |
231 | const left_id left(g[v].left); |
232 | |
233 | if (left.haig()) { |
234 | /* TODO: allow merging of identical haigs */ |
235 | continue; |
236 | } |
237 | |
238 | if (in_degree(v, g) != 1) { |
239 | continue; |
240 | } |
241 | |
242 | roses[RoseGroup(tbi, v)].push_back(v); |
243 | } |
244 | |
    DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
246 | |
247 | // Walk groups and dedupe the roses therein. |
248 | for (deque<RoseVertex> &verts : roses | map_values) { |
        DEBUG_PRINTF("group has %zu vertices\n", verts.size());
250 | |
251 | unordered_set<left_id> seen; |
252 | |
253 | for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) { |
254 | RoseVertex v = *jt; |
255 | left_id left(g[v].left); |
256 | |
257 | // Skip cases we've already handled, and mark as seen otherwise. |
258 | if (!seen.insert(left).second) { |
259 | continue; |
260 | } |
261 | |
262 | // Scan the rest of the list for dupes. |
263 | for (auto kt = std::next(jt); kt != jte; ++kt) { |
264 | if (g[v].left == g[*kt].left |
265 | || !is_equal(g[v].left, g[v].left.leftfix_report, |
266 | g[*kt].left, g[*kt].left.leftfix_report)) { |
267 | continue; |
268 | } |
269 | |
270 | // Dupe found. |
                DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
                             g[*kt].index, g[v].index);
273 | assert(g[v].left.lag == g[*kt].left.lag); |
274 | g[*kt].left = g[v].left; |
275 | work_done = true; |
276 | } |
277 | } |
278 | } |
279 | |
280 | return work_done; |
281 | } |
282 | |
283 | /** |
284 | * \brief Returns a numeric key that can be used to group this suffix with |
285 | * others that may be its duplicate. |
286 | */ |
287 | static |
288 | size_t suffix_size_key(const suffix_id &s) { |
289 | if (s.graph()) { |
290 | return num_vertices(*s.graph()); |
291 | } |
292 | if (s.castle()) { |
293 | return s.castle()->repeats.size(); |
294 | } |
295 | return 0; |
296 | } |
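
/* For example (illustrative): a suffix holder with 12 vertices keys as 12 and
 * a castle with three repeats keys as 3. Key collisions are harmless, as
 * grouped candidates are still compared properly with is_equal() below. */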
297 | |
298 | static |
299 | bool is_equal(const suffix_id &s1, const suffix_id &s2) { |
300 | if (s1.graph() && s2.graph()) { |
301 | return is_equal(*s1.graph(), *s2.graph()); |
302 | } else if (s1.castle() && s2.castle()) { |
303 | return is_equal(*s1.castle(), *s2.castle()); |
304 | } |
305 | return false; |
306 | } |
307 | |
308 | /** |
309 | * This function simply looks for suffix NGHolder graphs which are identical |
310 | * and updates the roles in the RoseGraph to refer to only a single copy. This |
311 | * obviously has benefits in terms of both performance (as we don't run |
312 | * multiple engines doing the same work) and stream state. This function first |
313 | * groups all suffixes by number of vertices and report set to restrict the set |
314 | * of possible candidates. Each group is then walked to find duplicates using |
315 | * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it |
316 | * goes. |
317 | * |
318 | * Note: does not dedupe suffixes of vertices in the EOD table. |
319 | */ |
320 | void dedupeSuffixes(RoseBuildImpl &tbi) { |
    DEBUG_PRINTF("deduping suffixes\n");
322 | |
323 | unordered_map<suffix_id, set<RoseVertex>> suffix_map; |
324 | map<pair<size_t, set<ReportID>>, vector<suffix_id>> part; |
325 | |
326 | // Collect suffixes into groups. |
327 | RoseGraph &g = tbi.g; |
328 | for (auto v : vertices_range(g)) { |
329 | if (!g[v].suffix || tbi.isInETable(v)) { |
330 | continue; |
331 | } |
332 | |
333 | const suffix_id s(g[v].suffix); |
334 | |
335 | if (!(s.graph() || s.castle())) { |
336 | continue; // e.g. Haig |
337 | } |
338 | |
339 | set<RoseVertex> &verts = suffix_map[s]; |
340 | if (verts.empty()) { |
341 | part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s); |
342 | } |
343 | verts.insert(v); |
344 | } |
345 | |
    DEBUG_PRINTF("collected %zu groups\n", part.size());
347 | |
348 | for (const auto &cand : part | map_values) { |
349 | if (cand.size() <= 1) { |
350 | continue; |
351 | } |
        DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
353 | |
354 | for (auto jt = cand.begin(); jt != cand.end(); ++jt) { |
355 | if (suffix_map[*jt].empty()) { |
356 | continue; |
357 | } |
358 | for (auto kt = next(jt); kt != cand.end(); ++kt) { |
359 | if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) { |
360 | continue; |
361 | } |
                DEBUG_PRINTF("found dupe\n");
363 | for (auto v : suffix_map[*kt]) { |
364 | RoseVertex dupe = *suffix_map[*jt].begin(); |
365 | assert(dupe != v); |
366 | g[v].suffix.graph = g[dupe].suffix.graph; |
367 | g[v].suffix.castle = g[dupe].suffix.castle; |
368 | assert(suffix_id(g[v].suffix) == |
369 | suffix_id(g[dupe].suffix)); |
370 | suffix_map[*jt].insert(v); |
371 | } |
372 | suffix_map[*kt].clear(); |
373 | } |
374 | } |
375 | } |
376 | } |
377 | |
378 | namespace { |
379 | |
380 | /** |
381 | * This class stores a mapping from an engine reference (left_id, suffix_id, |
382 | * etc) to a list of vertices, and also allows us to iterate over the set of |
383 | * engine references in insertion order -- we add to the mapping in vertex |
384 | * iteration order, so this allows us to provide a consistent ordering. |
385 | */ |
386 | template<class EngineRef> |
387 | class Bouquet { |
388 | private: |
389 | list<EngineRef> ordering; // Unique list in insert order. |
390 | using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>; |
391 | BouquetMap bouquet; |
392 | public: |
393 | void insert(const EngineRef &h, RoseVertex v) { |
394 | typename BouquetMap::iterator f = bouquet.find(h); |
395 | if (f == bouquet.end()) { |
396 | ordering.push_back(h); |
397 | bouquet[h].push_back(v); |
398 | } else { |
399 | f->second.push_back(v); |
400 | } |
401 | } |
402 | |
403 | void insert(const EngineRef &h, const deque<RoseVertex> &verts) { |
404 | typename BouquetMap::iterator f = bouquet.find(h); |
405 | if (f == bouquet.end()) { |
406 | ordering.push_back(h); |
407 | bouquet.insert(make_pair(h, verts)); |
408 | } else { |
409 | f->second.insert(f->second.end(), verts.begin(), verts.end()); |
410 | } |
411 | } |
412 | |
413 | const deque<RoseVertex> &vertices(const EngineRef &h) const { |
414 | typename BouquetMap::const_iterator it = bouquet.find(h); |
415 | assert(it != bouquet.end()); // must be present |
416 | return it->second; |
417 | } |
418 | |
419 | void erase(const EngineRef &h) { |
420 | assert(bouquet.find(h) != bouquet.end()); |
421 | bouquet.erase(h); |
422 | ordering.remove(h); |
423 | } |
424 | |
425 | /** Remove all the elements in the given iterator range. */ |
426 | template <class Iter> |
427 | void erase_all(Iter erase_begin, Iter erase_end) { |
428 | for (Iter it = erase_begin; it != erase_end; ++it) { |
429 | bouquet.erase(*it); |
430 | } |
431 | |
432 | // Use a quick-lookup container so that we only have to traverse the |
433 | // 'ordering' list once. |
434 | const set<EngineRef> dead(erase_begin, erase_end); |
435 | for (iterator it = begin(); it != end(); /* incremented inside */) { |
436 | if (contains(dead, *it)) { |
437 | ordering.erase(it++); |
438 | } else { |
439 | ++it; |
440 | } |
441 | } |
442 | } |
443 | |
444 | void clear() { |
445 | ordering.clear(); |
446 | bouquet.clear(); |
447 | } |
448 | |
449 | size_t size() const { return bouquet.size(); } |
450 | |
451 | // iterate over holders in insert order |
452 | typedef typename list<EngineRef>::iterator iterator; |
453 | iterator begin() { return ordering.begin(); } |
454 | iterator end() { return ordering.end(); } |
455 | |
456 | // const iterate over holders in insert order |
457 | typedef typename list<EngineRef>::const_iterator const_iterator; |
458 | const_iterator begin() const { return ordering.begin(); } |
459 | const_iterator end() const { return ordering.end(); } |
460 | }; |
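
/*
 * Illustrative usage sketch (not part of the build): collect each distinct
 * leftfix engine with the vertices that use it, then visit the engines in
 * first-insertion order. Assumes `build` is a populated RoseBuildImpl.
 *
 *     Bouquet<left_id> lefts;
 *     for (auto v : vertices_range(build.g)) {
 *         if (build.g[v].left) {
 *             lefts.insert(left_id(build.g[v].left), v);
 *         }
 *     }
 *     for (const left_id &left : lefts) {
 *         DEBUG_PRINTF("engine has %zu vertices\n",
 *                      lefts.vertices(left).size());
 *     }
 */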
461 | |
462 | typedef Bouquet<left_id> LeftfixBouquet; |
463 | typedef Bouquet<suffix_id> SuffixBouquet; |
464 | |
465 | } // namespace |
466 | |
467 | /** |
468 | * Split a \ref Bouquet of some type into several smaller ones. |
469 | */ |
470 | template <class EngineRef> |
471 | static void chunkBouquets(const Bouquet<EngineRef> &in, |
472 | deque<Bouquet<EngineRef>> &out, |
473 | const size_t chunk_size) { |
474 | if (in.size() <= chunk_size) { |
475 | out.push_back(in); |
476 | return; |
477 | } |
478 | |
479 | out.push_back(Bouquet<EngineRef>()); |
480 | for (const auto &engine : in) { |
481 | if (out.back().size() >= chunk_size) { |
482 | out.push_back(Bouquet<EngineRef>()); |
483 | } |
484 | out.back().insert(engine, in.vertices(engine)); |
485 | } |
486 | } |
487 | |
488 | static |
489 | bool stringsCanFinishAtSameSpot(const ue2_literal &u, |
490 | ue2_literal::const_iterator v_b, |
491 | ue2_literal::const_iterator v_e) { |
492 | ue2_literal::const_iterator u_e = u.end(); |
493 | ue2_literal::const_iterator u_b = u.begin(); |
494 | |
495 | while (u_e != u_b && v_e != v_b) { |
496 | --u_e; |
497 | --v_e; |
498 | |
499 | if (!overlaps(*u_e, *v_e)) { |
500 | return false; |
501 | } |
502 | } |
503 | |
504 | return true; |
505 | } |
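
/* For example (illustrative): with u = "abc" and v = "bc", the suffixes agree
 * character-by-character from the end, so the two strings can finish at the
 * same spot; with v = "dc" they cannot, as 'b' and 'd' do not overlap. */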
506 | |
/**
 * Check that, once u has been seen, it is impossible for the arrival of v to
 * require the inspection of an engine at a point earlier than u did.
 *
 * Let delta be the earliest that v can be seen after u (may be zero).
 *
 * That is, we require u_loc - ulag <= v_loc - vlag (with v_loc = u_loc + delta)
 *     ==>        - ulag <= delta - vlag
 *     ==>   vlag - ulag <= delta
 */
517 | static |
518 | bool checkPrefix(const rose_literal_id &ul, const u32 ulag, |
519 | const rose_literal_id &vl, const u32 vlag) { |
    DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
                 escapeString(vl.s).c_str(), vlag);
522 | |
523 | if (vl.delay || ul.delay) { |
524 | /* engine related literals should not be delayed anyway */ |
525 | return false; |
526 | } |
527 | |
528 | if (ulag >= vlag) { |
529 | assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); |
530 | return true; |
531 | } |
532 | |
533 | size_t min_allowed_delta = vlag - ulag; |
    DEBUG_PRINTF("min allowed distance %zu\n", min_allowed_delta);
535 | |
536 | for (size_t i = 0; i < min_allowed_delta; i++) { |
537 | if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { |
            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
539 | return false; |
540 | } |
541 | } |
542 | |
    DEBUG_PRINTF("OK\n");
544 | return true; |
545 | } |
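
/*
 * Worked example (illustrative): suppose ul.s = "abc" with ulag = 0 and
 * vl.s = "bc" with vlag = 1, giving min_allowed_delta = 1. The i = 0
 * iteration asks whether "bc" can end at the same offset as "abc" -- it can,
 * so v's engine could need checking at offset X - 1 after u's engine was
 * already checked at offset X, and the merge is rejected.
 */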
546 | |
547 | static |
548 | bool hasSameEngineType(const RoseVertexProps &u_prop, |
549 | const RoseVertexProps &v_prop) { |
550 | const left_id u_left = u_prop.left; |
551 | const left_id v_left = v_prop.left; |
552 | |
553 | return !u_left.haig() == !v_left.haig() |
554 | && !u_left.dfa() == !v_left.dfa() |
555 | && !u_left.castle() == !v_left.castle() |
556 | && !u_left.graph() == !v_left.graph(); |
557 | } |
558 | |
559 | /** |
560 | * Verifies that merging the leftfix of vertices does not cause conflicts due |
561 | * to the literals on the right. |
562 | * |
 * The main concern is that the lags of the literals and the overlap between
 * them may allow the engine check offset to regress.
565 | * |
566 | * Parameters are vectors of literals + lag pairs. |
567 | * |
 * Note: if more constraints on when the leftfixes will be checked were
 * exploited (mandatory lookarounds passing, offset checks), more merges
 * might be allowed.
570 | */ |
571 | static |
572 | bool compatibleLiteralsForMerge( |
573 | const vector<pair<const rose_literal_id *, u32>> &ulits, |
574 | const vector<pair<const rose_literal_id *, u32>> &vlits) { |
575 | assert(!ulits.empty()); |
576 | assert(!vlits.empty()); |
577 | |
    // We cannot merge engines whose trigger literals are in different tables.
579 | if (ulits[0].first->table != vlits[0].first->table) { |
        DEBUG_PRINTF("literals in different tables\n");
581 | return false; |
582 | } |
583 | |
584 | // We don't handle delayed cases yet. |
585 | for (const auto &ue : ulits) { |
586 | const rose_literal_id &ul = *ue.first; |
587 | if (ul.delay) { |
588 | return false; |
589 | } |
590 | } |
591 | |
592 | for (const auto &ve : vlits) { |
593 | const rose_literal_id &vl = *ve.first; |
594 | if (vl.delay) { |
595 | return false; |
596 | } |
597 | } |
598 | |
    /* An engine requires that all accesses to it are ordered by offsets. (i.e.,
       we cannot check an engine's state at offset Y if we have already
       checked its status at offset X and X > Y). If we cannot establish that
       the literals used for triggering will satisfy this property, then it is
       not safe to merge the engine. */
604 | for (const auto &ue : ulits) { |
605 | const rose_literal_id &ul = *ue.first; |
606 | u32 ulag = ue.second; |
607 | |
608 | for (const auto &ve : vlits) { |
609 | const rose_literal_id &vl = *ve.first; |
610 | u32 vlag = ve.second; |
611 | |
612 | if (!checkPrefix(ul, ulag, vl, vlag) |
613 | || !checkPrefix(vl, vlag, ul, ulag)) { |
                DEBUG_PRINTF("prefix check failed\n");
615 | return false; |
616 | } |
617 | } |
618 | } |
619 | |
620 | return true; |
621 | } |
622 | |
623 | /** |
624 | * True if this graph has few enough accel states to be implemented as an NFA |
625 | * with all of those states actually becoming accel schemes. |
626 | */ |
627 | static |
628 | bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { |
629 | u32 num = countAccelStates(g, &build.rm, build.cc); |
    DEBUG_PRINTF("graph with %zu vertices has %u accel states\n",
                 num_vertices(g), num);
632 | return num <= NFA_MAX_ACCEL_STATES; |
633 | } |
634 | |
635 | /** |
 * In block mode, we want to be a little more selective -- we will only merge
637 | * prefix engines when the literal sets are the same or if the merged graph |
638 | * has only grown by a small amount. |
639 | */ |
640 | static |
641 | bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, |
642 | RoseVertex v) { |
643 | assert(!build.cc.streaming); |
644 | assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); |
645 | |
646 | // Always merge infixes if we can (subject to the other criteria in |
647 | // mergeableRoseVertices). |
648 | if (!build.isRootSuccessor(u)) { |
649 | return true; |
650 | } |
651 | |
652 | const RoseGraph &g = build.g; |
653 | |
654 | // Merge prefixes with identical literal sets (as we'd have to run them |
655 | // both when we see those literals anyway). |
656 | if (g[u].literals == g[v].literals) { |
657 | return true; |
658 | } |
659 | |
660 | // The rest of this function only deals with the case when both vertices |
661 | // have graph leftfixes. |
662 | if (!g[u].left.graph || !g[v].left.graph) { |
663 | return false; |
664 | } |
665 | |
666 | const size_t u_count = num_vertices(*g[u].left.graph); |
667 | const size_t v_count = num_vertices(*g[v].left.graph); |
    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
                 u_count, v_count);
670 | if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || |
671 | v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("prefixes too big already\n");
673 | return false; |
674 | } |
675 | |
    DEBUG_PRINTF("trying merge\n");
677 | NGHolder h; |
678 | cloneHolder(h, *g[v].left.graph); |
679 | if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { |
        DEBUG_PRINTF("couldn't merge\n");
681 | return false; |
682 | } |
683 | |
684 | const size_t merged_count = num_vertices(h); |
    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
686 | if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("exceeded limit\n");
688 | return false; |
689 | } |
690 | |
691 | // We want to only perform merges that take advantage of some |
692 | // commonality in the two input graphs, so we check that the number of |
693 | // vertices has only grown a small amount: somewhere between the sum |
694 | // (no commonality) and the max (no growth at all) of the vertex counts |
695 | // of the input graphs. |
696 | const size_t max_size = u_count + v_count; |
697 | const size_t min_size = max(u_count, v_count); |
698 | const size_t max_growth = ((max_size - min_size) * 25) / 100; |
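    // For example (illustrative): u_count = 20 and v_count = 24 give
    // max_size = 44, min_size = 24 and max_growth = 5, so a merged graph of
    // more than 29 vertices is rejected as showing too little commonality.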
699 | if (merged_count > min_size + max_growth) { |
        DEBUG_PRINTF("grew too much\n");
701 | return false; |
702 | } |
703 | |
704 | // We don't want to squander any chances at accelerating. |
705 | if (!isAccelerableLeftfix(build, h) && |
706 | (isAccelerableLeftfix(build, *g[u].left.graph) || |
707 | isAccelerableLeftfix(build, *g[v].left.graph))) { |
        DEBUG_PRINTF("would lose accel property\n");
709 | return false; |
710 | } |
711 | |
    DEBUG_PRINTF("safe to merge\n");
713 | return true; |
714 | } |
715 | |
716 | bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, |
717 | RoseVertex v) { |
718 | assert(u != v); |
719 | |
720 | if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { |
721 | return false; |
722 | } |
723 | |
724 | if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { |
725 | return false; |
726 | } |
727 | |
728 | /* We cannot merge prefixes/vertices if they are successors of different |
729 | * root vertices */ |
730 | if (tbi.isRootSuccessor(u)) { |
731 | assert(tbi.isRootSuccessor(v)); |
732 | set<RoseVertex> u_preds; |
733 | set<RoseVertex> v_preds; |
734 | insert(&u_preds, inv_adjacent_vertices(u, tbi.g)); |
735 | insert(&v_preds, inv_adjacent_vertices(v, tbi.g)); |
736 | |
737 | if (u_preds != v_preds) { |
738 | return false; |
739 | } |
740 | } |
741 | |
742 | u32 ulag = tbi.g[u].left.lag; |
743 | vector<pair<const rose_literal_id *, u32>> ulits; |
744 | ulits.reserve(tbi.g[u].literals.size()); |
745 | for (u32 id : tbi.g[u].literals) { |
746 | ulits.emplace_back(&tbi.literals.at(id), ulag); |
747 | } |
748 | |
749 | u32 vlag = tbi.g[v].left.lag; |
750 | vector<pair<const rose_literal_id *, u32>> vlits; |
751 | vlits.reserve(tbi.g[v].literals.size()); |
752 | for (u32 id : tbi.g[v].literals) { |
753 | vlits.emplace_back(&tbi.literals.at(id), vlag); |
754 | } |
755 | |
756 | if (!compatibleLiteralsForMerge(ulits, vlits)) { |
757 | return false; |
758 | } |
759 | |
    DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
                 tbi.g[v].index);
762 | return true; |
763 | } |
764 | |
/* We cannot merge an engine if a trigger literal and a post literal overlap
 * in such a way that the engine's status would need to be checked at a point
 * before the engine's current location.
 *
 * That is, for a trigger literal u and a post literal v,
 * where delta is the earliest v can appear after u,
 * we require that v_loc - v_lag >= u_loc
 *            ==>  u_loc + delta - v_lag >= u_loc
 *            ==>  delta >= v_lag
 */
776 | static |
777 | bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, |
778 | u32 vlag) { |
    DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
                 escapeString(vl.s).c_str(), vlag);
781 | |
782 | for (size_t i = 0; i < vlag; i++) { |
783 | if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { |
            DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
785 | return false; |
786 | } |
787 | } |
788 | |
    DEBUG_PRINTF("OK\n");
790 | return true; |
791 | } |
792 | |
793 | template<typename VertexCont> |
794 | static never_inline |
795 | bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, |
796 | const VertexCont &v2) { |
797 | flat_set<RoseVertex> preds; |
798 | for (auto v : v1) { |
799 | insert(&preds, inv_adjacent_vertices(v, build.g)); |
800 | } |
801 | |
802 | flat_set<u32> pred_lits; |
803 | |
804 | /* No need to examine delays of a common pred - as it must already have |
805 | * survived the delay checks. |
806 | * |
807 | * This is important when the pred is in the anchored table as |
808 | * the literal is no longer available. */ |
809 | flat_set<RoseVertex> known_good_preds; |
810 | for (auto v : v2) { |
811 | insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); |
812 | } |
813 | |
814 | for (auto u : preds) { |
815 | if (!contains(known_good_preds, u)) { |
816 | insert(&pred_lits, build.g[u].literals); |
817 | } |
818 | } |
819 | |
820 | vector<const rose_literal_id *> pred_rose_lits; |
821 | pred_rose_lits.reserve(pred_lits.size()); |
822 | for (const auto &p : pred_lits) { |
823 | pred_rose_lits.push_back(&build.literals.at(p)); |
824 | } |
825 | |
826 | for (auto v : v2) { |
827 | u32 vlag = build.g[v].left.lag; |
828 | if (!vlag) { |
829 | continue; |
830 | } |
831 | |
832 | for (const u32 vlit : build.g[v].literals) { |
833 | const rose_literal_id &vl = build.literals.at(vlit); |
834 | assert(!vl.delay); // this should never have got this far? |
835 | for (const auto &ul : pred_rose_lits) { |
836 | assert(!ul->delay); // this should never have got this far? |
837 | |
838 | if (!checkPredDelay(*ul, vl, vlag)) { |
839 | return false; |
840 | } |
841 | } |
842 | } |
843 | } |
844 | |
845 | return true; |
846 | } |
847 | |
848 | static |
849 | bool mergeableRoseVertices(const RoseBuildImpl &tbi, |
850 | const deque<RoseVertex> &verts1, |
851 | const deque<RoseVertex> &verts2) { |
852 | assert(!verts1.empty()); |
853 | assert(!verts2.empty()); |
854 | |
855 | RoseVertex u_front = verts1.front(); |
856 | RoseVertex v_front = verts2.front(); |
857 | |
858 | /* all vertices must have the same engine type: assume all verts in each |
859 | * group are already of the same type */ |
860 | if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { |
861 | return false; |
862 | } |
863 | |
864 | bool is_prefix = tbi.isRootSuccessor(u_front); |
865 | |
866 | /* We cannot merge prefixes/vertices if they are successors of different |
867 | * root vertices: similarly, assume the grouped vertices are compatible */ |
868 | if (is_prefix) { |
869 | assert(tbi.isRootSuccessor(v_front)); |
870 | set<RoseVertex> u_preds; |
871 | set<RoseVertex> v_preds; |
872 | insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); |
873 | insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); |
874 | |
875 | if (u_preds != v_preds) { |
876 | return false; |
877 | } |
878 | } |
879 | |
880 | vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ |
881 | for (auto a : verts1) { |
882 | if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { |
883 | return false; |
884 | } |
885 | |
886 | u32 ulag = tbi.g[a].left.lag; |
887 | for (u32 id : tbi.g[a].literals) { |
888 | ulits.emplace_back(&tbi.literals.at(id), ulag); |
889 | } |
890 | } |
891 | |
892 | vector<pair<const rose_literal_id *, u32>> vlits; |
893 | for (auto a : verts2) { |
894 | if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { |
895 | return false; |
896 | } |
897 | |
898 | u32 vlag = tbi.g[a].left.lag; |
899 | for (u32 id : tbi.g[a].literals) { |
900 | vlits.emplace_back(&tbi.literals.at(id), vlag); |
901 | } |
902 | } |
903 | |
904 | if (!compatibleLiteralsForMerge(ulits, vlits)) { |
905 | return false; |
906 | } |
907 | |
908 | // Check preds are compatible as well. |
909 | if (!checkPredDelays(tbi, verts1, verts2) |
910 | || !checkPredDelays(tbi, verts2, verts1)) { |
911 | return false; |
912 | } |
913 | |
    DEBUG_PRINTF("vertex sets are mergeable\n");
915 | return true; |
916 | } |
917 | |
918 | bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1, |
919 | const set<RoseVertex> &v2) { |
920 | const deque<RoseVertex> vv1(v1.begin(), v1.end()); |
921 | const deque<RoseVertex> vv2(v2.begin(), v2.end()); |
922 | return mergeableRoseVertices(tbi, vv1, vv2); |
923 | } |
924 | |
925 | /** \brief Priority queue element for Rose merges. */ |
926 | namespace { |
927 | struct RoseMergeCandidate { |
928 | RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in, |
929 | u32 tb) |
930 | : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0), |
931 | tie_breaker(tb) { |
932 | if (r1.graph() && r2.graph()) { |
933 | const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph(); |
            /* SOM_NONE as Haigs don't merge; this is just a guiding heuristic */
935 | CharReach stop1 = findStopAlphabet(h1, SOM_NONE); |
936 | CharReach stop2 = findStopAlphabet(h2, SOM_NONE); |
937 | stopxor = (stop1 ^ stop2).count(); |
938 | |
939 | // We use the number of vertices as an approximation of the state |
940 | // count here, as this is just feeding a comparison. |
941 | u32 vertex_count = num_vertices(h1) + num_vertices(h2); |
942 | states = vertex_count - min(vertex_count, cpl); |
943 | } else if (r1.castle() && r2.castle()) { |
944 | // FIXME |
945 | } |
946 | } |
947 | |
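    /* Note: std::priority_queue pops its greatest element, so the ordering
     * below makes the most attractive candidate compare greatest: smallest
     * stop-alphabet difference first, then longest common prefix, then
     * smallest estimated merged state count. */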
948 | bool operator<(const RoseMergeCandidate &a) const { |
949 | if (stopxor != a.stopxor) { |
950 | return stopxor > a.stopxor; |
951 | } |
952 | if (cpl != a.cpl) { |
953 | return cpl < a.cpl; |
954 | } |
955 | if (states != a.states) { |
956 | return states > a.states; |
957 | } |
958 | return tie_breaker < a.tie_breaker; |
959 | } |
960 | |
961 | left_id r1; |
962 | left_id r2; |
963 | u32 stopxor; |
964 | u32 cpl; //!< common prefix length |
965 | u32 states; |
966 | u32 tie_breaker; //!< determinism |
967 | }; |
968 | } |
969 | |
970 | static |
971 | bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, |
972 | const vector<RoseVertex> &verts1, |
973 | const vector<RoseVertex> &verts2) { |
974 | assert(!verts1.empty() && !verts2.empty()); |
975 | |
    DEBUG_PRINTF("merging pair of leftfixes:\n");
    DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(),
                 as_string_list(all_tops(r1)).c_str());
    DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(),
                 as_string_list(all_tops(r2)).c_str());
981 | |
982 | RoseGraph &g = build.g; |
983 | |
984 | if (r1.graph()) { |
985 | assert(r2.graph()); |
986 | assert(r1.graph()->kind == r2.graph()->kind); |
987 | if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { |
            DEBUG_PRINTF("nfa merge failed\n");
989 | return false; |
990 | } |
991 | |
992 | /* The graph in r1 has been merged into the graph in r2. Update r1's |
993 | * vertices with the new graph ptr. mergeNfaPair() does not alter the |
994 | * tops from the input graph so no need to update top values. |
995 | * |
996 | * It is the responsibility of the caller to ensure that the tops are |
997 | * distinct when they have different trigger conditions. |
998 | * [Note: mergeLeftfixesVariableLag() should have a common parent set] |
999 | */ |
1000 | shared_ptr<NGHolder> &h = g[verts2.front()].left.graph; |
1001 | for (RoseVertex v : verts1) { |
1002 | g[v].left.graph = h; |
1003 | } |
1004 | |
1005 | return true; |
1006 | } else if (r1.castle()) { |
1007 | assert(r2.castle()); |
1008 | assert(build.cc.grey.allowCastle); |
1009 | |
1010 | map<u32, u32> top_map; |
1011 | if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) { |
            DEBUG_PRINTF("castle merge failed\n");
1013 | return false; |
1014 | } |
1015 | |
1016 | // The castle in r1 has been merged into the castle in r2, with tops |
1017 | // remapped as per top_map. |
1018 | const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle; |
1019 | for (RoseVertex v : verts1) { |
1020 | g[v].left.castle = c; |
1021 | for (const auto &e : in_edges_range(v, g)) { |
1022 | g[e].rose_top = top_map.at(g[e].rose_top); |
1023 | } |
1024 | } |
1025 | return true; |
1026 | } |
1027 | |
1028 | assert(0); |
1029 | return false; |
1030 | } |
1031 | |
1032 | /** |
 * Checks that there is no problem due to the involved vertices if we merge
 * two leftfix engines.
 *
 * This function takes the vertices on the right of the two engines.
1037 | * |
1038 | * Unlike mergeableRoseVertices(), this does not: |
1039 | * - check that engines themselves can be merged |
1040 | * - use heuristics to find out if merging the engines is wise. |
1041 | */ |
1042 | static |
1043 | bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, |
1044 | const vector<RoseVertex> &targets_1, |
1045 | const vector<RoseVertex> &targets_2) { |
1046 | assert(!targets_1.empty()); |
1047 | assert(!targets_2.empty()); |
1048 | |
1049 | vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ |
1050 | for (auto a : targets_1) { |
1051 | u32 ulag = build.g[a].left.lag; |
1052 | for (u32 id : build.g[a].literals) { |
1053 | ulits.emplace_back(&build.literals.at(id), ulag); |
1054 | } |
1055 | } |
1056 | |
1057 | vector<pair<const rose_literal_id *, u32>> vlits; |
1058 | for (auto a : targets_2) { |
1059 | u32 vlag = build.g[a].left.lag; |
1060 | for (u32 id : build.g[a].literals) { |
1061 | vlits.emplace_back(&build.literals.at(id), vlag); |
1062 | } |
1063 | } |
1064 | |
1065 | if (!compatibleLiteralsForMerge(ulits, vlits)) { |
1066 | return false; |
1067 | } |
1068 | |
1069 | // Check preds are compatible as well. |
1070 | if (!checkPredDelays(build, targets_1, targets_2) |
1071 | || !checkPredDelays(build, targets_2, targets_1)) { |
1072 | return false; |
1073 | } |
1074 | |
    DEBUG_PRINTF("vertex sets are mergeable\n");
1076 | return true; |
1077 | } |
1078 | |
1079 | /** |
1080 | * In block mode, we want to be a little more selective -- we will only merge |
1081 | * prefix engines when the literal sets are the same or if the merged graph |
1082 | * has only grown by a small amount. |
1083 | */ |
1084 | static |
1085 | bool goodBlockModeMerge(const RoseBuildImpl &build, |
1086 | const vector<RoseVertex> &u_verts, const left_id &u_eng, |
1087 | const vector<RoseVertex> &v_verts, |
1088 | const left_id &v_eng) { |
1089 | assert(!build.cc.streaming); |
1090 | |
1091 | // Always merge infixes if we can (subject to the other criteria in |
1092 | // mergeableRoseVertices). |
1093 | if (!build.isRootSuccessor(u_verts.front())) { |
1094 | return true; |
1095 | } |
1096 | |
1097 | const RoseGraph &g = build.g; |
1098 | |
1099 | flat_set<u32> u_lits; |
1100 | for (RoseVertex u : u_verts) { |
1101 | insert(&u_lits, g[u].literals); |
1102 | } |
1103 | |
1104 | flat_set<u32> v_lits; |
1105 | for (RoseVertex v : v_verts) { |
1106 | insert(&v_lits, g[v].literals); |
1107 | } |
1108 | |
1109 | // Merge prefixes with identical literal sets (as we'd have to run them |
1110 | // both when we see those literals anyway). |
1111 | if (u_lits == v_lits) { |
1112 | return true; |
1113 | } |
1114 | |
    // The rest of this function only deals with the case when both engines
    // have graph leftfixes.
1117 | if (!u_eng.graph()) { |
1118 | return false; |
1119 | } |
1120 | assert(v_eng.graph()); |
1121 | const NGHolder &ug = *u_eng.graph(); |
1122 | const NGHolder &vg = *v_eng.graph(); |
1123 | |
1124 | size_t u_count = num_vertices(ug); |
1125 | size_t v_count = num_vertices(vg); |
    DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n",
                 u_count, v_count);
1128 | if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || |
1129 | v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("prefixes too big already\n");
1131 | return false; |
1132 | } |
1133 | |
    DEBUG_PRINTF("trying merge\n");
1135 | NGHolder h; |
1136 | cloneHolder(h, vg); |
1137 | if (!mergeNfaPair(ug, h, nullptr, build.cc)) { |
        DEBUG_PRINTF("couldn't merge\n");
1139 | return false; |
1140 | } |
1141 | |
1142 | const size_t merged_count = num_vertices(h); |
    DEBUG_PRINTF("merged result has %zu vertices\n", merged_count);
1144 | if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { |
        DEBUG_PRINTF("exceeded limit\n");
1146 | return false; |
1147 | } |
1148 | |
1149 | // We want to only perform merges that take advantage of some |
1150 | // commonality in the two input graphs, so we check that the number of |
1151 | // vertices has only grown a small amount: somewhere between the sum |
1152 | // (no commonality) and the max (no growth at all) of the vertex counts |
1153 | // of the input graphs. |
1154 | size_t max_size = u_count + v_count; |
1155 | size_t min_size = max(u_count, v_count); |
1156 | size_t max_growth = ((max_size - min_size) * 25) / 100; |
1157 | if (merged_count > min_size + max_growth) { |
        DEBUG_PRINTF("grew too much\n");
1159 | return false; |
1160 | } |
1161 | |
1162 | // We don't want to squander any chances at accelerating. |
1163 | if (!isAccelerableLeftfix(build, h) |
1164 | && (isAccelerableLeftfix(build, ug) |
1165 | || isAccelerableLeftfix(build, vg))) { |
        DEBUG_PRINTF("would lose accel property\n");
1167 | return false; |
1168 | } |
1169 | |
    DEBUG_PRINTF("safe to merge\n");
1171 | return true; |
1172 | } |
1173 | |
1174 | /** |
1175 | * Merge r1 into r2 if safe and appropriate. Returns true on success. |
1176 | */ |
1177 | static |
1178 | bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, |
1179 | const vector<RoseVertex> &targets_1, |
1180 | left_id &r2, |
1181 | const vector<RoseVertex> &targets_2) { |
1182 | if (targets_1.empty() || targets_2.empty()) { |
1183 | /* one of the engines has already been merged away */ |
1184 | return false; |
1185 | } |
1186 | |
1187 | assert(!r1.graph() == !r2.graph()); |
1188 | if (r1.graph()) { |
1189 | NGHolder *h1 = r1.graph(); |
1190 | NGHolder *h2 = r2.graph(); |
1191 | CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); |
1192 | CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); |
1193 | CharReach stopboth = stop1 & stop2; |
        DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(),
                     stop2.count(), stopboth.count());
1196 | if (stopboth.count() < 10 |
1197 | && (stop1.count() > 10 || stop2.count() > 10)) { |
            DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
1199 | return false; |
1200 | } |
1201 | size_t maxstop = max(stop1.count(), stop2.count()); |
1202 | if (maxstop > 200 && stopboth.count() < 200) { |
            DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
1204 | return false; |
1205 | } |
1206 | } |
1207 | |
1208 | /* Rechecking that the targets are compatible, as we may have already |
1209 | * merged new states into r1 or r2 and we need to verify that this |
1210 | * candidate is still ok. */ |
1211 | if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { |
1212 | return false; |
1213 | } |
1214 | |
1215 | if (!build.cc.streaming |
1216 | && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { |
1217 | return false; |
1218 | } |
1219 | |
1220 | return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); |
1221 | } |
1222 | |
1223 | static |
1224 | bool nfaHasNarrowStart(const NGHolder &g) { |
1225 | if (out_degree(g.startDs, g) > 1) { |
1226 | return false; // unanchored |
1227 | } |
1228 | |
1229 | CharReach cr; |
1230 | |
1231 | for (auto v : adjacent_vertices_range(g.start, g)) { |
1232 | if (v == g.startDs) { |
1233 | continue; |
1234 | } |
1235 | cr |= g[v].char_reach; |
1236 | } |
1237 | return cr.count() <= NARROW_START_MAX; |
1238 | } |
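
/* For example (illustrative): in an anchored prefix such as /^[0-9]foo/, the
 * only successor of start reaches on ten characters, which is within
 * NARROW_START_MAX; an unanchored pattern instead fails the startDs
 * out-degree test above. */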
1239 | |
1240 | static |
1241 | bool nfaHasFiniteMaxWidth(const NGHolder &g) { |
1242 | return findMaxWidth(g).is_finite(); |
1243 | } |
1244 | |
1245 | static |
1246 | bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { |
1247 | if (!proper_out_degree(h.startDs, h)) { |
1248 | return false; |
1249 | } |
1250 | |
1251 | assert(!is_triggered(h)); |
1252 | |
1253 | NGHolder h_temp; |
1254 | cloneHolder(h_temp, h); |
1255 | |
1256 | vector<BoundedRepeatData> repeats; |
1257 | bool suitable_for_sds_reforming = false; |
1258 | const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */ |
1259 | const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */ |
1260 | const bool simple_model_selection = true; // FIRST is considered simple |
1261 | analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true, |
1262 | simple_model_selection, grey, &suitable_for_sds_reforming); |
1263 | |
1264 | return suitable_for_sds_reforming; |
1265 | } |
1266 | |
1267 | static |
1268 | u32 commonPrefixLength(left_id &r1, left_id &r2) { |
1269 | if (r1.graph() && r2.graph()) { |
1270 | return commonPrefixLength(*r1.graph(), *r2.graph()); |
1271 | } else if (r1.castle() && r2.castle()) { |
1272 | return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); |
1273 | } |
1274 | return 0; |
1275 | } |
1276 | |
1277 | namespace { |
1278 | struct MergeKey { |
1279 | MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) : |
1280 | parents(std::move(parents_in)) { |
1281 | |
        // We want to distinguish prefixes (but not infixes) by whether they
        // have a narrow start or a finite max width.
1284 | if (left.graph() && !is_triggered(*left.graph())) { |
1285 | const NGHolder &h = *left.graph(); |
1286 | narrowStart = nfaHasNarrowStart(h); |
1287 | hasMaxWidth = nfaHasFiniteMaxWidth(h); |
1288 | } else { |
1289 | narrowStart = false; |
1290 | hasMaxWidth = false; |
1291 | } |
1292 | |
1293 | if (left.castle()) { |
1294 | /* castles should have a non-empty reach */ |
1295 | assert(left.castle()->reach().any()); |
1296 | castle_cr = left.castle()->reach(); |
1297 | } else { |
1298 | assert(left.graph()); |
1299 | } |
1300 | } |
1301 | |
1302 | bool operator<(const MergeKey &b) const { |
1303 | const MergeKey &a = *this; |
1304 | ORDER_CHECK(narrowStart); |
1305 | ORDER_CHECK(hasMaxWidth); |
1306 | ORDER_CHECK(castle_cr); |
1307 | ORDER_CHECK(parents); |
1308 | return false; |
1309 | } |
1310 | |
1311 | // NOTE: these two bool discriminators are only used for prefixes, not |
1312 | // infixes. |
1313 | bool narrowStart; |
1314 | bool hasMaxWidth; |
1315 | CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */ |
1316 | |
1317 | flat_set<RoseVertex> parents; |
1318 | }; |
1319 | } |
1320 | |
1321 | template <typename T> |
1322 | static |
1323 | void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) { |
1324 | if (in.size() <= chunk_size) { |
1325 | out->push_back(std::move(in)); |
1326 | return; |
1327 | } |
1328 | |
1329 | out->push_back(vector<T>()); |
1330 | out->back().reserve(chunk_size); |
    for (auto &t : in) { // non-const ref so that std::move below actually moves
1332 | if (out->back().size() >= chunk_size) { |
1333 | out->push_back(vector<T>()); |
1334 | out->back().reserve(chunk_size); |
1335 | } |
1336 | out->back().push_back(std::move(t)); |
1337 | } |
1338 | } |
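
/*
 * Illustrative sketch (not part of the build): chunking seven elements with
 * chunk_size = 3 produces groups of sizes 3, 3 and 1.
 *
 *     vector<vector<int>> out;
 *     chunk(vector<int>{1, 2, 3, 4, 5, 6, 7}, &out, 3);
 *     assert(out.size() == 3 && out[2].size() == 1);
 */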
1339 | |
1340 | static |
1341 | insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { |
1342 | insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; |
1343 | for (auto v : vertices_range(g)) { |
1344 | const auto &left = g[v].left; |
1345 | if (!left) { |
1346 | continue; |
1347 | } |
1348 | assert(contains(all_reports(left), left.leftfix_report)); |
1349 | eng_verts[left].push_back(v); |
1350 | } |
1351 | |
1352 | return eng_verts; |
1353 | } |
1354 | |
1355 | /** |
1356 | * This pass attempts to merge prefix/infix engines which share a common set of |
1357 | * parent vertices. |
1358 | * |
1359 | * Engines are greedily merged pairwise by this process based on a priority |
1360 | * queue keyed off the common prefix length. |
1361 | * |
1362 | * Engines are not merged if the lags are not compatible or if it would damage |
1363 | * the stop alphabet. |
1364 | * |
1365 | * Infixes: |
1366 | * - It is expected that when this is run all infixes are still at the single |
1367 | * top stage as we have not yet merged unrelated infixes together. After |
1368 | * execution, castles may have multiple (but equivalent) tops. |
1369 | * |
1370 | * Prefixes: |
1371 | * - transient prefixes are not considered. |
 * - prefixes with a max width or a narrow start are kept segregated by
 *   this phase and can only be merged with similar prefixes.
1374 | * - in block mode, merges are only performed if literal sets are the same. |
1375 | * - merges are not considered in cases where dot star start state will be |
1376 | * reformed to optimise a leading repeat. |
1377 | */ |
1378 | void mergeLeftfixesVariableLag(RoseBuildImpl &build) { |
1379 | if (!build.cc.grey.mergeRose) { |
1380 | return; |
1381 | } |
1382 | assert(!hasOrphanedTops(build)); |
1383 | |
1384 | RoseGraph &g = build.g; |
1385 | |
    DEBUG_PRINTF("-----\n");
    DEBUG_PRINTF("entry\n");
    DEBUG_PRINTF("-----\n");
1389 | |
1390 | auto eng_verts = get_eng_verts(g); |
1391 | |
1392 | map<MergeKey, vector<left_id>> engine_groups; |
1393 | for (const auto &e : eng_verts) { |
1394 | const left_id &left = e.first; |
1395 | const auto &verts = e.second; |
1396 | // Only non-transient for the moment. |
1397 | if (contains(build.transient, left)) { |
1398 | continue; |
1399 | } |
1400 | |
1401 | // No forced McClellan or Haig infix merges. |
1402 | if (left.dfa() || left.haig()) { |
1403 | continue; |
1404 | } |
1405 | assert(left.graph() || left.castle()); |
1406 | |
1407 | if (left.graph()) { |
1408 | const NGHolder &h = *left.graph(); |
1409 | /* we should not have merged yet */ |
1410 | assert(!is_triggered(h) || onlyOneTop(h)); |
1411 | |
1412 | if (hasReformedStartDotStar(h, build.cc.grey)) { |
1413 | continue; // preserve the optimisation of the leading repeat |
1414 | } |
1415 | } else { |
1416 | assert(left.castle()); |
1417 | |
1418 | if (!build.cc.grey.allowCastle) { |
                DEBUG_PRINTF("castle merging disallowed by greybox\n");
1420 | continue; |
1421 | } |
1422 | } |
1423 | |
1424 | // We collapse the anchored root into the root vertex when calculating |
1425 | // parents, so that we can merge differently-anchored prefix roses |
1426 | // together. (Prompted by UE-2100) |
1427 | |
1428 | flat_set<RoseVertex> parents; |
1429 | for (RoseVertex v : verts) { |
1430 | insert(&parents, inv_adjacent_vertices_range(v, g)); |
1431 | } |
1432 | |
1433 | if (contains(parents, build.anchored_root)) { |
1434 | parents.erase(build.anchored_root); |
1435 | parents.insert(build.root); |
1436 | } |
1437 | |
1438 | assert(!parents.empty()); |
1439 | |
1440 | #ifndef _WIN32 |
1441 | engine_groups[MergeKey(left, parents)].push_back(left); |
1442 | #else |
    // On Windows, when a stack-constructed MergeKey is passed into the map
    // 'engine_groups', the same object can end up destroyed twice: once when
    // it goes out of scope and again via engine_groups.clear(), which
    // triggers an is_block_type_valid() assertion failure in the MergeKey
    // destructor. Constructing the key on the heap avoids the double
    // destruction.
1450 | MergeKey *mk = new MergeKey(left, parents); |
1451 | engine_groups[*mk].push_back(left); |
1452 | #endif |
1453 | } |
1454 | |
1455 | vector<vector<left_id>> chunks; |
1456 | for (auto &raw_group : engine_groups | map_values) { |
1457 | chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); |
1458 | } |
1459 | engine_groups.clear(); |
1460 | |
    DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size());
1462 | |
1463 | for (auto &roses : chunks) { |
1464 | if (roses.size() < 2) { |
1465 | continue; |
1466 | } |
1467 | // All pairs on the prio queue. |
1468 | u32 tie_breaker = 0; |
1469 | priority_queue<RoseMergeCandidate> pq; |
1470 | for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { |
1471 | left_id r1 = *it; |
1472 | const vector<RoseVertex> &targets_1 = eng_verts[r1]; |
1473 | |
1474 | for (auto jt = next(it); jt != ite; ++jt) { |
1475 | left_id r2 = *jt; |
1476 | |
1477 | /* we should have already split on engine types and reach */ |
1478 | assert(!r1.castle() == !r2.castle()); |
1479 | assert(!r1.graph() == !r2.graph()); |
1480 | assert(!r1.castle() |
1481 | || r1.castle()->reach() == r2.castle()->reach()); |
1482 | |
1483 | const vector<RoseVertex> &targets_2 = eng_verts[r2]; |
1484 | if (!checkVerticesOkForLeftfixMerge(build, targets_1, |
1485 | targets_2)) { |
1486 | continue; // No point queueing unmergeable cases. |
1487 | } |
1488 | |
1489 | u32 cpl = commonPrefixLength(r1, r2); |
1490 | pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); |
1491 | } |
1492 | } |
1493 | |
        DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());
1495 | |
1496 | while (!pq.empty()) { |
1497 | left_id r1 = pq.top().r1; |
1498 | left_id r2 = pq.top().r2; |
            DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n",
                         r1.graph(), r2.graph(), pq.top().cpl, pq.top().states);
1501 | pq.pop(); |
1502 | vector<RoseVertex> &targets_1 = eng_verts[r1]; |
1503 | vector<RoseVertex> &targets_2 = eng_verts[r2]; |
1504 | if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2, |
1505 | targets_2)) { |
1506 | insert(&targets_2, targets_2.end(), targets_1); |
1507 | targets_1.clear(); |
1508 | } |
1509 | } |
1510 | } |
1511 | |
    DEBUG_PRINTF("-----\n");
    DEBUG_PRINTF("exit\n");
    DEBUG_PRINTF("-----\n");
1515 | assert(!hasOrphanedTops(build)); |
1516 | } |
1517 | |
1518 | namespace { |
1519 | |
1520 | /** |
1521 | * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path. |
1522 | */ |
1523 | struct DedupeLeftKey { |
1524 | DedupeLeftKey(const RoseBuildImpl &build, |
1525 | flat_set<pair<size_t, u32>> preds_in, const left_id &left) |
1526 | : left_hash(hashLeftfix(left)), preds(move(preds_in)), |
1527 | transient(contains(build.transient, left)) { |
1528 | } |
1529 | |
1530 | bool operator<(const DedupeLeftKey &b) const { |
1531 | return tie(left_hash, preds, transient) |
1532 | < tie(b.left_hash, b.preds, b.transient); |
1533 | } |
1534 | |
1535 | private: |
1536 | /** Quick hash of the leftfix itself. Must be identical for a given pair of |
1537 | * graphs if is_equal would return true. */ |
1538 | size_t left_hash; |
1539 | |
1540 | /** For each in-edge, the pair of (parent index, edge top). */ |
1541 | flat_set<pair<size_t, u32>> preds; |
1542 | |
1543 | /** We don't want to combine transient with non-transient. */ |
1544 | bool transient; |
1545 | }; |
1546 | |
1547 | } // namespace |
1548 | |
1549 | static |
1550 | flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { |
1551 | flat_set<pair<size_t, u32>> preds; |
1552 | for (const auto &e : in_edges_range(v, g)) { |
1553 | preds.emplace(g[source(e, g)].index, g[e].rose_top); |
1554 | } |
1555 | return preds; |
1556 | } |
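
/* For example, a vertex with in-edges from parents with indices 3 and 7,
 * fired with tops 0 and 1 respectively, yields the set {(3, 0), (7, 1)}. */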
1557 | |
1558 | /** |
 * This is a generalisation of \ref dedupeLeftfixes which relaxes two
 * restrictions: multiple predecessor roles are allowed and the lag used by
 * each vertex may differ from vertex to vertex. Like \ref dedupeLeftfixes,
1562 | * the leftfixes' successor vertices are first grouped to reduce the number of |
1563 | * potential candidates - the grouping in this case is by the set of |
1564 | * predecessor roles with their associated top events. For the dedupe to be |
1565 | * possible, it is required that: |
1566 | * |
1567 | * 1. the nfa graphs with respect to the relevant reports are identical |
1568 | * 2. the nfa graphs are triggered by the same roles with same events (ensured |
1569 | * by the initial grouping pass) |
1570 | * 3. all the successor roles of either graph can inspect the combined leftfix |
1571 | * without advancing the state of the leftfix past the point that another |
1572 | * successor may want to inspect it; the overlap relationships between the |
1573 | * involved literals are examined to ensure that this property holds. |
1574 | * |
1575 | * Note: this is unable to dedupe when delayed literals are involved unlike |
1576 | * dedupeLeftfixes. |
1577 | */ |
1578 | void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { |
    DEBUG_PRINTF("entry\n");
1580 | |
1581 | RoseGraph &g = build.g; |
1582 | auto eng_verts = get_eng_verts(g); |
1583 | |
1584 | map<DedupeLeftKey, vector<left_id>> engine_groups; |
1585 | for (const auto &e : eng_verts) { |
1586 | const left_id &left = e.first; |
1587 | const auto &verts = e.second; |
1588 | |
        /* There should only be one report on an engine as no merges have
         * happened yet, aside from EOD prefixes. */
1591 | if (all_reports(left).size() != 1) { |
1592 | assert(any_of_in(adjacent_vertices_range(verts.front(), g), |
1593 | [&](RoseVertex w) { return g[w].eod_accept; })); |
1594 | continue; |
1595 | } |
1596 | |
1597 | if (left.haig()) { |
1598 | /* TODO: allow deduping of identical haigs */ |
1599 | continue; |
1600 | } |
1601 | |
1602 | if (left.graph()) { |
1603 | /* we should not have merged yet */ |
1604 | assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); |
1605 | } |
1606 | |
        auto preds = get_pred_tops(verts.front(), g);
        bool distinct_preds = false;
        for (RoseVertex v : verts) {
            if (preds != get_pred_tops(v, g)) {
                DEBUG_PRINTF("distinct pred sets\n");
                distinct_preds = true;
                break;
            }
        }
        if (distinct_preds) {
            /* Vertices disagree on their predecessors, so a key built from
             * the first vertex would not describe them all; don't group this
             * engine. */
            continue;
        }
        engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
1615 | } |
1616 | |
1617 | /* We don't bother chunking as we expect deduping to be successful if the |
1618 | * hashes match */ |
1619 | |
1620 | for (auto &group : engine_groups | map_values) { |
1621 | DEBUG_PRINTF("group of %zu roses\n" , group.size()); |
1622 | |
1623 | if (group.size() < 2) { |
1624 | continue; |
1625 | } |
1626 | |
1627 | for (auto it = group.begin(); it != group.end(); ++it) { |
1628 | left_id r1 = *it; |
1629 | vector<RoseVertex> &verts1 = eng_verts[r1]; |
1630 | assert(!verts1.empty()); /* cleared engines should be behind us */ |
1631 | |
1632 | assert(all_reports(r1).size() == 1); |
1633 | ReportID r1_report = *all_reports(r1).begin(); |
1634 | |
1635 | for (auto jt = next(it); jt != group.end(); ++jt) { |
1636 | left_id r2 = *jt; |
1637 | vector<RoseVertex> &verts2 = eng_verts[r2]; |
1638 | assert(!verts2.empty()); |
1639 | assert(all_reports(r2).size() == 1); |
1640 | ReportID r2_report = *all_reports(r2).begin(); |
1641 | |
1642 | if (!is_equal(r1, r1_report, r2, r2_report)) { |
1643 | continue; |
1644 | } |
1645 | |
1646 | if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { |
1647 | continue; |
1648 | } |
1649 | |
1650 | DEBUG_PRINTF("%p and %p are dupes\n" , r1.graph(), r2.graph()); |
1651 | |
1652 | // Replace r1 with r2. |
1653 | |
1654 | for (auto v : verts1) { |
1655 | DEBUG_PRINTF("replacing report %u with %u on %zu\n" , |
1656 | r2_report, r1_report, g[v].index); |
1657 | u32 orig_lag = g[v].left.lag; |
1658 | g[v].left = g[verts2.front()].left; |
1659 | g[v].left.lag = orig_lag; |
1660 | } |
1661 | |
1662 | insert(&verts2, verts2.end(), verts1); |
1663 | verts1.clear(); |
1664 | |
1665 | /* remove stale entry from transient set, if present */ |
1666 | build.transient.erase(r1); |
1667 | |
1668 | break; |
1669 | } |
1670 | } |
1671 | } |
1672 | } |
1673 | |
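/** \brief Return the lowest top value not present in \p tops.
 *
 * For example, tops = {0, 1, 3} yields 2, and an empty set yields 0. */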
1674 | static |
1675 | u32 findUnusedTop(const flat_set<u32> &tops) { |
1676 | u32 i = 0; |
1677 | while (contains(tops, i)) { |
1678 | i++; |
1679 | } |
1680 | return i; |
1681 | } |
1682 | |
// Renumber the tops on edges out of start according to the given mapping.
1684 | static |
1685 | void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) { |
1686 | for (const auto &e : out_edges_range(h.start, h)) { |
1687 | NFAVertex v = target(e, h); |
1688 | if (v == h.startDs) { |
1689 | continue; |
1690 | } |
1691 | flat_set<u32> new_tops; |
1692 | for (u32 t : h[e].tops) { |
1693 | DEBUG_PRINTF("vertex %zu has top %u\n" , h[v].index, t); |
1694 | new_tops.insert(top_mapping.at(t)); |
1695 | } |
1696 | h[e].tops = std::move(new_tops); |
1697 | } |
1698 | } |
1699 | |
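/** \brief Ensure that the tops of h1 do not collide with those of h2,
 * renumbering h1's tops if necessary and recording the old -> new assignment
 * in \p top_mapping (left empty if no renumbering was needed).
 *
 * Worked example: with h1 tops {0, 1} and h2 tops {1, 2}, top 0 is unused in
 * h2 and maps to itself, while top 1 collides and is remapped to 3 (0, 1 and
 * 2 all being taken by that point), so top_mapping = {0 -> 0, 1 -> 3}. */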
1700 | static |
1701 | bool setDistinctTops(NGHolder &h1, const NGHolder &h2, |
1702 | map<u32, u32> &top_mapping) { |
1703 | flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2); |
1704 | |
1705 | DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n" , tops1.size(), |
1706 | tops2.size()); |
1707 | |
1708 | // If our tops don't intersect, we're OK to merge with no changes. |
1709 | if (!has_intersection(tops1, tops2)) { |
1710 | DEBUG_PRINTF("tops don't intersect\n" ); |
1711 | return true; |
1712 | } |
1713 | |
1714 | // Otherwise, we have to renumber the tops in h1 so that they don't overlap |
1715 | // with the tops in h2. |
1716 | top_mapping.clear(); |
1717 | for (u32 t : tops1) { |
1718 | u32 u = findUnusedTop(tops2); |
1719 | DEBUG_PRINTF("replacing top %u with %u in h1\n" , t, u); |
1720 | top_mapping.insert(make_pair(t, u)); |
1721 | assert(!contains(tops2, u)); |
1722 | tops2.insert(u); |
1723 | } |
1724 | |
1725 | replaceTops(h1, top_mapping); |
1726 | return true; |
1727 | } |
1728 | |
1729 | bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, |
1730 | const deque<RoseVertex> &verts1) { |
1731 | map<u32, u32> top_mapping; |
1732 | if (!setDistinctTops(h1, h2, top_mapping)) { |
1733 | return false; |
1734 | } |
1735 | |
1736 | if (top_mapping.empty()) { |
1737 | return true; // No remapping necessary. |
1738 | } |
1739 | |
1740 | for (auto v : verts1) { |
1741 | DEBUG_PRINTF("vertex %zu\n" , g[v].index); |
1742 | assert(!g[v].left.haig); |
1743 | assert(!g[v].left.dfa); |
1744 | for (const auto &e : in_edges_range(v, g)) { |
1745 | u32 t = g[e].rose_top; |
1746 | DEBUG_PRINTF("t=%u\n" , t); |
1747 | assert(contains(top_mapping, t)); |
1748 | g[e].rose_top = top_mapping[t]; |
1749 | DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n" , |
1750 | g[source(e, g)].index, g[target(e, g)].index, t, |
1751 | top_mapping[t]); |
1752 | } |
1753 | } |
1754 | |
1755 | return true; |
1756 | } |
1757 | |
1758 | static |
1759 | bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, |
1760 | const deque<RoseVertex> &verts1) { |
1761 | map<u32, u32> top_mapping; |
1762 | if (!setDistinctTops(h1, h2, top_mapping)) { |
1763 | return false; |
1764 | } |
1765 | |
1766 | if (top_mapping.empty()) { |
1767 | return true; // No remapping necessary. |
1768 | } |
1769 | |
1770 | for (auto v : verts1) { |
1771 | DEBUG_PRINTF("vertex %zu\n" , g[v].index); |
1772 | u32 t = g[v].suffix.top; |
1773 | assert(contains(top_mapping, t)); |
1774 | g[v].suffix.top = top_mapping[t]; |
1775 | } |
1776 | |
1777 | return true; |
1778 | } |
1779 | |
1780 | /** \brief Estimate the number of accel states in the given graph when built as |
1781 | * an NFA. |
1782 | * |
1783 | * (The easiest way to estimate something like this is to actually build it: |
1784 | * the criteria for NFA acceleration are quite complicated and buried in |
1785 | * limex_compile.) |
1786 | */ |
1787 | static |
1788 | u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) { |
1789 | return countAccelStates(h, &tbi.rm, tbi.cc); |
1790 | } |
1791 | |
1792 | static |
1793 | void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) { |
1794 | RoseGraph &g = tbi.g; |
1795 | DEBUG_PRINTF("%zu nfa rose merge candidates\n" , roses.size()); |
1796 | |
1797 | // We track the number of accelerable states for each graph in a map and |
1798 | // only recompute them when the graph is modified. |
1799 | unordered_map<left_id, u32> accel_count; |
1800 | for (const auto &rose : roses) { |
1801 | assert(rose.graph()->kind == NFA_INFIX); |
1802 | accel_count[rose] = estimatedAccelStates(tbi, *rose.graph()); |
1803 | } |
1804 | |
1805 | for (auto it = roses.begin(); it != roses.end(); ++it) { |
1806 | left_id r1 = *it; |
1807 | const deque<RoseVertex> &verts1 = roses.vertices(r1); |
1808 | |
1809 | deque<left_id> merged; |
1810 | for (auto jt = next(it); jt != roses.end(); ++jt) { |
1811 | left_id r2 = *jt; |
1812 | const deque<RoseVertex> &verts2 = roses.vertices(r2); |
1813 | |
1814 | DEBUG_PRINTF("consider merging rose %p (%zu verts) " |
1815 | "with %p (%zu verts)\n" , |
1816 | r1.graph(), verts1.size(), r2.graph(), verts2.size()); |
1817 | |
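            /* Merging must preserve acceleration: as an illustration, if
             * NFA_MAX_ACCEL_STATES were 8, candidates estimated at 5 and 4
             * accel states would be skipped (5 + 4 > 8), while 5 and 3 could
             * proceed, on the basis that the merged graph is assumed to need
             * at most the sum of the two estimates. */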
1818 | u32 accel1 = accel_count[r1]; |
1819 | if (accel1 >= NFA_MAX_ACCEL_STATES) { |
1820 | DEBUG_PRINTF("h1 has hit max accel\n" ); |
1821 | break; // next h1 |
1822 | } |
1823 | |
1824 | u32 accel2 = accel_count[r2]; |
1825 | if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { |
1826 | DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " |
1827 | "accel2=%u)\n" , |
1828 | accel1, accel2); |
1829 | continue; // next h2 |
1830 | } |
1831 | |
1832 | if (!mergeableRoseVertices(tbi, verts1, verts2)) { |
1833 | DEBUG_PRINTF("not mergeable\n" ); |
1834 | continue; // next h2 |
1835 | } |
1836 | |
1837 | // Attempt to merge h2 into h1. |
1838 | |
1839 | NGHolder victim; |
1840 | cloneHolder(victim, *r2.graph()); |
1841 | |
1842 | // Store a copy of the in-edge properties in case we have to roll |
1843 | // back. |
1844 | map<RoseEdge, RoseEdgeProps> edge_props; |
1845 | for (auto v : verts2) { |
1846 | for (const auto &e : in_edges_range(v, g)) { |
1847 | edge_props[e] = g[e]; |
1848 | } |
1849 | } |
1850 | |
1851 | if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) { |
1852 | DEBUG_PRINTF("can't set distinct tops\n" ); |
1853 | continue; // next h2 |
1854 | } |
1855 | |
1856 | assert(victim.kind == r1.graph()->kind); |
1857 | assert(!generates_callbacks(*r1.graph())); |
1858 | if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) { |
1859 | DEBUG_PRINTF("merge failed\n" ); |
1860 | // Roll back in-edge properties. |
1861 | for (const auto &m : edge_props) { |
1862 | g[m.first] = m.second; |
1863 | } |
1864 | continue; // next h2 |
1865 | } |
1866 | |
1867 | // Update h2's roses to point to h1 now |
1868 | shared_ptr<NGHolder> winner = g[verts1.front()].left.graph; |
1869 | for (auto v : verts2) { |
1870 | g[v].left.graph = winner; |
1871 | } |
1872 | roses.insert(r1, verts2); |
1873 | |
1874 | merged.push_back(r2); |
1875 | |
1876 | if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) { |
1877 | DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n" , |
1878 | num_vertices(*winner)); |
1879 | break; // next h1 |
1880 | } |
1881 | |
1882 | // Update h1's accel count estimate. |
1883 | accel_count[r1] = estimatedAccelStates(tbi, *winner); |
1884 | } |
1885 | |
1886 | DEBUG_PRINTF("%zu roses merged\n" , merged.size()); |
1887 | roses.erase_all(merged.begin(), merged.end()); |
1888 | } |
1889 | } |
1890 | |
1891 | /** |
1892 | * This pass attempts to merge prefix/infix engines with a small number of |
 * vertices together into larger engines. The engines must not have a
1894 | * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines |
1895 | * that have compatible lag are greedily grouped such that they remain |
1896 | * accelerable and only have a small number of states. Note: if a role has an |
1897 | * infix with multiple trigger vertices, the role will be left unchanged by this |
1898 | * pass and will remain using an unmerged graph. |
1899 | */ |
1900 | void mergeSmallLeftfixes(RoseBuildImpl &tbi) { |
1901 | DEBUG_PRINTF("entry\n" ); |
1902 | |
1903 | if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) { |
1904 | return; |
1905 | } |
1906 | |
1907 | RoseGraph &g = tbi.g; |
1908 | |
1909 | LeftfixBouquet nfa_leftfixes; |
1910 | |
1911 | for (auto v : vertices_range(g)) { |
1912 | if (!g[v].left) { |
1913 | continue; |
1914 | } |
1915 | |
1916 | // Handle single-parent infixes only. |
1917 | if (tbi.isRootSuccessor(v)) { |
1918 | continue; |
1919 | } |
1920 | |
1921 | left_id left(g[v].left); |
1922 | |
1923 | // Only non-transient for the moment. |
1924 | if (contains(tbi.transient, left)) { |
1925 | continue; |
1926 | } |
1927 | |
1928 | // No DFAs or Haigs right now. |
1929 | if (left.dfa() || left.haig()) { |
1930 | continue; |
1931 | } |
1932 | |
1933 | // Castles are handled by a different pass. |
1934 | if (left.castle()) { |
1935 | continue; |
1936 | } |
1937 | |
1938 | assert(left.graph()); |
1939 | NGHolder &h = *left.graph(); |
1940 | |
1941 | /* Ensure that kind on the graph is correct */ |
1942 | assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); |
1943 | |
1944 | if (hasReformedStartDotStar(h, tbi.cc.grey)) { |
1945 | /* We would lose optimisations of the leading repeat by merging. */ |
1946 | continue; |
1947 | } |
1948 | |
1949 | // Small roses only. |
1950 | if (num_vertices(h) > small_rose_threshold(tbi.cc)) { |
1951 | continue; |
1952 | } |
1953 | |
1954 | nfa_leftfixes.insert(left, v); |
1955 | } |
1956 | |
1957 | deque<LeftfixBouquet> leftfix_groups; |
1958 | chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX); |
1959 | nfa_leftfixes.clear(); |
1960 | DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n" , |
1961 | leftfix_groups.size()); |
1962 | |
1963 | for (auto &group : leftfix_groups) { |
1964 | mergeNfaLeftfixes(tbi, group); |
1965 | } |
1966 | } |
1967 | |
1968 | static |
1969 | void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, |
1970 | insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { |
1971 | /* caller must have already ensured that candidates have the same reach */ |
1972 | RoseGraph &g = build.g; |
1973 | DEBUG_PRINTF("%zu castle leftfix merge candidates\n" , cands.size()); |
1974 | |
1975 | for (auto it = cands.begin(); it != cands.end(); ++it) { |
1976 | left_id &cand_1 = *it; |
1977 | vector<RoseVertex> &verts_1 = eng_verts[cand_1]; |
1978 | if (verts_1.empty()) { |
1979 | continue; |
1980 | } |
1981 | |
1982 | for (auto jt = next(it); jt != cands.end(); ++jt) { |
1983 | const left_id &cand_2 = *jt; |
1984 | vector<RoseVertex> &verts_2 = eng_verts[cand_2]; |
1985 | if (verts_2.empty()) { |
1986 | continue; |
1987 | } |
1988 | |
1989 | assert(cand_1.castle()->reach() == cand_2.castle()->reach()); |
1990 | |
1991 | if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { |
1992 | DEBUG_PRINTF("not mergeable\n" ); |
1993 | continue; // next cand_2 |
1994 | } |
1995 | |
1996 | DEBUG_PRINTF("castle1=%p (size %zu)\n" , cand_1.castle(), |
1997 | cand_1.castle()->repeats.size()); |
1998 | DEBUG_PRINTF("castle2=%p (size %zu)\n" , cand_2.castle(), |
1999 | cand_2.castle()->repeats.size()); |
2000 | |
2001 | map<u32, u32> top_map; |
2002 | if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { |
2003 | DEBUG_PRINTF("couldn't merge\n" ); |
2004 | continue; // next cand_2 |
2005 | } |
2006 | |
2007 | // Update castle2's roses to point to castle1 now. |
2008 | shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; |
2009 | for (auto v : verts_2) { |
2010 | assert(g[v].left.castle.get() == cand_2.castle()); |
2011 | g[v].left.castle = winner; |
2012 | for (const auto &e : in_edges_range(v, g)) { |
2013 | g[e].rose_top = top_map.at(g[e].rose_top); |
2014 | } |
2015 | } |
2016 | |
2017 | insert(&verts_1, verts_1.end(), verts_2); |
2018 | verts_2.clear(); |
2019 | } |
2020 | } |
2021 | } |
2022 | |
2023 | /** |
2024 | * Merges castles with the same reach together regardless of where in the rose |
2025 | * graph they are. Note: there is no requirement for the castles to have common |
2026 | * parent or target vertices. |
2027 | * |
2028 | * There are no heuristics for reducing block mode merges as castle speed |
2029 | * mainly depends on the reach being scanned. |
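 *
 * For example, an infix castle holding the single repeat {2,5} and another
 * over the same reach holding {3,} can be combined into one castle holding
 * both repeats, distinguished by separate top events.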
2030 | */ |
2031 | void mergeCastleLeftfixes(RoseBuildImpl &build) { |
2032 | DEBUG_PRINTF("entry\n" ); |
2033 | |
2034 | if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses |
2035 | || !build.cc.grey.allowCastle) { |
2036 | return; |
2037 | } |
2038 | |
2039 | RoseGraph &g = build.g; |
2040 | |
2041 | insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; |
2042 | |
2043 | for (auto v : vertices_range(g)) { |
2044 | if (!g[v].left.castle) { |
2045 | continue; |
2046 | } |
2047 | |
2048 | // Handle infixes only. |
2049 | if (build.isRootSuccessor(v)) { |
2050 | continue; |
2051 | } |
2052 | |
2053 | eng_verts[g[v].left].push_back(v); |
2054 | } |
2055 | |
2056 | map<CharReach, vector<left_id>> by_reach; |
2057 | for (const auto &left : eng_verts | map_keys) { |
2058 | by_reach[left.castle()->reach()].push_back(left); |
2059 | } |
2060 | |
2061 | vector<vector<left_id>> chunks; |
2062 | for (auto &raw_group : by_reach | map_values) { |
2063 | chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); |
2064 | } |
2065 | by_reach.clear(); |
2066 | |
2067 | DEBUG_PRINTF("chunked castles into %zu groups\n" , chunks.size()); |
2068 | |
2069 | for (auto &chunk : chunks) { |
2070 | mergeCastleChunk(build, chunk, eng_verts); |
2071 | } |
2072 | } |
2073 | |
2074 | static |
2075 | void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, |
2076 | const bool acyclic) { |
2077 | RoseGraph &g = tbi.g; |
2078 | |
2079 | DEBUG_PRINTF("group has %zu suffixes\n" , suffixes.size()); |
2080 | |
2081 | // If this isn't an acyclic case, we track the number of accelerable states |
2082 | // for each graph in a map and only recompute them when the graph is |
2083 | // modified. |
2084 | unordered_map<suffix_id, u32> accel_count; |
2085 | if (!acyclic) { |
2086 | for (const auto &suffix : suffixes) { |
2087 | assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); |
2088 | accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph()); |
2089 | } |
2090 | } |
2091 | |
2092 | for (auto it = suffixes.begin(); it != suffixes.end(); ++it) { |
2093 | suffix_id s1 = *it; |
2094 | const deque<RoseVertex> &verts1 = suffixes.vertices(s1); |
2095 | assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); |
2096 | |
2097 | // Caller should ensure that we don't propose merges of graphs that are |
2098 | // already too big. |
2099 | assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); |
2100 | |
2101 | deque<suffix_id> merged; |
2102 | for (auto jt = next(it); jt != suffixes.end(); ++jt) { |
2103 | suffix_id s2 = *jt; |
2104 | const deque<RoseVertex> &verts2 = suffixes.vertices(s2); |
2105 | assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); |
2106 | |
2107 | if (!acyclic) { |
2108 | u32 accel1 = accel_count[s1]; |
2109 | if (accel1 >= NFA_MAX_ACCEL_STATES) { |
2110 | DEBUG_PRINTF("h1 has hit max accel\n" ); |
2111 | break; // next h1 |
2112 | } |
2113 | |
2114 | u32 accel2 = accel_count[s2]; |
2115 | if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) { |
2116 | DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, " |
2117 | "accel2=%u)\n" , |
2118 | accel1, accel2); |
2119 | continue; // next h2 |
2120 | } |
2121 | } |
2122 | |
2123 | // Attempt to merge h2 into h1. |
2124 | |
2125 | NGHolder victim; |
2126 | cloneHolder(victim, *s2.graph()); |
2127 | |
2128 | // Store a copy of the suffix tops in case we have to roll back. |
2129 | map<RoseVertex, u32> old_tops; |
2130 | for (auto v : verts2) { |
2131 | old_tops[v] = g[v].suffix.top; |
2132 | } |
2133 | |
2134 | if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) { |
2135 | DEBUG_PRINTF("can't set distinct tops\n" ); |
2136 | continue; // next h2 |
2137 | } |
2138 | |
2139 | if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) { |
2140 | DEBUG_PRINTF("merge failed\n" ); |
2141 | // Roll back in-edge properties. |
2142 | for (const auto &m : old_tops) { |
2143 | g[m.first].suffix.top = m.second; |
2144 | } |
2145 | continue; // next h2 |
2146 | } |
2147 | |
2148 | // Update h2's roses to point to h1 now |
2149 | shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph; |
2150 | for (auto v : verts2) { |
2151 | g[v].suffix.graph = winner; |
2152 | } |
2153 | suffixes.insert(s1, verts2); |
2154 | merged.push_back(s2); |
2155 | |
2156 | if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) { |
2157 | DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n" , |
2158 | num_vertices(*s1.graph())); |
2159 | break; // next h1 |
2160 | } |
2161 | |
2162 | if (!acyclic) { |
2163 | // Update h1's accel count estimate. |
2164 | accel_count[s1] = estimatedAccelStates(tbi, *s1.graph()); |
2165 | } |
2166 | } |
2167 | |
2168 | DEBUG_PRINTF("%zu suffixes merged\n" , merged.size()); |
2169 | suffixes.erase_all(merged.begin(), merged.end()); |
2170 | } |
2171 | } |
2172 | |
2173 | /** |
2174 | * This merge pass combines suffixes from unrelated roles into a single |
2175 | * suffix with multiple top events in order to distinguish the triggers |
2176 | * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes |
2177 | * while mergeSmallSuffixes only considers small suffixes. The merges will |
2178 | * group roles with suffixes in the graph into clusters of at most |
2179 | * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the |
 * suffixes and attempting to pairwise merge each with another member. Merges
 * will fail if the result is not implementable, requires too many distinct top
 * events, or loses the ability to be accelerated. A successful merge modifies
 * the existing suffix graph of one member (g1); the other member updates its
 * graph to refer to g1 instead of its previous graph (g2) and uses the newly
 * created tops. Other roles may have been sharing g1 - these are unaffected by
 * the change as the existing top events are left untouched. Other roles using
 * g2 are also unaffected, as g2 will continue to exist while it has any roles
 * triggering it.
2189 | * |
2190 | * Note: suffixes destined for the LBR are not considered for these merges as |
2191 | * the LBR can only handle a single repeat and this type of repeat is ideally |
2192 | * handled outside of an NFA or DFA. |
2193 | */ |
2194 | void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { |
2195 | DEBUG_PRINTF("entry\n" ); |
2196 | |
2197 | if (!tbi.cc.grey.mergeSuffixes) { |
2198 | return; |
2199 | } |
2200 | |
2201 | SuffixBouquet suffixes; |
2202 | |
2203 | RoseGraph &g = tbi.g; |
2204 | |
2205 | for (auto v : vertices_range(g)) { |
2206 | shared_ptr<NGHolder> h = g[v].suffix.graph; |
2207 | if (!h || tbi.isInETable(v)) { |
2208 | continue; |
2209 | } |
2210 | |
2211 | assert(!g[v].suffix.haig); |
2212 | |
2213 | if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { |
2214 | continue; |
2215 | } |
2216 | |
2217 | if (!isAcyclic(*h)) { |
2218 | continue; |
2219 | } |
2220 | |
2221 | suffixes.insert(g[v].suffix, v); |
2222 | } |
2223 | |
2224 | deque<SuffixBouquet> suff_groups; |
2225 | chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); |
2226 | DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n" , suffixes.size(), |
2227 | suff_groups.size()); |
2228 | suffixes.clear(); |
2229 | |
2230 | for (auto &group : suff_groups) { |
2231 | mergeSuffixes(tbi, group, true); |
2232 | } |
2233 | } |
2234 | |
2235 | /** |
2236 | * This merge pass combines suffixes from unrelated roles into a single |
2237 | * suffix with multiple top events in order to distinguish the triggers |
2238 | * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes |
2239 | * while mergeSmallSuffixes only considers small suffixes. The merges will |
2240 | * group roles with suffixes in the graph into clusters of at most |
2241 | * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the |
2242 | * suffixes and attempting to pairwise merge it with another member. Merges |
2243 | * will fail if the result is not implementable, requires too many distinct top |
2244 | * events, or if it losses the ability to be accelerated. The merge will modify |
2245 | * the existing suffix graph of the one member (g1), the other member updates |
2246 | * it graph to refer to g1 instead of its previous graph (g2) and use the new |
2247 | * tops created. Other roles may have been sharing g1 - these are unaffected by |
2248 | * the change as the existing top events are left untouched. Other roles using |
2249 | * g2 are also unaffected as g2 will continue to exist until while it has any |
2250 | * roles triggering it. |
2251 | * |
2252 | * Note: suffixes destined for the LBR are not considered for these merges as |
2253 | * the LBR can only handle a single repeat and this type of repeat is ideally |
2254 | * handled outside of an NFA or DFA. |
2255 | */ |
2256 | void mergeSmallSuffixes(RoseBuildImpl &tbi) { |
2257 | DEBUG_PRINTF("entry\n" ); |
2258 | |
2259 | if (!tbi.cc.grey.mergeSuffixes) { |
2260 | return; |
2261 | } |
2262 | |
2263 | RoseGraph &g = tbi.g; |
2264 | SuffixBouquet suffixes; |
2265 | |
2266 | for (auto v : vertices_range(g)) { |
2267 | shared_ptr<NGHolder> h = g[v].suffix.graph; |
2268 | if (!h || tbi.isInETable(v)) { |
2269 | continue; |
2270 | } |
2271 | assert(!g[v].suffix.haig); |
2272 | |
2273 | // Leave acyclics out for the moment. |
2274 | if (isAcyclic(*h)) { |
2275 | continue; |
2276 | } |
2277 | |
2278 | // Small-ish suffixes only. |
2279 | if (num_vertices(*h) > 32) { |
2280 | continue; |
2281 | } |
2282 | |
2283 | suffixes.insert(g[v].suffix, v); |
2284 | } |
2285 | |
2286 | deque<SuffixBouquet> suff_groups; |
2287 | chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX); |
2288 | DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n" , suffixes.size(), |
2289 | suff_groups.size()); |
2290 | suffixes.clear(); |
2291 | |
2292 | for (auto &group : suff_groups) { |
2293 | mergeSuffixes(tbi, group, false); |
2294 | } |
2295 | } |
2296 | |
2297 | static |
2298 | void removeDeadOutfixes(vector<OutfixInfo> &outfixes) { |
2299 | auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); }; |
2300 | outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead), |
2301 | end(outfixes)); |
2302 | } |
2303 | |
2304 | static |
2305 | void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) { |
2306 | assert(!winner.is_dead()); |
2307 | |
2308 | winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth); |
2309 | winner.minWidth = min(winner.minWidth, victim.minWidth); |
2310 | winner.maxWidth = max(winner.maxWidth, victim.maxWidth); |
2311 | winner.maxOffset = max(winner.maxOffset, victim.maxOffset); |
2312 | mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info); |
2313 | |
2314 | // This outfix can be ignored in small block mode if both were. The dedupe |
2315 | // layer at runtime will protect us from extra matches if only one was in |
2316 | // the small block matcher. |
2317 | winner.in_sbmatcher &= victim.in_sbmatcher; |
2318 | } |
2319 | |
2320 | static |
2321 | map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, |
2322 | const vector<NGHolder *> &nfas) { |
2323 | map<NGHolder *, NGHolder *> merged; |
2324 | |
2325 | vector<NGHolder *> batch; |
2326 | for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) { |
2327 | batch.push_back(*it); |
2328 | assert((*it)->kind == NFA_OUTFIX); |
2329 | if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { |
2330 | auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); |
2331 | insert(&merged, batch_merged); |
2332 | batch.clear(); |
2333 | } |
2334 | } |
2335 | |
2336 | return merged; |
2337 | } |
2338 | |
2339 | static |
2340 | void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { |
2341 | DEBUG_PRINTF("merging %zu nfas\n" , nfas.size()); |
2342 | if (nfas.size() < 2) { |
2343 | return; |
2344 | } |
2345 | |
2346 | vector<OutfixInfo> &outfixes = tbi.outfixes; |
2347 | |
2348 | map<NGHolder *, size_t> nfa_mapping; |
2349 | for (size_t i = 0; i < outfixes.size(); i++) { |
2350 | auto *holder = outfixes[i].holder(); |
2351 | if (holder) { |
2352 | nfa_mapping[holder] = i; |
2353 | } |
2354 | } |
2355 | |
2356 | map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas); |
2357 | if (merged.empty()) { |
2358 | return; |
2359 | } |
2360 | |
2361 | DEBUG_PRINTF("%zu nfas merged\n" , merged.size()); |
2362 | |
2363 | // Update the outfix info for merged holders. |
2364 | for (const auto &m : merged) { |
2365 | OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]); |
2366 | OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]); |
2367 | mergeOutfixInfo(winner, victim); |
2368 | victim.clear(); |
2369 | } |
2370 | |
2371 | removeDeadOutfixes(outfixes); |
2372 | } |
2373 | |
2374 | namespace { |
2375 | struct MergeMcClellan { |
2376 | MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in) |
2377 | : rm(rm_in), grey(grey_in) {} |
2378 | |
2379 | unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const { |
2380 | assert(d1 && d2); |
2381 | return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey); |
2382 | } |
2383 | |
2384 | private: |
2385 | const ReportManager &rm; |
2386 | const Grey &grey; |
2387 | }; |
2388 | |
2389 | struct MergeHaig { |
2390 | explicit MergeHaig(u32 limit_in) : limit(limit_in) {} |
2391 | |
2392 | unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1, |
2393 | const raw_som_dfa *d2) const { |
2394 | assert(d1 && d2); |
2395 | return attemptToMergeHaig({d1, d2}, limit); |
2396 | } |
2397 | |
2398 | private: |
2399 | const u32 limit; //!< state limit for merged result. |
2400 | }; |
2401 | } |
2402 | |
2403 | /** |
2404 | * Generic pairwise merge algorithm that can be used for either McClellan |
2405 | * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge |
2406 | * operation to a merge functor, which allows the caller to set some policy |
2407 | * (state limits, etc). |
2408 | * |
2409 | * This is currently astonishingly simple and just considers every pair of |
2410 | * DFAs, slow and steady. We may wish to actually apply a merge ordering |
2411 | * strategy in the future. |
2412 | */ |
2413 | template<class RawDfa, class MergeFunctor> |
2414 | static |
2415 | void pairwiseDfaMerge(vector<RawDfa *> &dfas, |
2416 | unordered_map<RawDfa *, size_t> &dfa_mapping, |
2417 | vector<OutfixInfo> &outfixes, |
2418 | MergeFunctor merge_func) { |
2419 | DEBUG_PRINTF("merging group of size %zu\n" , dfas.size()); |
2420 | |
2421 | for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) { |
2422 | if (!*it) { |
2423 | continue; |
2424 | } |
2425 | for (auto jt = next(it); jt != ite; ++jt) { |
2426 | if (!*jt) { |
2427 | continue; |
2428 | } |
2429 | |
2430 | DEBUG_PRINTF("try merge %p and %p\n" , *it, *jt); |
2431 | unique_ptr<RawDfa> rdfa = merge_func(*it, *jt); |
2432 | if (!rdfa) { |
2433 | continue; // Merge failed. |
2434 | } |
2435 | |
2436 | DEBUG_PRINTF("merge succeeded, built %p\n" , rdfa.get()); |
2437 | OutfixInfo &winner = outfixes.at(dfa_mapping[*it]); |
2438 | OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]); |
2439 | assert(!winner.is_dead() && !victim.is_dead()); |
2440 | |
2441 | RawDfa *dfa_ptr = rdfa.get(); |
2442 | dfa_mapping[dfa_ptr] = dfa_mapping[*it]; |
2443 | dfa_mapping.erase(*it); |
2444 | winner.proto = move(rdfa); |
2445 | |
2446 | mergeOutfixInfo(winner, victim); |
2447 | |
2448 | victim.clear(); |
2449 | *jt = nullptr; // to be deleted. |
2450 | *it = dfa_ptr; |
2451 | } |
2452 | } |
2453 | } |
2454 | |
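/**
 * Chunked wrapper around \ref pairwiseDfaMerge: the DFAs are split into
 * groups of at most DFA_CHUNK_SIZE_MAX and the quadratic pairwise merge runs
 * within each group, bounding the total number of merge attempts. As an
 * illustration, 200 DFAs in chunks of 50 cost at most 4 * (50 * 49) / 2
 * attempts rather than (200 * 199) / 2 for a single group.
 */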
2455 | template<class RawDfa, class MergeFunctor> |
2456 | static |
2457 | void chunkedDfaMerge(vector<RawDfa *> &dfas, |
2458 | unordered_map<RawDfa *, size_t> &dfa_mapping, |
2459 | vector<OutfixInfo> &outfixes, |
2460 | MergeFunctor merge_func) { |
2461 | DEBUG_PRINTF("begin merge of %zu dfas\n" , dfas.size()); |
2462 | |
2463 | vector<RawDfa *> out_dfas; |
2464 | vector<RawDfa *> chunk; |
2465 | for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) { |
2466 | chunk.push_back(*it); |
2467 | if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) { |
2468 | pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func); |
2469 | out_dfas.insert(end(out_dfas), begin(chunk), end(chunk)); |
2470 | chunk.clear(); |
2471 | } |
2472 | } |
2473 | |
2474 | // Remove null (merged) DFAs and update vector for subsequent use. |
2475 | out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr), |
2476 | out_dfas.end()); |
2477 | dfas.swap(out_dfas); |
2478 | DEBUG_PRINTF("after merge there are %zu dfas\n" , dfas.size()); |
2479 | } |
2480 | |
2481 | static |
2482 | void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) { |
2483 | DEBUG_PRINTF("merging %zu nfas\n" , dfas.size()); |
2484 | if (dfas.size() < 2) { |
2485 | return; |
2486 | } |
2487 | |
2488 | vector<OutfixInfo> &outfixes = tbi.outfixes; |
2489 | |
2490 | /* key is index into outfix array as iterators, etc may be invalidated by |
2491 | * element addition. */ |
2492 | unordered_map<raw_dfa *, size_t> dfa_mapping; |
2493 | for (size_t i = 0; i < outfixes.size(); i++) { |
2494 | auto *rdfa = outfixes[i].rdfa(); |
2495 | if (rdfa) { |
2496 | dfa_mapping[rdfa] = i; |
2497 | } |
2498 | } |
2499 | |
2500 | chunkedDfaMerge(dfas, dfa_mapping, outfixes, |
2501 | MergeMcClellan(tbi.rm, tbi.cc.grey)); |
2502 | removeDeadOutfixes(outfixes); |
2503 | } |
2504 | |
2505 | static |
2506 | void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, |
2507 | const Grey &grey) { |
2508 | if (!grey.roseMcClellanOutfix) { |
2509 | return; |
2510 | } |
2511 | |
2512 | DEBUG_PRINTF("merge combo\n" ); |
2513 | |
2514 | bool seen_dfa = false; |
2515 | u32 nfa_count = 0; |
2516 | for (const auto &outfix : tbi.outfixes) { |
2517 | if (outfix.holder()) { |
2518 | DEBUG_PRINTF("nfa\n" ); |
2519 | nfa_count++; |
2520 | } else if (outfix.rdfa()) { |
2521 | DEBUG_PRINTF("dfa\n" ); |
2522 | seen_dfa = true; |
2523 | } |
2524 | } |
2525 | |
2526 | DEBUG_PRINTF("nfa %u dfas present %d\n" , nfa_count, |
2527 | (int)seen_dfa); |
2528 | if (!nfa_count || (nfa_count == 1 && !seen_dfa)) { |
2529 | DEBUG_PRINTF("no combo merges possible\n" ); |
2530 | return; |
2531 | } |
2532 | |
2533 | /* key is index into outfix array as iterators, etc may be invalidated by |
2534 | * element addition. */ |
2535 | size_t new_dfas = 0; |
2536 | unordered_map<raw_dfa *, size_t> dfa_mapping; |
2537 | vector<raw_dfa *> dfas; |
2538 | |
2539 | for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { |
2540 | auto &outfix = *it; |
2541 | assert(!outfix.is_dead()); |
2542 | |
2543 | if (outfix.rdfa()) { |
2544 | auto *rdfa = outfix.rdfa(); |
2545 | dfas.push_back(rdfa); |
2546 | dfa_mapping[rdfa] = it - tbi.outfixes.begin(); |
2547 | continue; |
2548 | } |
2549 | |
2550 | if (!outfix.holder()) { |
2551 | continue; |
2552 | } |
2553 | |
2554 | NGHolder *h = outfix.holder(); |
2555 | assert(h->kind == NFA_OUTFIX); |
2556 | auto rdfa = buildMcClellan(*h, &rm, grey); |
2557 | if (rdfa) { |
2558 | // Transform this outfix into a DFA and add it to the merge set. |
2559 | dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin(); |
2560 | dfas.push_back(rdfa.get()); |
2561 | outfix.proto = move(rdfa); |
2562 | new_dfas++; |
2563 | } |
2564 | } |
2565 | |
2566 | DEBUG_PRINTF("constructed %zu new dfas\n" , new_dfas); |
2567 | |
2568 | if (!new_dfas) { |
2569 | /* assumes normal dfas have already been fully merged */ |
2570 | return; |
2571 | } |
2572 | |
2573 | chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes, |
2574 | MergeMcClellan(tbi.rm, tbi.cc.grey)); |
2575 | removeDeadOutfixes(tbi.outfixes); |
2576 | } |
2577 | |
2578 | static |
2579 | void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas, |
2580 | u32 limit) { |
2581 | if (dfas.size() < 2) { |
2582 | return; |
2583 | } |
2584 | |
2585 | vector<OutfixInfo> &outfixes = tbi.outfixes; |
2586 | |
2587 | unordered_map<raw_som_dfa *, size_t> dfa_mapping; |
2588 | for (size_t i = 0; i < outfixes.size(); i++) { |
2589 | auto *haig = outfixes[i].haig(); |
2590 | if (haig) { |
2591 | dfa_mapping[haig] = i; |
2592 | } |
2593 | } |
2594 | |
2595 | chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit)); |
2596 | removeDeadOutfixes(outfixes); |
2597 | } |
2598 | |
2599 | /** |
2600 | * This pass attempts to merge outfix engines together. At this point in time, |
2601 | * the engine type (NFA, DFA, Haig) has already been decided for each outfix |
 * and outfixes can only be merged with others of the same type. NFAs are merged
2603 | * in a priority order based on common prefix length. The other types are |
2604 | * merged blindly. Engines are merged to the extent that they can still be |
2605 | * implemented efficiently. |
2606 | */ |
2607 | void mergeOutfixes(RoseBuildImpl &tbi) { |
2608 | if (!tbi.cc.grey.mergeOutfixes) { |
2609 | return; |
2610 | } |
2611 | |
2612 | vector<NGHolder *> nfas; |
2613 | vector<raw_dfa *> dfas; |
2614 | vector<raw_som_dfa *> som_dfas; |
2615 | |
2616 | for (auto &outfix : tbi.outfixes) { |
2617 | if (outfix.rdfa()) { |
2618 | dfas.push_back(outfix.rdfa()); |
2619 | } else if (outfix.holder()) { |
2620 | nfas.push_back(outfix.holder()); |
2621 | } else if (outfix.haig()) { |
2622 | som_dfas.push_back(outfix.haig()); |
2623 | } |
2624 | } |
2625 | |
2626 | DEBUG_PRINTF("merging %zu dfas, %zu nfas\n" , |
2627 | dfas.size(), nfas.size()); |
2628 | |
2629 | mergeOutfixNfas(tbi, nfas); |
2630 | mergeOutfixDfas(tbi, dfas); |
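    /* Haig merging runs twice with increasing state limits; the second, more
     * permissive pass retries whatever the first pass could not combine. */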
2631 | mergeOutfixHaigs(tbi, som_dfas, 255); |
2632 | mergeOutfixHaigs(tbi, som_dfas, 8192); |
2633 | mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey); |
2634 | } |
2635 | |
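/**
 * \brief Work out how many bytes the MPV trigger for this puffette may
 * squash.
 *
 * Illustrative example: for a puffette over reach [a-z] whose triggering
 * literals all end in a digit, the trigger byte always interrupts a run of
 * [a-z], so the full min_width bytes may be squashed; if any literal instead
 * ends in a byte inside the reach, no squashing is allowed and 0 is
 * returned.
 */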
2636 | static |
2637 | u32 allowedSquashDistance(const CharReach &cr, u32 min_width, |
2638 | const RoseBuildImpl &tbi, |
2639 | RoseVertex tv) { |
2640 | CharReach accept_cr; |
2641 | DEBUG_PRINTF("hello |cr|=%zu\n" , cr.count()); |
2642 | |
2643 | const RoseGraph &g = tbi.g; |
2644 | |
2645 | /* TODO: inspect further back in the pattern */ |
2646 | for (u32 lit_id : g[tv].literals) { |
2647 | const rose_literal_id &lit = tbi.literals.at(lit_id); |
2648 | if (lit.delay) { |
2649 | return 0; /* TODO: better */ |
2650 | } |
2651 | if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) { |
2652 | return 0; |
2653 | } |
2654 | assert(!lit.s.empty()); |
2655 | accept_cr |= *lit.s.rbegin(); |
2656 | } |
2657 | |
2658 | DEBUG_PRINTF("|accept_cr|=%zu\n" , accept_cr.count()); |
2659 | |
2660 | if ((accept_cr & cr).any()) { |
2661 | DEBUG_PRINTF("no squash\n" ); |
2662 | return 0; /* the accept byte doesn't always kill the puffette. TODO: |
2663 | * maybe if we look further back we could find something that |
2664 | * would kill the puffette... */ |
2665 | } |
2666 | |
2667 | DEBUG_PRINTF("allowed to squash %u\n" , min_width); |
2668 | return min_width; |
2669 | } |
2670 | |
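/**
 * \brief Replace pure-repeat suffixes ("puffettes") with puffs chained onto
 * the MPV.
 *
 * For instance, a suffix equivalent to /[^x]{5,}/ with a single report can
 * be implemented as a raw_puff on the MPV chain rather than occupying a
 * suffix engine of its own; the role then reports via an MPV trigger event.
 */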
2671 | void mergePuffixes(RoseBuildImpl &tbi) { |
2672 | DEBUG_PRINTF("entry\n" ); |
2673 | |
2674 | if (!tbi.cc.grey.mergeSuffixes) { |
2675 | return; |
2676 | } |
2677 | |
2678 | RoseGraph &g = tbi.g; |
2679 | |
2680 | for (auto v : vertices_range(g)) { |
2681 | shared_ptr<NGHolder> h = g[v].suffix.graph; |
2682 | if (!h) { |
2683 | continue; |
2684 | } |
2685 | assert(!g[v].suffix.haig); |
2686 | assert(!g[v].eod_accept); |
2687 | |
2688 | assert(onlyOneTop(*h)); /* we should not have merged yet */ |
2689 | bool fixed_depth = g[v].min_offset == g[v].max_offset; |
2690 | |
2691 | if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) { |
2692 | continue; |
2693 | } |
2694 | |
2695 | PureRepeat repeat; |
2696 | if (!isPureRepeat(*h, repeat)) { |
2697 | assert(0); |
2698 | continue; |
2699 | } |
2700 | |
2701 | if (repeat.bounds.min == depth(0)) { |
2702 | assert(0); // No vacuous puffs allowed. |
2703 | continue; |
2704 | } |
2705 | |
2706 | assert(repeat.bounds.min.is_finite() && |
2707 | repeat.bounds.max.is_reachable()); |
2708 | assert(repeat.bounds.max == repeat.bounds.min || |
2709 | repeat.bounds.max.is_infinite()); |
2710 | |
2711 | const bool unbounded = repeat.bounds.max.is_infinite(); |
2712 | const set<ReportID> reports = all_reports(*h); |
2713 | assert(reports.size() == 1); |
2714 | ReportID report = *reports.begin(); |
2715 | |
2716 | DEBUG_PRINTF("got puffette candidate %u:%s\n" , report, |
2717 | repeat.bounds.str().c_str()); |
2718 | |
2719 | raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach); |
2720 | |
2721 | u32 queue; |
2722 | u32 event; |
2723 | tbi.addChainTail(rp, &queue, &event); |
2724 | u32 squashDistance = |
2725 | allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v); |
2726 | |
2727 | Report ir = makeMpvTrigger(event, squashDistance); |
2728 | ReportID id = tbi.rm.getInternalId(ir); |
2729 | |
2730 | DEBUG_PRINTF("puffette event q%u t%u\n" , queue, event); |
2731 | g[v].suffix.reset(); |
2732 | g[v].reports.insert(id); |
2733 | } |
2734 | } |
2735 | |
2736 | static |
2737 | void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, |
2738 | u32 top, const vector<RoseVertex> &verts) { |
2739 | DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n" , top, |
2740 | m.get(), verts.size()); |
2741 | |
2742 | for (auto v : verts) { |
2743 | assert(g[v].suffix.castle); |
2744 | g[v].suffix.castle = m; |
2745 | g[v].suffix.top = top; |
2746 | } |
2747 | } |
2748 | |
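/**
 * Greedily folds the single-repeat castles in \p castles into an accumulator
 * castle until it is full, at which point the castle that failed to fit
 * becomes the new accumulator. As an illustration, were max_occupancy 4, six
 * candidate castles would end up as two protos: the first four sharing one
 * and the last two sharing the other.
 */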
2749 | static |
2750 | void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles, |
2751 | const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) { |
2752 | if (castles.size() <= 1) { |
2753 | return; |
2754 | } |
2755 | |
2756 | DEBUG_PRINTF("merging reach %s, %zu elements\n" , |
2757 | describeClass(castles[0]->reach()).c_str(), castles.size()); |
2758 | |
2759 | CastleProto *m = nullptr; |
2760 | |
2761 | for (CastleProto *c : castles) { |
2762 | assert(c->repeats.size() == 1); // Not yet merged. |
2763 | assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); |
2764 | if (!m) { |
2765 | m = c; |
2766 | continue; |
2767 | } |
2768 | |
2769 | u32 top = m->merge(c->repeats[0]); |
2770 | if (top == CastleProto::max_occupancy) { |
2771 | // No room left to merge into 'm'. This one becomes the new 'm'. |
2772 | DEBUG_PRINTF("next mergee\n" ); |
2773 | m = c; |
2774 | continue; |
2775 | } |
2776 | updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, |
2777 | eng_verts.at(c)); |
2778 | DEBUG_PRINTF("added to %p, top %u\n" , m, top); |
2779 | } |
2780 | } |
2781 | |
2782 | void mergeCastleSuffixes(RoseBuildImpl &build) { |
2783 | DEBUG_PRINTF("entry\n" ); |
2784 | |
2785 | if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { |
2786 | return; |
2787 | } |
2788 | |
2789 | unordered_map<CastleProto *, vector<RoseVertex>> eng_verts; |
2790 | map<CharReach, vector<CastleProto *>> by_reach; |
2791 | |
2792 | RoseGraph &g = build.g; |
2793 | |
2794 | for (auto v : vertices_range(g)) { |
2795 | if (!g[v].suffix.castle) { |
2796 | continue; |
2797 | } |
2798 | |
2799 | CastleProto *c = g[v].suffix.castle.get(); |
2800 | |
2801 | if (c->repeats.size() != 1) { |
2802 | // This code assumes it's the only place merging is being done. |
2803 | assert(0); |
2804 | continue; |
2805 | } |
2806 | |
2807 | if (!contains(eng_verts, c)) { |
2808 | by_reach[c->reach()].push_back(c); |
2809 | } |
2810 | eng_verts[c].push_back(v); |
2811 | } |
2812 | |
2813 | for (auto &chunk : by_reach | map_values) { |
2814 | mergeCastleSuffixChunk(g, chunk, eng_verts); |
2815 | } |
2816 | } |
2817 | |
2818 | } // namespace ue2 |
2819 | |