ng_redundancy.cpp source code [ClickHouse/contrib/hyperscan/src/nfagraph/ng_redundancy.cpp]

1	/*
2	* Copyright (c) 2015-2017, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/* \file*
30	* \brief NFA graph reductions.
31	*
32	* This code attempts to make the NFA graph smaller by performing a number of
33	* local transformations:
34	*
35	* ### (1) removal of redundant vertices:
36	*
37	* v is redundant wrt to u if succ(v) is a subset of succ(u)
38	* AND pred(v) is a subset of pred(u)
39	* AND cr(v) is a subset of cr(u)
40	*
41	* ### (2) 'diamond' transformation:
42	*
43	* given succ(v) == succ(u) and pred(v) == pred(u),
44	* v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
45	* and cr(w) = union(cr(v), cr(u))
46	*
47	* ### (3) locally identifiable left equivalence:
48	*
49	* given pred(v) == pred(u) (**) and cr(v) == cr(u),
50	* v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
51	* and succ(w) = union(succ(v), succ(u))
52	*
53	* ### (4) locally identifiable right equivalence:
54	*
55	* given succ(v) == succ(u) (**) and cr(v) == cr(u),
56	* v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
57	* and pred(w) = union(pred(v), pred(u))
58	*
59	* NOTE (**): for left and right equivalence, we can also do the transform if
60	* set(u) contains u, set(v) contains v and the sets are otherwise equal. This
61	* enables equivalent vertices with self-loops to be merged.
62	*
63	* If v and u raise accepts, they can only be merged if they raise the same
64	* report IDs.
65	*
66	* Transformations are applied repeatedly until the graph stops changing.
67	*
68	* Note that the final graph may depend on the order in which these
69	* transformations are applied. In order to reduce the non-determinism the
70	* following order is imposed: (1); (2); (3) + (4).
71	*/
72	#include "ng_redundancy.h"
73
74	#include "ng_holder.h"
75	#include "ng_calc_components.h"
76	#include "ng_dominators.h"
77	#include "ng_prune.h"
78	#include "ng_util.h"
79	#include "ue2common.h"
80	#include "util/container.h"
81	#include "util/flat_containers.h"
82	#include "util/graph_range.h"
83
84	#include <algorithm>
85	#include <cassert>
86	#include <map>
87	#include <set>
88	#include <vector>
89
90	#include <boost/graph/depth_first_search.hpp>
91	#include <boost/graph/reverse_graph.hpp>
92
93	using namespace std;
94
95	namespace ue2 {
96
97	namespace {
98
99	/* Precalculated (and maintained) information about a vertex. /
100	class VertexInfo {
101	public:
102	flat_set<NFAVertex> pred; //!< predecessors of this vertex
103	flat_set<NFAVertex> succ; //!< successors of this vertex
104	bool isAccept = false; //!< does this vertex lead to accept?
105	bool isRemoved = false; //!< have we already removed this vertex?
106
107	size_t inDegree() const { return pred.size(); }
108	size_t outDegree() const { return succ.size(); }
109	};
110
111	class VertexInfoMap {
112	public:
113	explicit VertexInfoMap(const NGHolder &gg)
114	: g(gg), infos (num_vertices(gg)) {}
115	VertexInfo &operator[](NFAVertex v) {
116	u32 i = g [v].index;
117	assert(i < infos.size());
118	return infos [i];
119	}
120
121	const VertexInfo &operator[](NFAVertex v) const {
122	u32 i = g [v].index;
123	assert(i < infos.size());
124	return infos [i];
125	}
126
127	private:
128	const NGHolder &g;
129	vector<VertexInfo> infos;
130	};
131
132	} // namespace
133
134	/* Populates the info map with their predecessor and successor states, and*
135	* whether they are accept states. */
136	static
137	void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
138	for (auto v : vertices_range(g)) {
139	VertexInfo &info = infoMap [v];
140	assert(info.pred.empty() && info.succ.empty());
141
142	// Build successor and predecessor sets
143	insert(&info.pred, inv_adjacent_vertices(v, g));
144	insert(&info.succ, adjacent_vertices(v, g));
145
146	// Note whether the vertex is an accept state
147	if (!is_special(v, g)) {
148	if (contains(info.succ, g.accept)
149	\|\| contains(info.succ, g.acceptEod)) {
150	info.isAccept = true;
151	}
152	}
153	}
154	}
155
156	/* Helper function to take the intersection of two sorted vertex sets*
157	* in-place. */
158	static
159	void inplaceIntersection(vector<NFAVertex> &vset1,
160	const flat_set<NFAVertex> &vset2) {
161	const NFAVertex GONE = NGHolder::null_vertex();
162
163	vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
164	flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
165
166	while ((it != ite) && (jt != jte)) {
167	assert(*it != GONE);
168
169	if (it < jt) {
170	// present in vset1 but not in vset2. Set to null, remove in a
171	// second pass.
172	*it = GONE;
173	++it;
174	} else if (jt < it) {
175	// present in vset2 but not in vset1, skip.
176	++jt;
177	} else {
178	// present in both sets.
179	++it; ++jt;
180	}
181	}
182
183	// Left overs are only in that set.
184	vset1.erase(it, ite);
185
186	// Remove nulls created above.
187	vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
188	}
189
190	/* Find the intersection of the successors of our predecessors. /
191	static
192	void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
193	const VertexInfoMap &infoMap,
194	vector<NFAVertex> &intersection,
195	bool considerSelf = true / follow self loops /) {
196	/ find a good seed for the intersection /
197	const flat_set<NFAVertex> best = nullptr*;
198	for (auto u : predSet) {
199	if (!considerSelf && u == v) {
200	continue;
201	}
202
203	const flat_set<NFAVertex> &succSet = infoMap [u].succ;
204	if (!best \|\| succSet.size() <= best->size()) {
205	best = &succSet;
206
207	// Break out if we've reduced our intersection to [v]
208	if (best->size() == `1`) {
209	assert(*(best->begin()) == v);
210	intersection.push_back(v);
211	return;
212	}
213	}
214	}
215
216	if (best) {
217	insert(&intersection, intersection.end(), *best);
218	}
219
220	for (auto u : predSet) {
221	if (!considerSelf && u == v) {
222	continue;
223	}
224
225	inplaceIntersection(intersection, infoMap [u].succ);
226
227	// Check: intersection should always be at least size 1
228	assert(!intersection.empty());
229
230	// Break out if we've reduced our intersection to [v]
231	if (intersection.size() == `1`) {
232	assert(*intersection.begin() == v);
233	return;
234	}
235	}
236	}
237
238	/* Find the intersection of the predecessors of our successors. /
239	static
240	void predSuccIntersection(const NFAVertex v,
241	const flat_set<NFAVertex> &succSet,
242	const VertexInfoMap &infoMap,
243	vector<NFAVertex> &intersection,
244	bool considerSelf = true / follow self loops /) {
245	/ find a good seed for the intersection /
246	const flat_set<NFAVertex> best = nullptr*;
247	for (auto w : succSet) {
248	if (!considerSelf && w == v) {
249	continue;
250	}
251
252	const flat_set<NFAVertex> &predSet = infoMap [w].pred;
253	if (!best \|\| predSet.size() <= best->size()) {
254	best = &predSet;
255
256	// Break out if we've reduced our intersection to [v]
257	if (best->size() == `1`) {
258	assert(*(best->begin()) == v);
259	intersection.push_back(v);
260	return;
261	}
262	}
263	}
264
265	if (best) {
266	insert(&intersection, intersection.end(), *best);
267	}
268
269	for (auto w : succSet) {
270	if (!considerSelf && w == v) {
271	continue;
272	}
273
274	inplaceIntersection(intersection, infoMap [w].pred);
275
276	// Check: intersection should always be at least size 1
277	assert(!intersection.empty());
278
279	// Break out if we've reduced our intersection to [v]
280	if (intersection.size() == `1`) {
281	assert(*intersection.begin() == v);
282	return;
283	}
284	}
285	}
286
287	/* Update containers to take into account the removal of vertex v. /
288	static
289	void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
290	set<NFAVertex> &removable) {
291	VertexInfo &info = infoMap [v];
292	assert(!info.isRemoved);
293	assert(!contains(removable, v));
294	info.isRemoved = true;
295	removable.insert(v);
296
297	// remove v from its predecessors' successors
298	for (auto u : info.pred) {
299	infoMap [u].succ.erase(v);
300	}
301
302	// remove v from its successors' predecessors
303	for (auto w : info.succ) {
304	infoMap [w].pred.erase(v);
305	}
306	}
307
308	static
309	bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
310	NFAEdge e = edge(g.start, v, g);
311	return e && !g [e].tops.empty();
312	}
313
314	/* Transform (1), removal of redundant vertices. /
315	static
316	bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
317	set<NFAVertex> &removable) {
318	/ useless merges can be done in any order, no need to take any care with*
319	* ordering */
320
321	// Temporary vectors used for intersections below
322	vector<NFAVertex> succPredSet, predSuccSet, intersection;
323
324	bool changed = false;
325	for (auto v : vertices_range(g)) {
326	VertexInfo &info = infoMap [v];
327
328	if (info.isRemoved) {
329	continue;
330	}
331
332	assert(!contains(removable, v));
333
334	if (is_special(v, g)) {
335	continue;
336	}
337
338	/ we do not need to check for out edge tops - as only specials (start)*
339	* can have tops and they are already disqualified. */
340	if (hasInEdgeTops(g, v)) {
341	continue; // Conservatively skip anything with nonzero tops.
342	}
343
344	if (info.pred.empty() \|\| info.succ.empty()) {
345	DEBUG_PRINTF("vertex %zu has empty pred/succ list\n", g[v].index);
346	assert(`0`); // non-special states should always have succ/pred lists
347	continue;
348	}
349
350	// The following cases are more complex and rely on the intersection of
351	// Succ(Pred(v)) and Pred(Succ(v))
352
353	// Compute intersections, operating on the smaller set first
354	// Note that we use vectors here, as set_intersection underneath
355	// guarantees sorted output, and vectors were quite a bit
356	// faster than sets or lists.
357
358	succPredSet.clear();
359	predSuccSet.clear();
360
361	if (info.pred.size() <= info.succ.size()) {
362	succPredIntersection(v, info.pred, infoMap, succPredSet);
363	if (succPredSet.size() == `1`) {
364	// nobody in here but us chickens
365	assert(*succPredSet.begin() == v);
366	continue;
367	}
368	predSuccIntersection(v, info.succ, infoMap, predSuccSet);
369	if (predSuccSet.size() == `1`) {
370	assert(*predSuccSet.begin() == v);
371	continue;
372	}
373	} else {
374	predSuccIntersection(v, info.succ, infoMap, predSuccSet);
375	if (predSuccSet.size() == `1`) {
376	assert(*predSuccSet.begin() == v);
377	continue;
378	}
379	succPredIntersection(v, info.pred, infoMap, succPredSet);
380	if (succPredSet.size() == `1`) {
381	assert(*succPredSet.begin() == v);
382	continue;
383	}
384	}
385
386	// Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
387	intersection.clear();
388	set_intersection(succPredSet.begin(), succPredSet.end(),
389	predSuccSet.begin(), predSuccSet.end(),
390	back_inserter(intersection));
391
392	/ Boring if it is just us in the intersection /
393	if (intersection.size() < `2`) {
394	continue;
395	}
396
397	// Compare char_reach, mark v for removal if any members of
398	// the intersection have an equal or greater reach
399	const CharReach &currReach = g [v].char_reach;
400	const auto &currReports = g [v].reports;
401	for (auto t : intersection) {
402	const VertexInfo &info2 = infoMap [t];
403
404	/ start is never a succ of a state, so will never be in the*
405	* predsucc/succpred intersection */
406	assert(t != g.start);
407
408	if (t == v \|\| info2.isRemoved) {
409	continue;
410	}
411
412	// For each candidate C to make V redundant, check:
413	// if V is an accept state, C must be an accept state for
414	// the same pattern
415	// pred(C) is a superset of pred(V)
416	// succ(C) is a superset of succ(V)
417	// reach(C) is a superset of reach(V)
418	//
419	// Note: pred/sec tests are covered by the intersections
420	// calculated above.
421
422	/ note: links to accepts are also tracked in succs /
423	if (info.isAccept && currReports != g [t].reports) {
424	continue;
425	}
426
427	if (som) {
428	if (t == g.startDs) {
429	continue;
430	}
431	if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
432	continue;
433	}
434	}
435
436	/ we do not need to check for out edge tops - as only start*
437	* can have tops and it has already been ruled out. */
438	if (hasInEdgeTops(g, t)) {
439	continue; // Conservatively skip anything with nonzero tops.
440	}
441
442	CharReach &otherReach = g [t].char_reach;
443	if (currReach.isSubsetOf(otherReach)) {
444	DEBUG_PRINTF("removing redundant vertex %zu (keeping %zu)\n",
445	g[v].index, g[t].index);
446	markForRemoval(v, infoMap, removable);
447	changed = true;
448	break;
449	}
450	}
451	}
452
453	return changed;
454	}
455
456	/* Transform (2), diamond merge pass. /
457	static
458	bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
459	set<NFAVertex> &removable) {
460	// Temporary vectors used for intersections below
461	vector<NFAVertex> succPredSet, predSuccSet, intersection;
462
463	bool changed = false;
464	for (auto v : vertices_range(g)) {
465	VertexInfo &info = infoMap [v];
466
467	if (info.isRemoved) {
468	continue;
469	}
470
471	assert(!contains(removable, v));
472
473	if (is_special(v, g)) {
474	continue;
475	}
476
477	/ we do not need to check for out edge tops - as only specials (start)*
478	* can have tops and they are already disqualified. */
479	if (hasInEdgeTops(g, v)) {
480	continue; // Conservatively skip anything with nonzero tops.
481	}
482
483	if (info.pred.empty() \|\| info.succ.empty()) {
484	assert(`0`); // non-special states should always have succ/pred lists
485	continue;
486	}
487
488	// The following cases are more complex and rely on the intersection of
489	// Succ(Pred(v)) and Pred(Succ(v))
490
491	// Compute intersections, operating on the smaller set first
492	// Note that we use vectors here, as set_intersection underneath
493	// guarantees sorted output, and vectors were quite a bit faster than
494	// sets or lists.
495
496	succPredSet.clear();
497	predSuccSet.clear();
498
499	if (info.pred.size() <= info.succ.size()) {
500	succPredIntersection(v, info.pred, infoMap, succPredSet);
501	if (succPredSet.size() == `1`) {
502	// nobody in here but us chickens
503	assert(*succPredSet.begin() == v);
504	continue;
505	}
506	predSuccIntersection(v, info.succ, infoMap, predSuccSet);
507	if (predSuccSet.size() == `1`) {
508	assert(*predSuccSet.begin() == v);
509	continue;
510	}
511	} else {
512	predSuccIntersection(v, info.succ, infoMap, predSuccSet);
513	if (predSuccSet.size() == `1`) {
514	assert(*predSuccSet.begin() == v);
515	continue;
516	}
517	succPredIntersection(v, info.pred, infoMap, succPredSet);
518	if (succPredSet.size() == `1`) {
519	assert(*succPredSet.begin() == v);
520	continue;
521	}
522	}
523
524	// Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
525	intersection.clear();
526	set_intersection(succPredSet.begin(), succPredSet.end(),
527	predSuccSet.begin(), predSuccSet.end(),
528	back_inserter(intersection));
529
530	/ Boring if it is just us in the intersection /
531	if (intersection.size() < `2`) {
532	continue;
533	}
534
535	const CharReach &currReach = g [v].char_reach;
536	const auto &currReports = g [v].reports;
537	for (auto t : intersection) {
538	const VertexInfo &info2 = infoMap [t];
539
540	if (t == v \|\| info2.isRemoved \|\| is_special(t, g)) {
541	continue;
542	}
543
544	/ note: links to accepts are also tracked in succs /
545	if (info.isAccept && currReports != g [t].reports) {
546	continue;
547	}
548
549	/ we do not need to check for out edge tops - as only specials*
550	* (start) can have tops and they are already disqualified. */
551	if (hasInEdgeTops(g, t)) {
552	continue; // Conservatively skip anything with nonzero tops.
553	}
554
555	if (som) {
556	if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
557	continue; // can only merge like with like.
558	}
559	}
560
561	// If in-degree of v == in-degree of target
562	// and out-degree of v == out-degree of target
563	// (because pred and succ are supersets)
564	// then combine charreach of v into target and remove v
565	if (info.inDegree() == info2.inDegree()
566	&& info.outDegree() == info2.outDegree()) {
567	// add character reachability of v into target
568	CharReach &otherReach = g [t].char_reach;
569	otherReach \|= currReach;
570	// v can be removed
571	DEBUG_PRINTF("removing redundant vertex %zu and merging "
572	"reachability with vertex %zu\n",
573	g[v].index, g[t].index);
574	markForRemoval(v, infoMap, removable);
575	changed = true;
576	break;
577	}
578	}
579	}
580
581	return changed;
582	}
583
584	namespace {
585
586	struct ReachMismatch {};
587
588	class ReachSubsetVisitor : public boost::default_dfs_visitor {
589	public:
590	explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
591
592	template <class Graph, class Vertex>
593	void discover_vertex(const Vertex &v, const Graph &g) const {
594	if (is_any_start(v, g)) {
595	return; // start vertices are OK
596	} else if (is_special(v, g)) {
597	assert(`0`);
598	throw ReachMismatch (); // other special nodes??
599	}
600
601	const CharReach &vcr = g[v].char_reach;
602	DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
603	cr.count());
604	if (vcr != (vcr & cr)) {
605	throw ReachMismatch ();
606	}
607	}
608
609	private:
610	const CharReach &cr;
611	};
612
613	/* Terminator function for DFS used in pathReachSubset. /
614	template <class Graph, class Vertex> class VertexIs {
615	public:
616	explicit VertexIs(const Vertex &v) : vertex(v) {}
617	bool operator()(const Vertex &v, const Graph &) const {
618	return v == vertex;
619	}
620
621	private:
622	Vertex vertex;
623	};
624
625	} // namespace
626
627	/* Returns true if every vertex on paths leading to edge \p e has reachability*
628	* which is a subset of the reachability of \p dom */
629	static
630	bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
631	const NGHolder &g) {
632	const CharReach &domReach = g [dom].char_reach;
633	if (domReach.all()) {
634	return true;
635	}
636
637	NFAVertex start = source(e, g);
638	using RevGraph = boost::reverse_graph<NGHolder, const NGHolder &>;
639	map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
640
641	// Walk the graph backwards from v, examining each node. We fail (return
642	// false) if we encounter a node with reach NOT a subset of domReach, and
643	// we stop searching at dom.
644	try {
645	depth_first_visit(RevGraph (g), start,
646	ReachSubsetVisitor (domReach),
647	make_assoc_property_map(vertexColor),
648	VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
649	} catch(ReachMismatch&) {
650	return false;
651	}
652
653	return true;
654	}
655
656	/* Returns true if every vertex on paths leading from edge \p e has*
657	* reachability which is a subset of the reachability of \p dom */
658	static
659	bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
660	const NGHolder &g) {
661	const CharReach &domReach = g [dom].char_reach;
662	if (domReach.all()) {
663	return true;
664	}
665
666	NFAVertex start = target(e, g);
667	map<NFAVertex, boost::default_color_type> vertexColor;
668
669	// Walk the graph forward from v, examining each node. We fail (return
670	// false) if we encounter a node with reach NOT a subset of domReach, and
671	// we stop searching at dom.
672	try {
673	depth_first_visit(g, start, ReachSubsetVisitor (domReach),
674	make_assoc_property_map(vertexColor),
675	VertexIs<NGHolder, NFAVertex>(dom));
676	} catch(ReachMismatch&) {
677	return false;
678	}
679
680	return true;
681	}
682
683	static
684	bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
685	for (auto w : adjacent_vertices_range(v, g)) {
686	if (!is_special(w, g)) {
687	return false;
688	}
689	}
690	return true;
691	}
692
693	static
694	bool allInsSpecial(NFAVertex v, const NGHolder &g) {
695	for (auto u : inv_adjacent_vertices_range(v, g)) {
696	if (!is_special(u, g)) {
697	return false;
698	}
699	}
700	return true;
701	}
702
703	/* Cheaply check whether this graph can't be reduced at all, because it is*
704	* just a chain of vertices with no other edges. */
705	static
706	bool isIrreducible(const NGHolder &g) {
707	for (auto v : vertices_range(g)) {
708	// skip specials
709	if (is_special(v, g)) {
710	continue;
711	}
712
713	if (in_degree(v, g) != `1` && !allInsSpecial(v, g)) {
714	return false;
715	}
716	if (out_degree(v, g) != `1` && !allOutsSpecial(v, g)) {
717	return false;
718	}
719	}
720
721	/ if calcComponents got sleepy and went home, the above checks don't hold*
722	* as it assumes there is only one connected component. */
723	if (isAlternationOfClasses(g)) {
724	return false;
725	}
726
727	return true;
728	}
729
730	static
731	u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
732	u32 count = `0`;
733
734	cyclic.resize(num_vertices(g));
735
736	for (auto v : vertices_range(g)) {
737	assert(g[v].index < cyclic.size());
738	if (hasSelfLoop(v, g)) {
739	count++;
740	cyclic [g [v].index] = true;
741	}
742	}
743
744	return count;
745	}
746
747	static
748	void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
749	set<NFAEdge> &dead, som_type som) {
750	auto dominators = findDominators(g);
751
752	for (auto v : vertices_range(g)) {
753	if (is_special(v, g)) {
754	continue;
755	}
756
757	// Path in through a dominator (e.g. '.+a?foobar')
758	NFAVertex dom = dominators [v];
759	if (dom && cyclic [g [dom].index]
760	&& edge(dom, v, g).second) {
761
762	if (som && dom == g.startDs) {
763	continue;
764	}
765
766	DEBUG_PRINTF("vertex %zu is dominated by directly-connected cyclic "
767	"vertex %zu\n", g[v].index, g[dom].index);
768
769	// iff all paths through in-edge e of v involve vertices whose
770	// reachability is a subset of reach(dom), we can delete edge e.
771	for (const auto &e : in_edges_range(v, g)) {
772	if (source(e, g) == dom) {
773	continue;
774	}
775
776	if (reversePathReachSubset(e, dom, g)) {
777	DEBUG_PRINTF("edge (%zu, %zu) can be removed: leading "
778	"paths share dom reach\n",
779	g[source(e, g)].index, g[target(e, g)].index);
780	dead.insert(e);
781	if (source(e, g) == v) {
782	cyclic [g [v].index] = false;
783	}
784	continue;
785	}
786	}
787	}
788	}
789	}
790
791	static
792	void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
793	set<NFAEdge> &dead) {
794	auto postdominators = findPostDominators(g);
795
796	for (auto v : vertices_range(g)) {
797	if (is_special(v, g)) {
798	continue;
799	}
800
801	// Path out through a post-dominator (e.g. a?.+foobar')
802	NFAVertex postdom = postdominators [v];
803	if (postdom && cyclic [g [postdom].index] && edge(v, postdom, g).second) {
804	DEBUG_PRINTF("vertex %zu is postdominated by directly-connected "
805	"cyclic vertex %zu\n", g[v].index, g[postdom].index);
806
807	// iff all paths through in-edge e of v involve vertices whose
808	// reachability is a subset of reach(dom), we can delete edge e.
809	for (const auto &e : out_edges_range(v, g)) {
810	if (target(e, g) == postdom) {
811	continue;
812	}
813
814	if (forwardPathReachSubset(e, postdom, g)) {
815	DEBUG_PRINTF("edge (%zu, %zu) can be removed: trailing "
816	"paths share postdom reach\n",
817	g[source(e, g)].index, g[target(e, g)].index);
818	if (target(e, g) == v) {
819	cyclic [g [v].index] = false;
820	}
821	dead.insert(e);
822	continue;
823	}
824	}
825	}
826	}
827	}
828
829	bool removeRedundancy(NGHolder &g, som_type som) {
830	DEBUG_PRINTF("rr som = %d\n", (int)som);
831	renumber_vertices(g);
832
833	// Cheap check: if all the non-special vertices have in-degree one and
834	// out-degree one, there's no redundancy in this here graph and we can
835	// vamoose.
836	if (isIrreducible(g)) {
837	return false;
838	}
839
840	VertexInfoMap infoMap(g);
841
842	// Populate maps of successors and predecessors, and accept status
843	populateContainers(g, infoMap);
844
845	/ Run multiple passes: terminate when a full pass doesn't remove*
846	* any vertices */
847	bool doUseless = true;
848	bool doDiamond = true;
849	set<NFAVertex> removable;
850	while (doUseless \|\| doDiamond) {
851	if (doUseless
852	&& doUselessMergePass(g, som, infoMap, removable)) {
853	doDiamond = true;
854	}
855	doUseless = false;
856
857	if (doDiamond
858	&& doDiamondMergePass(g, som, infoMap, removable)) {
859	doUseless = true;
860	}
861	doDiamond = false;
862	}
863	DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
864	remove_vertices(removable, g);
865
866	return !removable.empty();
867	}
868
869	/* UE-524: remove edges into nodes that are dominated by cyclic nodes with*
870	* reachability that is a superset of all paths feeding into that edge. */
871	bool removeCyclicDominated(NGHolder &g, som_type som) {
872	set<NFAEdge> dead;
873	vector<bool> cyclic;
874	bool changed = false;
875
876	findCyclic(g, cyclic);
877
878	findCyclicDom(g, cyclic, dead, som);
879	if (!dead.empty()) {
880	remove_edges(dead, g);
881	pruneUseless(g);
882	dead.clear();
883	cyclic.clear(); // need to recalculate cyclic as ids have changed
884	findCyclic(g, cyclic);
885	changed = true;
886	}
887
888	findCyclicPostDom(g, cyclic, dead);
889	if (!dead.empty()) {
890	remove_edges(dead, g);
891	pruneUseless(g);
892	dead.clear();
893	changed = true;
894	}
895
896	return changed;
897	}
898
899	} // namespace ue2
900

Browse the source code of ClickHouse/contrib/hyperscan/src/nfagraph/ng_redundancy.cpp