ng_equivalence.cpp source code [ClickHouse/contrib/hyperscan/src/nfagraph/ng_equivalence.cpp]

1	/*
2	* Copyright (c) 2015-2017, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/* \file*
30	* \brief Equivalence class graph reduction pass.
31	*/
32
33	#include "ng_equivalence.h"
34
35	#include "grey.h"
36	#include "ng_depth.h"
37	#include "ng_holder.h"
38	#include "ng_util.h"
39	#include "util/compile_context.h"
40	#include "util/flat_containers.h"
41	#include "util/graph_range.h"
42	#include "util/make_unique.h"
43	#include "util/unordered.h"
44
45	#include <algorithm>
46	#include <memory>
47	#include <set>
48	#include <stack>
49	#include <vector>
50
51	using namespace std;
52
53	namespace ue2 {
54
55	enum EquivalenceType {
56	LEFT_EQUIVALENCE,
57	RIGHT_EQUIVALENCE,
58	};
59
60	namespace {
61	class VertexInfo;
62
63	// custom comparison functor for unordered_set and flat_set
64	struct VertexInfoPtrCmp {
65	// for flat_set
66	bool operator()(const VertexInfo a, const* VertexInfo b) const*;
67	};
68
69	using VertexInfoSet = flat_set<VertexInfo *, VertexInfoPtrCmp>;
70
71	/* Precalculated (and maintained) information about a vertex. /
72	class VertexInfo {
73	public:
74	VertexInfo(NFAVertex v_in, const NGHolder &g)
75	: v (v_in), vert_index(g [v].index), cr (g [v].char_reach),
76	equivalence_class(~`0`), vertex_flags(g [v].assert_flags) {}
77
78	VertexInfoSet pred; //!< predecessors of this vertex
79	VertexInfoSet succ; //!< successors of this vertex
80	NFAVertex v;
81	size_t vert_index;
82	CharReach cr;
83	CharReach pred_cr;
84	CharReach succ_cr;
85	flat_set<u32> edge_tops; /< tops on edge from start /*
86	unsigned equivalence_class;
87	unsigned vertex_flags;
88	};
89
90	// compare two vertex info pointers on their vertex index
91	bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
92	const VertexInfo b) const* {
93	return a->vert_index < b->vert_index;
94	}
95
96	// to avoid traversing infomap each time we need to check the class during
97	// partitioning, we will cache the information pertaining to a particular class
98	class ClassInfo {
99	public:
100	struct ClassDepth {
101	ClassDepth() {}
102	ClassDepth(const NFAVertexDepth &d)
103	: d1 (d.fromStart), d2 (d.fromStartDotStar) {}
104	ClassDepth(const NFAVertexRevDepth &rd)
105	: d1 (rd.toAccept), d2 (rd.toAcceptEod) {}
106	DepthMinMax d1;
107	DepthMinMax d2;
108	};
109	ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in,
110	EquivalenceType eq)
111	: / reports only matter for right-equiv /
112	rs (eq == RIGHT_EQUIVALENCE ? g [vi.v].reports : flat_set<ReportID>()),
113	vertex_flags(vi.vertex_flags), edge_tops (vi.edge_tops), cr (vi.cr),
114	adjacent_cr (eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr),
115	/ treat non-special vertices the same /
116	node_type(min(g [vi.v].index, size_t{N_SPECIALS})), depth (d_in) {}
117
118	bool operator==(const ClassInfo &b) const {
119	return node_type == b.node_type && depth.d1 == b.depth.d1 &&
120	depth.d2 == b.depth.d2 && cr == b.cr &&
121	adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops &&
122	vertex_flags == b.vertex_flags && rs == b.rs;
123	}
124
125	size_t hash() const {
126	return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1,
127	depth.d2);
128	}
129
130	private:
131	flat_set<ReportID> rs; / for right equiv only /
132	unsigned vertex_flags;
133	flat_set<u32> edge_tops;
134	CharReach cr;
135	CharReach adjacent_cr;
136	unsigned node_type;
137	ClassDepth depth;
138	};
139
140	// work queue class. this contraption has two goals:
141	// 1. uniqueness of elements
142	// 2. FILO operation
143	class WorkQueue {
144	public:
145	explicit WorkQueue(unsigned c) {
146	q.reserve(c);
147	}
148	// unique push
149	void push(unsigned id) {
150	if (ids.insert(id).second) {
151	q.push_back(id);
152	}
153	}
154
155	// pop
156	unsigned pop() {
157	unsigned id = q.back();
158	ids.erase(id);
159	q.pop_back();
160	return id;
161	}
162
163	void append(WorkQueue &other) {
164	for (const auto &e : other) {
165	push(e);
166	}
167	}
168
169	void clear() {
170	ids.clear();
171	q.clear();
172	}
173
174	bool empty() const {
175	return ids.empty();
176	}
177
178	vector<unsigned>::const_iterator begin() const {
179	return q.begin();
180	}
181
182	vector<unsigned>::const_iterator end() const {
183	return q.end();
184	}
185
186	size_t capacity() const {
187	return q.capacity();
188	}
189	private:
190	unordered_set<unsigned> ids; //!< stores id's, for uniqueness
191	vector<unsigned> q; //!< vector of id's that we use as FILO.
192	};
193
194	}
195
196	static
197	bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
198	unsigned nonSpecialVertices = `0`;
199	for (auto w : adjacent_vertices_range(v, g)) {
200	if (!is_special(w, g) && w != v) {
201	nonSpecialVertices++;
202	}
203	}
204	return nonSpecialVertices == `1`;
205	}
206
207	static
208	bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
209	unsigned nonSpecialVertices = `0`;
210	for (auto u : inv_adjacent_vertices_range(v, g)) {
211	if (!is_special(u, g) && u != v) {
212	nonSpecialVertices++;
213	}
214	}
215	return nonSpecialVertices == `1`;
216	}
217
218	/* Cheaply check whether this graph can't be reduced at all, because it is*
219	* just a chain of vertices with no other edges. */
220	static
221	bool isIrreducible(const NGHolder &g) {
222	for (auto v : vertices_range(g)) {
223	// skip specials
224	if (is_special(v, g)) {
225	continue;
226	}
227
228	// we want meaningful in_degree to be 1. we also want to make sure we
229	// don't count self-loop + 1 incoming edge as not irreducible
230	if (in_degree(v, g) != `1` && !inIsIrreducible(v, g)) {
231	return false;
232	}
233	// we want meaningful out_degree to be 1. we also want to make sure we
234	// don't count self-loop + 1 outgoing edge as not irreducible
235	if (out_degree(v, g) != `1` && !outIsIrreducible(v, g)) {
236	return false;
237	}
238	}
239
240	return true;
241	}
242
243	#ifndef NDEBUG
244	static
245	bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
246	for (const auto &e : in_edges_range(v, g)) {
247	if (g[e].assert_flags != `0`) {
248	return true;
249	}
250	}
251	for (const auto &e : out_edges_range(v, g)) {
252	if (g[e].assert_flags != `0`) {
253	return true;
254	}
255	}
256	return false;
257	}
258	#endif
259
260	// populate VertexInfo table
261	static
262	vector<unique_ptr<VertexInfo>> getVertexInfos(const NGHolder &g) {
263	const size_t num_verts = num_vertices(g);
264
265	vector<unique_ptr<VertexInfo>> infos;
266	infos.reserve(num_verts * `2`);
267
268	vector<VertexInfo > vertex_map; // indexed by vertex_index property*
269	vertex_map.resize(num_verts);
270
271	for (auto v : vertices_range(g)) {
272	infos.push_back(make_unique<VertexInfo>(v, g));
273	vertex_map [g [v].index] = infos.back().get();
274	}
275
276	// now, go through each vertex and populate its predecessor and successor
277	// lists
278	for (auto &vi : infos) {
279	assert(vi);
280	NFAVertex v = vi ->v;
281
282	// find predecessors
283	for (const auto &e : in_edges_range(v, g)) {
284	NFAVertex u = source(e, g);
285	VertexInfo *u_vi = vertex_map [g [u].index];
286
287	vi ->pred_cr \|= u_vi->cr;
288	vi ->pred.insert(u_vi);
289
290	// also set up edge tops
291	if (is_triggered(g) && u == g.start) {
292	vi ->edge_tops = g [e].tops;
293	}
294	}
295
296	// find successors
297	for (auto w : adjacent_vertices_range(v, g)) {
298	VertexInfo *w_vi = vertex_map [g [w].index];
299	vi ->succ_cr \|= w_vi->cr;
300	vi ->succ.insert(w_vi);
301	}
302	assert(!hasEdgeAsserts(vi->v, g));
303	}
304
305	return infos;
306	}
307
308	// store equivalence class in VertexInfo for each vertex
309	static
310	vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
311	WorkQueue &work_queue, const NGHolder &g,
312	EquivalenceType eq) {
313	const size_t num_verts = infos.size();
314
315	vector<VertexInfoSet> classes;
316	ue2_unordered_map<ClassInfo, unsigned> classinfomap;
317
318	// assume we will have lots of classes, so we don't waste time resizing
319	// these structures.
320	classes.reserve(num_verts);
321	classinfomap.reserve(num_verts);
322
323	// get distances from start (or accept) for all vertices
324	// only one of them is used at a time, never both
325	vector<NFAVertexDepth> depths;
326	vector<NFAVertexRevDepth> rdepths;
327
328	if (eq == LEFT_EQUIVALENCE) {
329	depths = calcDepths(g);
330	} else {
331	rdepths = calcRevDepths(g);
332	}
333
334	// partition the graph based on CharReach
335	for (auto &vi : infos) {
336	assert(vi);
337
338	ClassInfo::ClassDepth depth;
339
340	if (eq == LEFT_EQUIVALENCE) {
341	depth = depths [vi ->vert_index];
342	} else {
343	depth = rdepths [vi ->vert_index];
344	}
345	ClassInfo ci(g, *vi, depth, eq);
346
347	auto ii = classinfomap.find(ci);
348	if (ii == classinfomap.end()) {
349	// vertex is in a new equivalence class by itself.
350	unsigned eq_class = classes.size();
351	vi ->equivalence_class = eq_class;
352	classes.push_back({vi.get()});
353	classinfomap.emplace(move(ci), eq_class);
354	} else {
355	// vertex is added to an existing class.
356	unsigned eq_class = ii ->second;
357	vi ->equivalence_class = eq_class;
358	classes.at(eq_class).insert(vi.get());
359
360	// we now know that this particular class has more than one
361	// vertex, so we add it to the work queue
362	work_queue.push(eq_class);
363	}
364	}
365
366	DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size());
367	return classes;
368	}
369
370	// generalized equivalence processing (left and right)
371	// basically, goes through every vertex in a class and checks if all successor or
372	// predecessor classes match in all vertices. if classes mismatch, a vertex is
373	// split into a separate class, along with all vertices having the same set of
374	// successor/predecessor classes. the opposite side (successors for left
375	// equivalence, predecessors for right equivalence) classes get revalidated in
376	// case of a split.
377	static
378	void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue,
379	EquivalenceType eq_type) {
380	// now, go through the work queue until it's empty
381	map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
382	flat_set<unsigned> cur_classes;
383	// local work queue, to store classes we want to revalidate in case of split
384	WorkQueue reval_queue(work_queue.capacity());
385
386	while (!work_queue.empty()) {
387	// dequeue our class from the work queue
388	unsigned cur_class = work_queue.pop();
389
390	// get all vertices in current equivalence class
391	VertexInfoSet &cur_class_vertices = classes.at(cur_class);
392
393	if (cur_class_vertices.size() < `2`) {
394	continue;
395	}
396
397	// clear data from previous iterations
398	tentative_classmap.clear();
399
400	DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n",
401	cur_class, cur_class_vertices.size());
402
403	// go through vertices in this class
404	for (VertexInfo *vi : cur_class_vertices) {
405	cur_classes.clear();
406
407	// get vertex lists for equivalence vertices and vertices for
408	// revalidation in case of split
409	const auto &eq_vertices =
410	(eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ;
411	const auto &reval_vertices =
412	(eq_type == LEFT_EQUIVALENCE) ? vi->succ : vi->pred;
413
414	// go through equivalence and note the classes
415	for (const VertexInfo *tmp : eq_vertices) {
416	cur_classes.insert(tmp->equivalence_class);
417	}
418
419	// note all the classes that need to be reevaluated
420	for (const VertexInfo *tmp : reval_vertices) {
421	reval_queue.push(tmp->equivalence_class);
422	}
423
424	VertexInfoSet &tentative_classes = tentative_classmap [cur_classes];
425	tentative_classes.insert(vi);
426	}
427
428	// if we found more than one class, split and revalidate everything
429	if (tentative_classmap.size() > `1`) {
430	auto tmi = tentative_classmap.begin();
431
432	// start from the second class
433	for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
434	const VertexInfoSet &vertices_to_split = tmi ->second;
435	unsigned new_class = classes.size();
436	VertexInfoSet new_class_vertices;
437
438	for (VertexInfo *vi : vertices_to_split) {
439	vi->equivalence_class = new_class;
440	// note: we cannot use the cur_class_vertices ref, as it is
441	// invalidated by modifications to the classes vector.
442	classes [cur_class].erase(vi);
443	new_class_vertices.insert(vi);
444	}
445	classes.push_back(move(new_class_vertices));
446
447	if (contains(tmi ->first, cur_class)) {
448	reval_queue.push(new_class);
449	}
450	}
451	work_queue.append(reval_queue);
452	}
453	reval_queue.clear();
454	}
455	}
456
457	static
458	bool require_separate_eod_vertex(const VertexInfoSet &vert_infos,
459	const NGHolder &g) {
460	/ We require separate eod and normal accept vertices for a class if we have*
461	* both normal accepts and eod accepts AND the reports are different for eod
462	* and non-eod reports. */
463
464	flat_set<ReportID> non_eod;
465	flat_set<ReportID> eod;
466
467	for (const VertexInfo *vi : vert_infos) {
468	NFAVertex v = vi->v;
469
470	if (edge(v, g.accept, g).second) {
471	insert(&non_eod, g [v].reports);
472	}
473
474	if (edge(v, g.acceptEod, g).second) {
475	insert(&eod, g [v].reports);
476	}
477	}
478
479	if (non_eod.empty() \|\| eod.empty()) {
480	return false;
481	}
482
483	return non_eod != eod;
484
485	}
486
487	static
488	void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
489	unsigned eq_class, VertexInfoSet &cur_class_vertices,
490	set<NFAVertex> *toRemove) {
491	DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a "
492	"single vertex.\n", cur_class_vertices.size(), eq_class);
493
494	// replace equivalence class with a single vertex:
495	// 1. create new vertex with matching properties
496	// 2. wire all predecessors to new vertex
497	// 2a. update info for new vertex with new predecessors
498	// 2b. update each predecessor's successor list
499	// 3. wire all successors to new vertex
500	// 3a. update info for new vertex with new successors
501	// 3b. update each successor's predecessor list
502	// 4. remove old vertex
503
504	// any differences between vertex properties were resolved during
505	// initial partitioning, so we assume that every vertex in equivalence
506	// class has the same CharReach et al.
507	// so, we find the first vertex in our class and get all its properties
508
509	/ For left equivalence, if the members have different reporting behaviour*
510	* we sometimes require two vertices to be created (one connected to accept
511	* and one to accepteod) */
512
513	NFAVertex old_v = (*cur_class_vertices.begin())->v;
514	NFAVertex new_v = clone_vertex(g, old_v); / set up new vertex with same*
515	* props */
516	g [new_v].reports.clear(); / populated as we pull in succs /
517
518	// store this vertex in our global vertex list
519	infos.push_back(make_unique<VertexInfo>(new_v, g));
520	VertexInfo *new_vertex_info = infos.back().get();
521
522	NFAVertex new_v_eod = NGHolder::null_vertex();
523	VertexInfo new_vertex_info_eod = nullptr*;
524
525	if (require_separate_eod_vertex(cur_class_vertices, g)) {
526	new_v_eod = clone_vertex(g, old_v);
527	g [new_v_eod].reports.clear();
528	infos.push_back(make_unique<VertexInfo>(new_v_eod, g));
529	new_vertex_info_eod = infos.back().get();
530	}
531
532	const auto &edgetops = (*cur_class_vertices.begin())->edge_tops;
533	for (VertexInfo *old_vertex_info : cur_class_vertices) {
534	assert(old_vertex_info->equivalence_class == eq_class);
535
536	// mark this vertex for removal
537	toRemove->insert(old_vertex_info->v);
538
539	// for each predecessor, add edge to new vertex and update info
540	for (VertexInfo *pred_info : old_vertex_info->pred) {
541	// update info for new vertex
542	new_vertex_info->pred.insert(pred_info);
543	if (new_vertex_info_eod) {
544	new_vertex_info_eod->pred.insert(pred_info);
545	}
546
547	// update info for predecessor
548	pred_info->succ.erase(old_vertex_info);
549
550	// if edge doesn't exist, create it
551	NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g);
552
553	// put edge tops, if applicable
554	if (!edgetops.empty()) {
555	assert(g[e].tops.empty() \|\| g[e].tops == edgetops);
556	g [e].tops = edgetops;
557	}
558
559	pred_info->succ.insert(new_vertex_info);
560
561	if (new_v_eod) {
562	NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
563	g);
564
565	// put edge tops, if applicable
566	if (!edgetops.empty()) {
567	assert(g[e].tops.empty() \|\| g[e].tops == edgetops);
568	g [ee].tops = edgetops;
569	}
570
571	pred_info->succ.insert(new_vertex_info_eod);
572	}
573	}
574
575	// for each successor, add edge from new vertex and update info
576	for (VertexInfo *succ_info : old_vertex_info->succ) {
577	NFAVertex succ_v = succ_info->v;
578
579	// update info for successor
580	succ_info->pred.erase(old_vertex_info);
581
582	if (new_v_eod && succ_v == g.acceptEod) {
583	// update info for new vertex
584	new_vertex_info_eod->succ.insert(succ_info);
585	insert(&g [new_v_eod].reports,
586	g [old_vertex_info->v].reports);
587
588	add_edge_if_not_present(new_v_eod, succ_v, g);
589	succ_info->pred.insert(new_vertex_info_eod);
590	} else {
591	// update info for new vertex
592	new_vertex_info->succ.insert(succ_info);
593
594	// if edge doesn't exist, create it
595	add_edge_if_not_present(new_v, succ_v, g);
596	succ_info->pred.insert(new_vertex_info);
597
598	if (is_any_accept(succ_v, g)) {
599	insert(&g [new_v].reports,
600	g [old_vertex_info->v].reports);
601	}
602	}
603	}
604	}
605
606	// update classmap
607	new_vertex_info->equivalence_class = eq_class;
608	cur_class_vertices.insert(new_vertex_info);
609	}
610
611	// walk through vertices of an equivalence class and replace them with a single
612	// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
613	// report behaviour with a single vertex).
614	static
615	bool mergeEquivalentClasses(vector<VertexInfoSet> &classes,
616	vector<unique_ptr<VertexInfo>> &infos,
617	NGHolder &g) {
618	bool merged = false;
619	set<NFAVertex> toRemove;
620
621	// go through all classes and merge classes with more than one vertex
622	for (unsigned eq_class = `0`; eq_class < classes.size(); eq_class++) {
623	// get all vertices in current equivalence class
624	VertexInfoSet &cur_class_vertices = classes [eq_class];
625
626	// we don't care for single-vertex classes
627	if (cur_class_vertices.size() > `1`) {
628	merged = true;
629	mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove);
630	}
631	}
632
633	// remove all dead vertices
634	DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size());
635	remove_vertices(toRemove, g);
636
637	return merged;
638	}
639
640	static
641	bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
642	// create a list of equivalence classes to check
643	WorkQueue work_queue(num_vertices(g));
644
645	// get information on every vertex in the graph
646	// new vertices are allocated here, and stored in infos
647	auto infos = getVertexInfos(g);
648
649	// partition the graph
650	auto classes = partitionGraph(infos, work_queue, g, eq_type);
651
652	// do equivalence processing
653	equivalence(classes, work_queue, eq_type);
654
655	// replace equivalent classes with single vertices
656	// new vertices are (possibly) allocated here, and stored in infos
657	return mergeEquivalentClasses(classes, infos, g);
658	}
659
660	bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
661	if (!cc.grey.equivalenceEnable) {
662	DEBUG_PRINTF("equivalence processing disabled in grey box\n");
663	return false;
664	}
665	renumber_vertices(g);
666
667	// Cheap check: if all the non-special vertices have in-degree one and
668	// out-degree one, there's no redundancy in this here graph and we can
669	// vamoose.
670	if (isIrreducible(g)) {
671	DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n");
672	return false;
673	}
674
675	// take note if we have merged any vertices
676	bool merge = false;
677	merge \|= reduceGraphEquivalences(g, LEFT_EQUIVALENCE);
678	merge \|= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE);
679	return merge;
680	}
681
682	} // namespace ue2
683

Browse the source code of ClickHouse/contrib/hyperscan/src/nfagraph/ng_equivalence.cpp