ng_restructuring.cpp source code [ClickHouse/contrib/hyperscan/src/nfagraph/ng_restructuring.cpp]

1	/*
2	* Copyright (c) 2015-2017, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/* \file*
30	* \brief State numbering and late graph restructuring code.
31	*/
32	#include "ng_restructuring.h"
33
34	#include "grey.h"
35	#include "ng_holder.h"
36	#include "ng_util.h"
37	#include "ue2common.h"
38	#include "util/graph_range.h"
39
40	#include <algorithm>
41	#include <cassert>
42
43	#include <boost/graph/transpose_graph.hpp>
44
45	using namespace std;
46
47	namespace ue2 {
48
49	/* Connect the start vertex to each of the vertices in \p tops. This is useful*
50	* temporarily for when we need to run a graph algorithm that expects a single
51	* source vertex. */
52	static
53	void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops,
54	vector<NFAEdge> &tempEdges) {
55	for (NFAVertex v : tops) {
56	assert(!isLeafNode(v, g));
57
58	const NFAEdge &e = add_edge(g.start, v, g);
59	tempEdges.push_back(e);
60	}
61	}
62
63	/**
64	* Returns true if start's successors (aside from startDs) are subset of
65	* startDs's proper successors or if start has no successors other than startDs.
66	*/
67	static
68	bool startIsRedundant(const NGHolder &g) {
69	/ We ignore startDs as the self-loop may have been stripped as an*
70	* optimisation for repeats (improveLeadingRepeats()). */
71	set<NFAVertex> start;
72	insert(&start, adjacent_vertices_range(g.start, g));
73	start.erase(g.startDs);
74
75	// Trivial case: start has no successors other than startDs.
76	if (start.empty()) {
77	DEBUG_PRINTF("start has no out-edges other than to startDs\n");
78	return true;
79	}
80
81	set<NFAVertex> startDs;
82	insert(&startDs, adjacent_vertices_range(g.startDs, g));
83	startDs.erase(g.startDs);
84
85	if (!is_subset_of(start, startDs)) {
86	DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n");
87	return false;
88	}
89
90	return true;
91	}
92
93	static
94	void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops,
95	vector<NFAVertex> &ordering) {
96	// First, wire up our "tops" to start so that we have a single source,
97	// which will give a nicer topo order.
98	vector<NFAEdge> tempEdges;
99	wireStartToTops(g, tops, tempEdges);
100
101	renumber_vertices(g);
102
103	vector<NFAVertex> temp = getTopoOrdering(g);
104
105	remove_edges(tempEdges, g);
106
107	// Move {start, startDs} to the end, so they'll be first when we reverse
108	// the ordering (if they are required).
109	temp.erase(remove(temp.begin(), temp.end(), g.startDs));
110	temp.erase(remove(temp.begin(), temp.end(), g.start));
111	if (proper_out_degree(g.startDs, g)) {
112	temp.push_back(g.startDs);
113	}
114	if (!startIsRedundant(g)) {
115	temp.push_back(g.start);
116	}
117
118	// Walk ordering, remove vertices that shouldn't be participating in state
119	// numbering, such as accepts.
120	for (auto v : temp) {
121	if (is_any_accept(v, g)) {
122	continue; // accepts don't need states
123	}
124
125	ordering.push_back(v);
126	}
127
128	// Output of topo order was in reverse.
129	reverse(ordering.begin(), ordering.end());
130	}
131
132	// Returns the number of states.
133	static
134	unordered_map<NFAVertex, u32>
135	getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
136	unordered_map<NFAVertex, u32> states;
137	for (const auto &v : vertices_range(h)) {
138	states [v] = NO_STATE;
139	}
140
141	u32 stateNum = `0`;
142	for (auto v : ordering) {
143	DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum,
144	h[v].index);
145	states [v] = stateNum++;
146	}
147	return states;
148	}
149
150	/* UE-1648: A state with a single successor that happens to be a predecessor*
151	* can be given any ol' state ID by the topological ordering, so we sink it
152	* next to its pred. This enables better merging. */
153	static
154	void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) {
155	deque<pair<NFAVertex, NFAVertex>> candidates;
156
157	auto start = ordering.begin();
158	for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
159	NFAVertex v = *it;
160	if (is_special(v, g)) {
161	continue;
162	}
163
164	if (out_degree(v, g) == `1`) {
165	NFAVertex t = *(adjacent_vertices(v, g).first);
166	if (v == t) {
167	continue;
168	}
169	if (edge(t, v, g).second && find(start, it, t) != ite) {
170	candidates.push_back(make_pair(v, t));
171	}
172	}
173	}
174
175	for (const auto &cand : candidates) {
176	NFAVertex v = cand.first, u = cand.second;
177	auto u_it = find(ordering.begin(), ordering.end(), u);
178	auto v_it = find(ordering.begin(), ordering.end(), v);
179
180	// Only move candidates backwards in the ordering, and only move them
181	// when necessary.
182	if (u_it >= v_it \|\| distance(u_it, v_it) == `1`) {
183	continue;
184	}
185
186	DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index);
187
188	ordering.erase(v_it);
189	ordering.insert(++u_it, v);
190	}
191	}
192
193	unordered_map<NFAVertex, u32>
194	numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) {
195	DEBUG_PRINTF("numbering states for holder %p\n", &h);
196
197	vector<NFAVertex> ordering;
198	getStateOrdering(h, tops, ordering);
199
200	optimiseTightLoops(h, ordering);
201
202	return getStateIndices(h, ordering);
203	}
204
205	u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) {
206	if (state_ids.empty()) {
207	return `0`;
208	}
209
210	u32 max_state = `0`;
211	for (const auto &m : state_ids) {
212	if (m.second != NO_STATE) {
213	max_state = max(m.second, max_state);
214	}
215	}
216	u32 num_states = max_state + `1`;
217
218	return num_states;
219	}
220
221	} // namespace ue2
222

Browse the source code of ClickHouse/contrib/hyperscan/src/nfagraph/ng_restructuring.cpp