goughcompile_accel.cpp source code [ClickHouse/contrib/hyperscan/src/nfa/goughcompile_accel.cpp]

1	/*
2	* Copyright (c) 2015, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	#include "goughcompile_internal.h"
30	#include "gough_internal.h"
31	#include "grey.h"
32	#include "mcclellancompile.h"
33	#include "util/container.h"
34	#include "util/graph.h"
35	#include "util/graph_range.h"
36
37	#include "ue2common.h"
38
39	#include <map>
40	#include <vector>
41
42	using namespace std;
43
44	namespace ue2 {
45
46	template<typename Graph>
47	void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u,
48	const typename Graph::vertex_descriptor &v,
49	Graph &g) {
50	if (u != v) {
51	add_edge(u, v, g);
52	}
53	}
54
55	static
56	bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e,
57	const GoughEdgeProps &ep, u32 *margin) {
58	if (vp.vars.empty() && ep.vars.empty()) {
59	/ if we update no som information, then it is trivial to accelerate /
60	*margin = `0`;
61	return true;
62	}
63
64	/ if the effect of running a self loop stabilises after a small number of*
65	* iterations, it is possible to accelerate over the state and only then run
66	* the block N times. To model this we create a graph which shows how the
67	* value for a variable at the end of a self loop block is related to values
68	* at the start */
69
70	typedef boost::adjacency_list<boost::vecS, boost::vecS,
71	boost::bidirectionalS> basic_graph;
72	typedef basic_graph::vertex_descriptor basic_vertex;
73	basic_graph bg;
74
75	map<const GoughSSAVar *, basic_vertex> verts;
76
77	/ create verts /
78	for (const auto &var : ep.vars) {
79	verts [var.get()] = add_vertex(bg);
80	}
81
82	for (const auto &var : vp.vars) {
83	verts [var.get()] = add_vertex(bg);
84	}
85
86	/ wire edges /
87	set<basic_vertex> done;
88	for (const auto &var : ep.vars) {
89	assert(contains(verts, var.get()));
90	basic_vertex v = verts [var.get()];
91	for (GoughSSAVar *pred : var ->get_inputs()) {
92	if (!contains(verts, pred)) {
93	continue;
94	}
95	basic_vertex u = verts [pred];
96	if (contains(done, u)) { / u has already taken on new values this*
97	* iteration */
98	for (auto p : inv_adjacent_vertices_range(u, bg)) {
99	add_edge_if_not_selfloop(p, v, bg);
100	}
101	} else {
102	add_edge_if_not_selfloop(u, v, bg);
103	}
104	}
105	done.insert(v);
106	}
107
108	for (const auto &var : vp.vars) {
109	GoughSSAVar *pred = var ->get_input(e);
110	assert(contains(verts, var.get()));
111	basic_vertex v = verts [var.get()];
112	if (!contains(verts, pred)) {
113	continue;
114	}
115
116	basic_vertex u = verts [pred];
117	if (contains(done, u)) { / u has already taken on new values this*
118	* iteration */
119	for (auto p : inv_adjacent_vertices_range(u, bg)) {
120	add_edge_if_not_selfloop(p, v, bg);
121	}
122	} else {
123	add_edge_if_not_selfloop(u, v, bg);
124	}
125	/ do not add v to done as all joins happen in parallel /
126	}
127
128	/ check for loops - non self loops may prevent settling /
129
130	if (!is_dag(bg)) {
131	DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id);
132	return false;
133	}
134
135	margin = num_vertices(bg); /* TODO: be less conservative /
136
137	if (*margin > `50`) {
138	return false;
139	}
140
141	return true;
142	}
143
144	static
145	bool verify_neighbour(const GoughGraph &g, GoughVertex u,
146	const map<gough_edge_id, vector<gough_ins> > &blocks,
147	const set<GoughVertex> &succs,
148	const vector<gough_ins> &block_sl) {
149	for (const auto &e : out_edges_range(u, g)) {
150	if (!g [e].reach.any()) { / ignore top edges /
151	continue;
152	}
153
154	GoughVertex t = target(e, g);
155	if (!contains(succs, t)) { / must be an escape string /
156	continue;
157	}
158
159	if (!contains(blocks, gough_edge_id (g, e))) {
160	return false;
161	}
162
163	if (blocks.at(gough_edge_id (g, e)) != block_sl) {
164	return false;
165	}
166	}
167
168	return true;
169	}
170
171	static
172	bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u,
173	const map<gough_edge_id, vector<gough_ins> > &blocks,
174	const set<GoughVertex> &succs) {
175	for (const auto &e : out_edges_range(u, g)) {
176	if (!g [e].reach.any()) { / ignore top edges /
177	continue;
178	}
179
180	GoughVertex t = target(e, g);
181	if (!contains(succs, t)) { / must be an escape string /
182	continue;
183	}
184
185	if (contains(blocks, gough_edge_id (g, e))) {
186	return false;
187	}
188	}
189
190	return true;
191	}
192
193	/ Checks the som aspects of allowing two byte accel - it is expected that the*
194	* mcclellan logic will identify escape strings.
195	*
196	* For 2 byte acceleration to be correct we require that any non-escape sequence
197	* characters xy from the accel state has the same effect as just the character
198	* of y.
199	*
200	* The current way of ensuring this is to require:
201	* (a) all edges out of the cyclic state behave identically to the cyclic self
202	* loop edge
203	* (b) edges out of the neighbouring state which do not correspond to escape
204	* string behave identical to the cyclic state edges.
205	*
206	* TODO: these restrictions could be relaxed by looking at the effect on
207	* relevant (live?) vars only, allowing additions to the escape string set, and
208	* considering one byte escapes.
209	*/
210	static
211	bool allow_two_byte_accel(const GoughGraph &g,
212	const map<gough_edge_id, vector<gough_ins> > &blocks,
213	GoughVertex v, const GoughEdge &self_loop) {
214	if (contains(blocks, gough_edge_id (g, self_loop))) {
215	DEBUG_PRINTF("edge plan on self loop\n");
216	const auto &block_sl = blocks.at(gough_edge_id (g, self_loop));
217
218	set<GoughVertex> succs;
219	for (const auto &e : out_edges_range(v, g)) {
220	if (g [e].reach.none()) { / ignore top edges /
221	continue;
222	}
223
224	gough_edge_id ged(g, e);
225	if (!contains(blocks, ged) \|\| blocks.at(ged) != block_sl) {
226	DEBUG_PRINTF("different out-edge behaviour\n");
227	return false;
228	}
229	succs.insert(target(e, g));
230	}
231
232	for (auto w : adjacent_vertices_range(v, g)) {
233	if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) {
234	return false;
235	}
236	}
237	} else {
238	DEBUG_PRINTF("no edge plan on self loop\n");
239	set<GoughVertex> succs;
240	for (const auto &e : out_edges_range(v, g)) {
241	if (g [e].reach.none()) { / ignore top edges /
242	continue;
243	}
244
245	gough_edge_id ged(g, e);
246	if (contains(blocks, ged)) {
247	DEBUG_PRINTF("different out-edge behaviour\n");
248	return false;
249	}
250	succs.insert(target(e, g));
251
252	for (auto w : adjacent_vertices_range(v, g)) {
253	if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) {
254	return false;
255	}
256	}
257	}
258	}
259
260	DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id);
261	return true;
262	}
263
264	void find_allowed_accel_states(const GoughGraph &g,
265	const map<gough_edge_id, vector<gough_ins> > &blocks,
266	map<dstate_id_t, gough_accel_state_info> *out) {
267	for (auto v : vertices_range(g)) {
268	GoughEdge e;
269	if (!find_normal_self_loop(v, g, &e)) {
270	continue; / not accelerable /
271	}
272	u32 margin = `0`;
273	if (!can_accel_over_selfloop(g [v], e, g [e], &margin)) {
274	continue; / not accelerable /
275	}
276	bool tba = allow_two_byte_accel(g, blocks, v, e);
277	out->emplace(g [v].state_id, gough_accel_state_info (margin, tba));
278	}
279	}
280
281	} // namespace ue2
282

Browse the source code of ClickHouse/contrib/hyperscan/src/nfa/goughcompile_accel.cpp