1 | /* |
2 | * Copyright (c) 2015, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "goughcompile_internal.h" |
30 | #include "gough_internal.h" |
31 | #include "grey.h" |
32 | #include "mcclellancompile.h" |
33 | #include "util/container.h" |
34 | #include "util/graph.h" |
35 | #include "util/graph_range.h" |
36 | |
37 | #include "ue2common.h" |
38 | |
39 | #include <map> |
40 | #include <vector> |
41 | |
42 | using namespace std; |
43 | |
44 | namespace ue2 { |
45 | |
46 | template<typename Graph> |
47 | void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u, |
48 | const typename Graph::vertex_descriptor &v, |
49 | Graph &g) { |
50 | if (u != v) { |
51 | add_edge(u, v, g); |
52 | } |
53 | } |
54 | |
55 | static |
56 | bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e, |
57 | const GoughEdgeProps &ep, u32 *margin) { |
58 | if (vp.vars.empty() && ep.vars.empty()) { |
59 | /* if we update no som information, then it is trivial to accelerate */ |
60 | *margin = 0; |
61 | return true; |
62 | } |
63 | |
64 | /* if the effect of running a self loop stabilises after a small number of |
65 | * iterations, it is possible to accelerate over the state and only then run |
66 | * the block N times. To model this we create a graph which shows how the |
67 | * value for a variable at the end of a self loop block is related to values |
68 | * at the start */ |
69 | |
70 | typedef boost::adjacency_list<boost::vecS, boost::vecS, |
71 | boost::bidirectionalS> basic_graph; |
72 | typedef basic_graph::vertex_descriptor basic_vertex; |
73 | basic_graph bg; |
74 | |
75 | map<const GoughSSAVar *, basic_vertex> verts; |
76 | |
77 | /* create verts */ |
78 | for (const auto &var : ep.vars) { |
79 | verts[var.get()] = add_vertex(bg); |
80 | } |
81 | |
82 | for (const auto &var : vp.vars) { |
83 | verts[var.get()] = add_vertex(bg); |
84 | } |
85 | |
86 | /* wire edges */ |
87 | set<basic_vertex> done; |
88 | for (const auto &var : ep.vars) { |
89 | assert(contains(verts, var.get())); |
90 | basic_vertex v = verts[var.get()]; |
91 | for (GoughSSAVar *pred : var->get_inputs()) { |
92 | if (!contains(verts, pred)) { |
93 | continue; |
94 | } |
95 | basic_vertex u = verts[pred]; |
96 | if (contains(done, u)) { /* u has already taken on new values this |
97 | * iteration */ |
98 | for (auto p : inv_adjacent_vertices_range(u, bg)) { |
99 | add_edge_if_not_selfloop(p, v, bg); |
100 | } |
101 | } else { |
102 | add_edge_if_not_selfloop(u, v, bg); |
103 | } |
104 | } |
105 | done.insert(v); |
106 | } |
107 | |
108 | for (const auto &var : vp.vars) { |
109 | GoughSSAVar *pred = var->get_input(e); |
110 | assert(contains(verts, var.get())); |
111 | basic_vertex v = verts[var.get()]; |
112 | if (!contains(verts, pred)) { |
113 | continue; |
114 | } |
115 | |
116 | basic_vertex u = verts[pred]; |
117 | if (contains(done, u)) { /* u has already taken on new values this |
118 | * iteration */ |
119 | for (auto p : inv_adjacent_vertices_range(u, bg)) { |
120 | add_edge_if_not_selfloop(p, v, bg); |
121 | } |
122 | } else { |
123 | add_edge_if_not_selfloop(u, v, bg); |
124 | } |
125 | /* do not add v to done as all joins happen in parallel */ |
126 | } |
127 | |
128 | /* check for loops - non self loops may prevent settling */ |
129 | |
130 | if (!is_dag(bg)) { |
131 | DEBUG_PRINTF("can not %u accel as large loops\n" , vp.state_id); |
132 | return false; |
133 | } |
134 | |
135 | *margin = num_vertices(bg); /* TODO: be less conservative */ |
136 | |
137 | if (*margin > 50) { |
138 | return false; |
139 | } |
140 | |
141 | return true; |
142 | } |
143 | |
144 | static |
145 | bool verify_neighbour(const GoughGraph &g, GoughVertex u, |
146 | const map<gough_edge_id, vector<gough_ins> > &blocks, |
147 | const set<GoughVertex> &succs, |
148 | const vector<gough_ins> &block_sl) { |
149 | for (const auto &e : out_edges_range(u, g)) { |
150 | if (!g[e].reach.any()) { /* ignore top edges */ |
151 | continue; |
152 | } |
153 | |
154 | GoughVertex t = target(e, g); |
155 | if (!contains(succs, t)) { /* must be an escape string */ |
156 | continue; |
157 | } |
158 | |
159 | if (!contains(blocks, gough_edge_id(g, e))) { |
160 | return false; |
161 | } |
162 | |
163 | if (blocks.at(gough_edge_id(g, e)) != block_sl) { |
164 | return false; |
165 | } |
166 | } |
167 | |
168 | return true; |
169 | } |
170 | |
171 | static |
172 | bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u, |
173 | const map<gough_edge_id, vector<gough_ins> > &blocks, |
174 | const set<GoughVertex> &succs) { |
175 | for (const auto &e : out_edges_range(u, g)) { |
176 | if (!g[e].reach.any()) { /* ignore top edges */ |
177 | continue; |
178 | } |
179 | |
180 | GoughVertex t = target(e, g); |
181 | if (!contains(succs, t)) { /* must be an escape string */ |
182 | continue; |
183 | } |
184 | |
185 | if (contains(blocks, gough_edge_id(g, e))) { |
186 | return false; |
187 | } |
188 | } |
189 | |
190 | return true; |
191 | } |
192 | |
193 | /* Checks the som aspects of allowing two byte accel - it is expected that the |
194 | * mcclellan logic will identify escape strings. |
195 | * |
196 | * For 2 byte acceleration to be correct we require that any non-escape sequence |
197 | * characters xy from the accel state has the same effect as just the character |
198 | * of y. |
199 | * |
200 | * The current way of ensuring this is to require: |
201 | * (a) all edges out of the cyclic state behave identically to the cyclic self |
202 | * loop edge |
203 | * (b) edges out of the neighbouring state which do not correspond to escape |
204 | * string behave identical to the cyclic state edges. |
205 | * |
206 | * TODO: these restrictions could be relaxed by looking at the effect on |
207 | * relevant (live?) vars only, allowing additions to the escape string set, and |
208 | * considering one byte escapes. |
209 | */ |
210 | static |
211 | bool allow_two_byte_accel(const GoughGraph &g, |
212 | const map<gough_edge_id, vector<gough_ins> > &blocks, |
213 | GoughVertex v, const GoughEdge &self_loop) { |
214 | if (contains(blocks, gough_edge_id(g, self_loop))) { |
215 | DEBUG_PRINTF("edge plan on self loop\n" ); |
216 | const auto &block_sl = blocks.at(gough_edge_id(g, self_loop)); |
217 | |
218 | set<GoughVertex> succs; |
219 | for (const auto &e : out_edges_range(v, g)) { |
220 | if (g[e].reach.none()) { /* ignore top edges */ |
221 | continue; |
222 | } |
223 | |
224 | gough_edge_id ged(g, e); |
225 | if (!contains(blocks, ged) || blocks.at(ged) != block_sl) { |
226 | DEBUG_PRINTF("different out-edge behaviour\n" ); |
227 | return false; |
228 | } |
229 | succs.insert(target(e, g)); |
230 | } |
231 | |
232 | for (auto w : adjacent_vertices_range(v, g)) { |
233 | if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) { |
234 | return false; |
235 | } |
236 | } |
237 | } else { |
238 | DEBUG_PRINTF("no edge plan on self loop\n" ); |
239 | set<GoughVertex> succs; |
240 | for (const auto &e : out_edges_range(v, g)) { |
241 | if (g[e].reach.none()) { /* ignore top edges */ |
242 | continue; |
243 | } |
244 | |
245 | gough_edge_id ged(g, e); |
246 | if (contains(blocks, ged)) { |
247 | DEBUG_PRINTF("different out-edge behaviour\n" ); |
248 | return false; |
249 | } |
250 | succs.insert(target(e, g)); |
251 | |
252 | for (auto w : adjacent_vertices_range(v, g)) { |
253 | if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) { |
254 | return false; |
255 | } |
256 | } |
257 | } |
258 | } |
259 | |
260 | DEBUG_PRINTF("allowing two byte accel for %u\n" , g[v].state_id); |
261 | return true; |
262 | } |
263 | |
264 | void find_allowed_accel_states(const GoughGraph &g, |
265 | const map<gough_edge_id, vector<gough_ins> > &blocks, |
266 | map<dstate_id_t, gough_accel_state_info> *out) { |
267 | for (auto v : vertices_range(g)) { |
268 | GoughEdge e; |
269 | if (!find_normal_self_loop(v, g, &e)) { |
270 | continue; /* not accelerable */ |
271 | } |
272 | u32 margin = 0; |
273 | if (!can_accel_over_selfloop(g[v], e, g[e], &margin)) { |
274 | continue; /* not accelerable */ |
275 | } |
276 | bool tba = allow_two_byte_accel(g, blocks, v, e); |
277 | out->emplace(g[v].state_id, gough_accel_state_info(margin, tba)); |
278 | } |
279 | } |
280 | |
281 | } // namespace ue2 |
282 | |