1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "goughcompile.h" |
30 | #include "goughcompile_dump.h" |
31 | #include "goughcompile_internal.h" |
32 | #include "gough_internal.h" |
33 | #include "grey.h" |
34 | #include "util/container.h" |
35 | #include "util/flat_containers.h" |
36 | #include "util/graph.h" |
37 | #include "util/graph_range.h" |
38 | #include "util/order_check.h" |
39 | |
40 | #include "ue2common.h" |
41 | |
42 | #include <algorithm> |
43 | #include <boost/graph/depth_first_search.hpp> |
44 | #include <boost/range/adaptor/map.hpp> |
45 | |
46 | using namespace std; |
47 | using boost::adaptors::map_values; |
48 | |
49 | namespace ue2 { |
50 | |
51 | template<typename VarP, typename VarQ> |
52 | void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) { |
53 | for (const auto &var : in) { |
54 | out->push_back(var.get()); |
55 | } |
56 | } |
57 | |
58 | static |
59 | void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) { |
60 | for (auto v : vertices_range(g)) { |
61 | push_back_all_raw(out, g[v].vars); |
62 | } |
63 | for (const auto &e : edges_range(g)) { |
64 | push_back_all_raw(out, g[e].vars); |
65 | } |
66 | } |
67 | |
68 | namespace { |
69 | struct GoughGraphAux { |
70 | map<const GoughSSAVar *, GoughVertex> containing_v; |
71 | map<const GoughSSAVar *, GoughEdge> containing_e; |
72 | map<const GoughSSAVar *, set<GoughVertex> > reporters; |
73 | }; |
74 | } |
75 | |
76 | static never_inline |
77 | void fill_aux(const GoughGraph &g, GoughGraphAux *aux) { |
78 | for (auto v : vertices_range(g)) { |
79 | for (const auto &var : g[v].vars) { |
80 | aux->containing_v[var.get()] = v; |
81 | DEBUG_PRINTF("%u is on vertex %u\n" , var->slot, g[v].state_id); |
82 | } |
83 | |
84 | for (GoughSSAVar *var : g[v].reports | map_values) { |
85 | aux->reporters[var].insert(v); |
86 | } |
87 | |
88 | for (GoughSSAVar *var : g[v].reports_eod | map_values) { |
89 | aux->reporters[var].insert(v); |
90 | } |
91 | } |
92 | for (const auto &e : edges_range(g)) { |
93 | for (const auto &var : g[e].vars) { |
94 | aux->containing_e[var.get()] = e; |
95 | DEBUG_PRINTF("%u is on edge %u->%u\n" , var->slot, |
96 | g[source(e, g)].state_id, g[target(e, g)].state_id); |
97 | } |
98 | } |
99 | } |
100 | |
101 | static |
102 | bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var, |
103 | const GoughGraphAux &aux) { |
104 | /* if var used as a report, it cannot be considered block local */ |
105 | if (contains(aux.reporters, var)) { |
106 | return false; |
107 | } |
108 | |
109 | /* (useful) vertex/join vars never local - they are terminal in blocks |
110 | * and so should be read by another block. */ |
111 | if (!contains(aux.containing_e, var)) { |
112 | return false; |
113 | } |
114 | |
115 | /* for other cases, require that all uses of var are later in the same edge |
116 | * or on the target AND if on target it is sole on flow coming from the |
117 | * edge in question. */ |
118 | const GoughEdge &e = aux.containing_e.at(var); |
119 | GoughVertex t = target(e, cfg); |
120 | |
121 | size_t seen_outputs = 0; |
122 | const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs(); |
123 | bool seen_var = false; |
124 | for (const auto &e_var : cfg[e].vars) { |
125 | if (seen_var) { |
126 | GoughSSAVarWithInputs *w |
127 | = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get()); |
128 | if (contains(out, w)) { |
129 | seen_outputs++; |
130 | } |
131 | } else { |
132 | seen_var = var == e_var.get(); |
133 | } |
134 | } |
135 | assert(seen_var); |
136 | |
137 | for (const auto &t_var : cfg[t].vars) { |
138 | if (contains(out, t_var.get())) { |
139 | seen_outputs++; |
140 | const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var); |
141 | if (flow.size() != 1 || *flow.begin() != e) { |
142 | /* this var is used by the target join var BUT on a different |
143 | * flow, so this is not a block local variable */ |
144 | return false; |
145 | } |
146 | } |
147 | } |
148 | |
149 | assert(seen_outputs <= out.size()); |
150 | return seen_outputs == out.size(); |
151 | } |
152 | |
153 | static |
154 | void handle_pending_edge(const GoughGraph &g, const GoughEdge &e, |
155 | GoughSSAVar *start, set<GoughVertex> &pending_vertex, |
156 | set<const GoughSSAVar *> &rv) { |
157 | const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars; |
158 | bool marking = !start; |
159 | DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n" , g[source(e, g)].state_id, |
160 | g[target(e, g)].state_id, marking ? "full" : "partial" , |
161 | vars.size()); |
162 | for (auto it = vars.rbegin(); it != vars.rend(); ++it) { |
163 | GoughSSAVar *var = it->get(); |
164 | if (contains(rv, var)) { |
165 | DEBUG_PRINTF("somebody has already processed this vertex [%u]\n" , |
166 | var->slot); |
167 | return; |
168 | } |
169 | if (var == start) { |
170 | assert(!marking); |
171 | marking = true; |
172 | continue; |
173 | } |
174 | if (marking) { |
175 | rv.insert(var); |
176 | } |
177 | } |
178 | assert(marking); |
179 | GoughVertex s = source(e, g); |
180 | for (const auto &var : g[s].vars) { |
181 | DEBUG_PRINTF("interferes %u\n" , var->slot); |
182 | rv.insert(var.get()); |
183 | } |
184 | pending_vertex.insert(s); |
185 | } |
186 | |
187 | static |
188 | void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g, |
189 | const GoughGraphAux &aux, |
190 | const flat_set<GoughSSAVarWithInputs *> &pending_var, |
191 | set<GoughVertex> &pending_vertex, |
192 | set<const GoughSSAVar *> &rv) { |
193 | for (GoughSSAVarWithInputs *var : pending_var) { |
194 | if (contains(aux.containing_v, var)) { |
195 | /* def is used by join vertex, value only needs to be live on some |
196 | * incoming edges */ |
197 | GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var; |
198 | const flat_set<GoughEdge> &live_edges |
199 | = vj->get_edges_for_input(def); |
200 | for (const auto &e : live_edges) { |
201 | handle_pending_edge(g, e, nullptr, pending_vertex, rv); |
202 | } |
203 | continue; |
204 | } |
205 | const GoughEdge &e = aux.containing_e.at(var); |
206 | handle_pending_edge(g, e, var, pending_vertex, rv); |
207 | } |
208 | } |
209 | |
210 | static |
211 | void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g, |
212 | GoughVertex current, |
213 | set<GoughVertex> &pending_vertex, |
214 | set<const GoughSSAVar *> &rv) { |
215 | DEBUG_PRINTF("---checking vertex %u\n" , g[current].state_id); |
216 | if (def_v == current) { |
217 | DEBUG_PRINTF("contains target vertex\n" ); |
218 | return; /* we have reached def */ |
219 | } |
220 | for (const auto &e : in_edges_range(current, g)) { |
221 | handle_pending_edge(g, e, nullptr, pending_vertex, rv); |
222 | } |
223 | } |
224 | |
225 | static |
226 | void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g, |
227 | const GoughGraphAux &aux, |
228 | set<GoughVertex> &pending_vertex, |
229 | set<const GoughSSAVar *> &rv) { |
230 | if (pending_vertex.empty()) { |
231 | return; |
232 | } |
233 | |
234 | GoughVertex def_v = GoughGraph::null_vertex(); |
235 | if (contains(aux.containing_v, def)) { |
236 | def_v = aux.containing_v.at(def); |
237 | } |
238 | unordered_set<GoughVertex> done; |
239 | while (!pending_vertex.empty()) { |
240 | GoughVertex current = *pending_vertex.begin(); |
241 | pending_vertex.erase(current); |
242 | if (contains(done, current)) { |
243 | continue; |
244 | } |
245 | done.insert(current); |
246 | handle_pending_vertex(def_v, g, current, pending_vertex, rv); |
247 | } |
248 | } |
249 | |
250 | /* returns set of labels that the given def is live at */ |
251 | static never_inline |
252 | set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g, |
253 | const GoughGraphAux &aux) { |
254 | DEBUG_PRINTF("checking who is defined during %u lifetime\n" , def->slot); |
255 | set<GoughVertex> pending_vertex; |
256 | |
257 | set<const GoughSSAVar *> rv; |
258 | rv.insert(def); |
259 | |
260 | if (contains(aux.reporters, def)) { |
261 | DEBUG_PRINTF("--> gets reported\n" ); |
262 | const set<GoughVertex> &reporters = aux.reporters.at(def); |
263 | for (auto v : reporters) { |
264 | pending_vertex.insert(v); |
265 | for (const auto &var : g[v].vars) { |
266 | DEBUG_PRINTF("interferes %u\n" , var->slot); |
267 | rv.insert(var.get()); |
268 | } |
269 | } |
270 | } |
271 | |
272 | handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv); |
273 | handle_pending_vertices(def, g, aux, pending_vertex, rv); |
274 | |
275 | rv.erase(def); |
276 | return rv; |
277 | } |
278 | |
279 | template<typename VarP> |
280 | void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) { |
281 | for (auto &var : vars) { |
282 | assert(var->slot == INVALID_SLOT); |
283 | var->slot = (*next_slot)++; |
284 | } |
285 | } |
286 | |
287 | /* crude, deterministic assignment of symbolic register slots. |
288 | * returns number of slots given out |
289 | */ |
290 | static |
291 | u32 initial_slots(const GoughGraph &g) { |
292 | u32 next_slot = 0; |
293 | for (auto v : vertices_range(g)) { |
294 | set_initial_slots(g[v].vars, &next_slot); |
295 | } |
296 | for (const auto &e : edges_range(g)) { |
297 | set_initial_slots(g[e].vars, &next_slot); |
298 | } |
299 | |
300 | return next_slot; |
301 | } |
302 | |
303 | #define NO_COLOUR (~0U) |
304 | |
305 | static |
306 | u32 available_colour(const flat_set<u32> &bad_colours) { |
307 | u32 rv = 0; |
308 | for (const u32 &colour : bad_colours) { |
309 | if (colour != rv) { |
310 | assert(colour > rv); |
311 | break; |
312 | } |
313 | rv = colour + 1; |
314 | } |
315 | |
316 | assert(rv != NO_COLOUR); |
317 | return rv; |
318 | } |
319 | |
320 | static |
321 | void poison_colours(const set<const GoughSSAVar *> &live, u32 c, |
322 | const vector<u32> &colour_map, |
323 | vector<flat_set<u32> > *bad_colour) { |
324 | for (const GoughSSAVar *var : live) { |
325 | u32 var_index = var->slot; |
326 | if (colour_map[var_index] != NO_COLOUR) { |
327 | assert(c != colour_map[var_index]); |
328 | } else { |
329 | (*bad_colour)[var_index].insert(c); |
330 | } |
331 | } |
332 | } |
333 | |
334 | static |
335 | void find_bad_due_to_live(const set<const GoughSSAVar *> &live, |
336 | const vector<u32> &colour_map, flat_set<u32> *out) { |
337 | for (const GoughSSAVar *var : live) { |
338 | u32 var_index = var->slot; |
339 | if (colour_map[var_index] != NO_COLOUR) { |
340 | out->insert(colour_map[var_index]); |
341 | } |
342 | } |
343 | } |
344 | |
345 | static |
346 | void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux, |
347 | const vector<GoughSSAVar *> &order, |
348 | vector<u32> &colour_map) { |
349 | assert(order.size() < NO_COLOUR); |
350 | colour_map.clear(); |
351 | colour_map.resize(order.size(), NO_COLOUR); |
352 | vector<u32> temp(order.size(), ~0U); |
353 | vector<flat_set<u32> > bad_colour(order.size()); |
354 | |
355 | for (GoughSSAVar *var : order) { |
356 | u32 var_index = var->slot; |
357 | if (is_block_local(g, var, aux)) { |
358 | DEBUG_PRINTF("%u is block local\n" , var_index); |
359 | /* ignore variable whose lifetime is limited to their local block |
360 | * there is no need to assign stream state to these variables */ |
361 | continue; |
362 | } |
363 | assert(colour_map[var_index] == NO_COLOUR); |
364 | set<const GoughSSAVar *> live = live_during(var, g, aux); |
365 | flat_set<u32> &local_bad = bad_colour[var_index]; |
366 | find_bad_due_to_live(live, colour_map, &local_bad); |
367 | DEBUG_PRINTF("colouring %u\n" , var_index); |
368 | u32 c = available_colour(local_bad); |
369 | colour_map[var_index] = c; |
370 | assert(!contains(bad_colour[var_index], c)); |
371 | poison_colours(live, c, colour_map, &bad_colour); |
372 | |
373 | flat_set<u32> temp_set; |
374 | local_bad.swap(temp_set); |
375 | DEBUG_PRINTF(" %u coloured %u\n" , var_index, c); |
376 | } |
377 | } |
378 | |
379 | template<typename VarP> |
380 | void add_to_dom_ordering(const vector<VarP> &vars, |
381 | vector<GoughSSAVar *> *out) { |
382 | for (const auto &var : vars) { |
383 | out->push_back(var.get()); |
384 | } |
385 | } |
386 | |
387 | namespace { |
388 | class FinishVisitor : public boost::default_dfs_visitor { |
389 | public: |
390 | explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {} |
391 | void finish_vertex(const GoughVertex v, const GoughGraph &) { |
392 | out->push_back(v); |
393 | } |
394 | vector<GoughVertex> *out; |
395 | }; |
396 | } |
397 | |
398 | static |
399 | void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) { |
400 | vector<GoughVertex> g_order; |
401 | |
402 | /* due to construction quirks, default vertex order provides entry points */ |
403 | depth_first_search(cfg, visitor(FinishVisitor(&g_order)) |
404 | .root_vertex(cfg[boost::graph_bundle].initial_vertex)); |
405 | |
406 | for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) { |
407 | add_to_dom_ordering(cfg[*it].vars, out); |
408 | for (const auto &e : out_edges_range(*it, cfg)) { |
409 | add_to_dom_ordering(cfg[e].vars, out); |
410 | } |
411 | } |
412 | } |
413 | |
414 | static |
415 | void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count, |
416 | vector<u32> *old_new) { |
417 | /* Interference graphs from SSA form are chordal -> optimally colourable in |
418 | * poly time. |
419 | * |
420 | * Chordal graphs can be coloured by walking in perfect elimination order. |
421 | * If the SSA CFG is iterated over in a way that respects dominance |
422 | * relationship, the interference graph will be iterated in a perfect |
423 | * elimination order. |
424 | * |
425 | * We can avoid creating the full interference graph and use liveness |
426 | * information as we iterate over the definitions to perform the colouring. |
427 | * |
428 | * See S Hack various 2006- |
429 | */ |
430 | vector<GoughSSAVar *> dom_order; |
431 | |
432 | GoughGraphAux aux; |
433 | fill_aux(cfg, &aux); |
434 | |
435 | find_dom_ordering(cfg, &dom_order); |
436 | assert(dom_order.size() == old_slot_count); |
437 | sequential_vertex_colouring(cfg, aux, dom_order, *old_new); |
438 | } |
439 | |
440 | static |
441 | void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals, |
442 | u32 local_base) { |
443 | DEBUG_PRINTF("%zu local variables\n" , locals.size()); |
444 | /* local variables only occur on edges (joins are never local) */ |
445 | |
446 | u32 allocated_count = 0; |
447 | for (const auto &e : edges_range(g)) { |
448 | u32 next_slot = local_base; |
449 | for (auto &var : g[e].vars) { |
450 | if (contains(locals, var.get())) { |
451 | DEBUG_PRINTF("updating slot %u using local %u\n" , var->slot, |
452 | next_slot); |
453 | var->slot = next_slot++; |
454 | allocated_count++; |
455 | } |
456 | } |
457 | } |
458 | |
459 | assert(allocated_count == locals.size()); |
460 | } |
461 | |
462 | static never_inline |
463 | u32 update_slots(GoughGraph &g, const vector<u32> &old_new, |
464 | UNUSED u32 old_slot_count) { |
465 | vector<GoughSSAVar *> vars; |
466 | set<GoughSSAVar *> locals; |
467 | all_vars(g, &vars); |
468 | u32 slot_count = 0; |
469 | for (GoughSSAVar *v : vars) { |
470 | assert(v->slot < old_new.size()); |
471 | DEBUG_PRINTF("updating slot %u to %u\n" , v->slot, old_new[v->slot]); |
472 | if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */ |
473 | v->slot = old_new[v->slot]; |
474 | ENSURE_AT_LEAST(&slot_count, v->slot + 1); |
475 | } else { |
476 | locals.insert(v); |
477 | } |
478 | } |
479 | assert(slot_count <= old_slot_count); |
480 | DEBUG_PRINTF("reduce stream slots from %u to %u\n" , old_slot_count, |
481 | slot_count); |
482 | update_local_slots(g, locals, slot_count); |
483 | |
484 | return slot_count; |
485 | } |
486 | |
487 | u32 assign_slots(GoughGraph &cfg, const Grey &grey) { |
488 | u32 slot_count = initial_slots(cfg); |
489 | |
490 | if (!grey.goughRegisterAllocate) { |
491 | return slot_count; |
492 | } |
493 | dump(cfg, "slots_pre" , grey); |
494 | |
495 | vector<u32> old_new; |
496 | create_slot_mapping(cfg, slot_count, &old_new); |
497 | slot_count = update_slots(cfg, old_new, slot_count); |
498 | |
499 | return slot_count; |
500 | } |
501 | |
502 | } // namespace ue2 |
503 | |