1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "goughcompile.h"
30#include "goughcompile_dump.h"
31#include "goughcompile_internal.h"
32#include "gough_internal.h"
33#include "grey.h"
34#include "util/container.h"
35#include "util/flat_containers.h"
36#include "util/graph.h"
37#include "util/graph_range.h"
38#include "util/order_check.h"
39
40#include "ue2common.h"
41
42#include <algorithm>
43#include <boost/graph/depth_first_search.hpp>
44#include <boost/range/adaptor/map.hpp>
45
46using namespace std;
47using boost::adaptors::map_values;
48
49namespace ue2 {
50
51template<typename VarP, typename VarQ>
52void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) {
53 for (const auto &var : in) {
54 out->push_back(var.get());
55 }
56}
57
58static
59void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
60 for (auto v : vertices_range(g)) {
61 push_back_all_raw(out, g[v].vars);
62 }
63 for (const auto &e : edges_range(g)) {
64 push_back_all_raw(out, g[e].vars);
65 }
66}
67
68namespace {
69struct GoughGraphAux {
70 map<const GoughSSAVar *, GoughVertex> containing_v;
71 map<const GoughSSAVar *, GoughEdge> containing_e;
72 map<const GoughSSAVar *, set<GoughVertex> > reporters;
73};
74}
75
76static never_inline
77void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
78 for (auto v : vertices_range(g)) {
79 for (const auto &var : g[v].vars) {
80 aux->containing_v[var.get()] = v;
81 DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id);
82 }
83
84 for (GoughSSAVar *var : g[v].reports | map_values) {
85 aux->reporters[var].insert(v);
86 }
87
88 for (GoughSSAVar *var : g[v].reports_eod | map_values) {
89 aux->reporters[var].insert(v);
90 }
91 }
92 for (const auto &e : edges_range(g)) {
93 for (const auto &var : g[e].vars) {
94 aux->containing_e[var.get()] = e;
95 DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot,
96 g[source(e, g)].state_id, g[target(e, g)].state_id);
97 }
98 }
99}
100
101static
102bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var,
103 const GoughGraphAux &aux) {
104 /* if var used as a report, it cannot be considered block local */
105 if (contains(aux.reporters, var)) {
106 return false;
107 }
108
109 /* (useful) vertex/join vars never local - they are terminal in blocks
110 * and so should be read by another block. */
111 if (!contains(aux.containing_e, var)) {
112 return false;
113 }
114
115 /* for other cases, require that all uses of var are later in the same edge
116 * or on the target AND if on target it is sole on flow coming from the
117 * edge in question. */
118 const GoughEdge &e = aux.containing_e.at(var);
119 GoughVertex t = target(e, cfg);
120
121 size_t seen_outputs = 0;
122 const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs();
123 bool seen_var = false;
124 for (const auto &e_var : cfg[e].vars) {
125 if (seen_var) {
126 GoughSSAVarWithInputs *w
127 = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get());
128 if (contains(out, w)) {
129 seen_outputs++;
130 }
131 } else {
132 seen_var = var == e_var.get();
133 }
134 }
135 assert(seen_var);
136
137 for (const auto &t_var : cfg[t].vars) {
138 if (contains(out, t_var.get())) {
139 seen_outputs++;
140 const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var);
141 if (flow.size() != 1 || *flow.begin() != e) {
142 /* this var is used by the target join var BUT on a different
143 * flow, so this is not a block local variable */
144 return false;
145 }
146 }
147 }
148
149 assert(seen_outputs <= out.size());
150 return seen_outputs == out.size();
151}
152
153static
154void handle_pending_edge(const GoughGraph &g, const GoughEdge &e,
155 GoughSSAVar *start, set<GoughVertex> &pending_vertex,
156 set<const GoughSSAVar *> &rv) {
157 const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars;
158 bool marking = !start;
159 DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n", g[source(e, g)].state_id,
160 g[target(e, g)].state_id, marking ? "full" : "partial",
161 vars.size());
162 for (auto it = vars.rbegin(); it != vars.rend(); ++it) {
163 GoughSSAVar *var = it->get();
164 if (contains(rv, var)) {
165 DEBUG_PRINTF("somebody has already processed this vertex [%u]\n",
166 var->slot);
167 return;
168 }
169 if (var == start) {
170 assert(!marking);
171 marking = true;
172 continue;
173 }
174 if (marking) {
175 rv.insert(var);
176 }
177 }
178 assert(marking);
179 GoughVertex s = source(e, g);
180 for (const auto &var : g[s].vars) {
181 DEBUG_PRINTF("interferes %u\n", var->slot);
182 rv.insert(var.get());
183 }
184 pending_vertex.insert(s);
185}
186
187static
188void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
189 const GoughGraphAux &aux,
190 const flat_set<GoughSSAVarWithInputs *> &pending_var,
191 set<GoughVertex> &pending_vertex,
192 set<const GoughSSAVar *> &rv) {
193 for (GoughSSAVarWithInputs *var : pending_var) {
194 if (contains(aux.containing_v, var)) {
195 /* def is used by join vertex, value only needs to be live on some
196 * incoming edges */
197 GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var;
198 const flat_set<GoughEdge> &live_edges
199 = vj->get_edges_for_input(def);
200 for (const auto &e : live_edges) {
201 handle_pending_edge(g, e, nullptr, pending_vertex, rv);
202 }
203 continue;
204 }
205 const GoughEdge &e = aux.containing_e.at(var);
206 handle_pending_edge(g, e, var, pending_vertex, rv);
207 }
208}
209
210static
211void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g,
212 GoughVertex current,
213 set<GoughVertex> &pending_vertex,
214 set<const GoughSSAVar *> &rv) {
215 DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id);
216 if (def_v == current) {
217 DEBUG_PRINTF("contains target vertex\n");
218 return; /* we have reached def */
219 }
220 for (const auto &e : in_edges_range(current, g)) {
221 handle_pending_edge(g, e, nullptr, pending_vertex, rv);
222 }
223}
224
225static
226void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
227 const GoughGraphAux &aux,
228 set<GoughVertex> &pending_vertex,
229 set<const GoughSSAVar *> &rv) {
230 if (pending_vertex.empty()) {
231 return;
232 }
233
234 GoughVertex def_v = GoughGraph::null_vertex();
235 if (contains(aux.containing_v, def)) {
236 def_v = aux.containing_v.at(def);
237 }
238 unordered_set<GoughVertex> done;
239 while (!pending_vertex.empty()) {
240 GoughVertex current = *pending_vertex.begin();
241 pending_vertex.erase(current);
242 if (contains(done, current)) {
243 continue;
244 }
245 done.insert(current);
246 handle_pending_vertex(def_v, g, current, pending_vertex, rv);
247 }
248}
249
250/* returns set of labels that the given def is live at */
251static never_inline
252set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
253 const GoughGraphAux &aux) {
254 DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot);
255 set<GoughVertex> pending_vertex;
256
257 set<const GoughSSAVar *> rv;
258 rv.insert(def);
259
260 if (contains(aux.reporters, def)) {
261 DEBUG_PRINTF("--> gets reported\n");
262 const set<GoughVertex> &reporters = aux.reporters.at(def);
263 for (auto v : reporters) {
264 pending_vertex.insert(v);
265 for (const auto &var : g[v].vars) {
266 DEBUG_PRINTF("interferes %u\n", var->slot);
267 rv.insert(var.get());
268 }
269 }
270 }
271
272 handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv);
273 handle_pending_vertices(def, g, aux, pending_vertex, rv);
274
275 rv.erase(def);
276 return rv;
277}
278
279template<typename VarP>
280void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
281 for (auto &var : vars) {
282 assert(var->slot == INVALID_SLOT);
283 var->slot = (*next_slot)++;
284 }
285}
286
287/* crude, deterministic assignment of symbolic register slots.
288 * returns number of slots given out
289 */
290static
291u32 initial_slots(const GoughGraph &g) {
292 u32 next_slot = 0;
293 for (auto v : vertices_range(g)) {
294 set_initial_slots(g[v].vars, &next_slot);
295 }
296 for (const auto &e : edges_range(g)) {
297 set_initial_slots(g[e].vars, &next_slot);
298 }
299
300 return next_slot;
301}
302
303#define NO_COLOUR (~0U)
304
305static
306u32 available_colour(const flat_set<u32> &bad_colours) {
307 u32 rv = 0;
308 for (const u32 &colour : bad_colours) {
309 if (colour != rv) {
310 assert(colour > rv);
311 break;
312 }
313 rv = colour + 1;
314 }
315
316 assert(rv != NO_COLOUR);
317 return rv;
318}
319
320static
321void poison_colours(const set<const GoughSSAVar *> &live, u32 c,
322 const vector<u32> &colour_map,
323 vector<flat_set<u32> > *bad_colour) {
324 for (const GoughSSAVar *var : live) {
325 u32 var_index = var->slot;
326 if (colour_map[var_index] != NO_COLOUR) {
327 assert(c != colour_map[var_index]);
328 } else {
329 (*bad_colour)[var_index].insert(c);
330 }
331 }
332}
333
334static
335void find_bad_due_to_live(const set<const GoughSSAVar *> &live,
336 const vector<u32> &colour_map, flat_set<u32> *out) {
337 for (const GoughSSAVar *var : live) {
338 u32 var_index = var->slot;
339 if (colour_map[var_index] != NO_COLOUR) {
340 out->insert(colour_map[var_index]);
341 }
342 }
343}
344
345static
346void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux,
347 const vector<GoughSSAVar *> &order,
348 vector<u32> &colour_map) {
349 assert(order.size() < NO_COLOUR);
350 colour_map.clear();
351 colour_map.resize(order.size(), NO_COLOUR);
352 vector<u32> temp(order.size(), ~0U);
353 vector<flat_set<u32> > bad_colour(order.size());
354
355 for (GoughSSAVar *var : order) {
356 u32 var_index = var->slot;
357 if (is_block_local(g, var, aux)) {
358 DEBUG_PRINTF("%u is block local\n", var_index);
359 /* ignore variable whose lifetime is limited to their local block
360 * there is no need to assign stream state to these variables */
361 continue;
362 }
363 assert(colour_map[var_index] == NO_COLOUR);
364 set<const GoughSSAVar *> live = live_during(var, g, aux);
365 flat_set<u32> &local_bad = bad_colour[var_index];
366 find_bad_due_to_live(live, colour_map, &local_bad);
367 DEBUG_PRINTF("colouring %u\n", var_index);
368 u32 c = available_colour(local_bad);
369 colour_map[var_index] = c;
370 assert(!contains(bad_colour[var_index], c));
371 poison_colours(live, c, colour_map, &bad_colour);
372
373 flat_set<u32> temp_set;
374 local_bad.swap(temp_set);
375 DEBUG_PRINTF(" %u coloured %u\n", var_index, c);
376 }
377}
378
379template<typename VarP>
380void add_to_dom_ordering(const vector<VarP> &vars,
381 vector<GoughSSAVar *> *out) {
382 for (const auto &var : vars) {
383 out->push_back(var.get());
384 }
385}
386
387namespace {
388class FinishVisitor : public boost::default_dfs_visitor {
389public:
390 explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {}
391 void finish_vertex(const GoughVertex v, const GoughGraph &) {
392 out->push_back(v);
393 }
394 vector<GoughVertex> *out;
395};
396}
397
398static
399void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) {
400 vector<GoughVertex> g_order;
401
402 /* due to construction quirks, default vertex order provides entry points */
403 depth_first_search(cfg, visitor(FinishVisitor(&g_order))
404 .root_vertex(cfg[boost::graph_bundle].initial_vertex));
405
406 for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) {
407 add_to_dom_ordering(cfg[*it].vars, out);
408 for (const auto &e : out_edges_range(*it, cfg)) {
409 add_to_dom_ordering(cfg[e].vars, out);
410 }
411 }
412}
413
414static
415void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
416 vector<u32> *old_new) {
417 /* Interference graphs from SSA form are chordal -> optimally colourable in
418 * poly time.
419 *
420 * Chordal graphs can be coloured by walking in perfect elimination order.
421 * If the SSA CFG is iterated over in a way that respects dominance
422 * relationship, the interference graph will be iterated in a perfect
423 * elimination order.
424 *
425 * We can avoid creating the full interference graph and use liveness
426 * information as we iterate over the definitions to perform the colouring.
427 *
428 * See S Hack various 2006-
429 */
430 vector<GoughSSAVar *> dom_order;
431
432 GoughGraphAux aux;
433 fill_aux(cfg, &aux);
434
435 find_dom_ordering(cfg, &dom_order);
436 assert(dom_order.size() == old_slot_count);
437 sequential_vertex_colouring(cfg, aux, dom_order, *old_new);
438}
439
440static
441void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
442 u32 local_base) {
443 DEBUG_PRINTF("%zu local variables\n", locals.size());
444 /* local variables only occur on edges (joins are never local) */
445
446 u32 allocated_count = 0;
447 for (const auto &e : edges_range(g)) {
448 u32 next_slot = local_base;
449 for (auto &var : g[e].vars) {
450 if (contains(locals, var.get())) {
451 DEBUG_PRINTF("updating slot %u using local %u\n", var->slot,
452 next_slot);
453 var->slot = next_slot++;
454 allocated_count++;
455 }
456 }
457 }
458
459 assert(allocated_count == locals.size());
460}
461
462static never_inline
463u32 update_slots(GoughGraph &g, const vector<u32> &old_new,
464 UNUSED u32 old_slot_count) {
465 vector<GoughSSAVar *> vars;
466 set<GoughSSAVar *> locals;
467 all_vars(g, &vars);
468 u32 slot_count = 0;
469 for (GoughSSAVar *v : vars) {
470 assert(v->slot < old_new.size());
471 DEBUG_PRINTF("updating slot %u to %u\n", v->slot, old_new[v->slot]);
472 if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */
473 v->slot = old_new[v->slot];
474 ENSURE_AT_LEAST(&slot_count, v->slot + 1);
475 } else {
476 locals.insert(v);
477 }
478 }
479 assert(slot_count <= old_slot_count);
480 DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count,
481 slot_count);
482 update_local_slots(g, locals, slot_count);
483
484 return slot_count;
485}
486
487u32 assign_slots(GoughGraph &cfg, const Grey &grey) {
488 u32 slot_count = initial_slots(cfg);
489
490 if (!grey.goughRegisterAllocate) {
491 return slot_count;
492 }
493 dump(cfg, "slots_pre", grey);
494
495 vector<u32> old_new;
496 create_slot_mapping(cfg, slot_count, &old_new);
497 slot_count = update_slots(cfg, old_new, slot_count);
498
499 return slot_count;
500}
501
502} // namespace ue2
503