| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "goughcompile.h" |
| 30 | #include "goughcompile_dump.h" |
| 31 | #include "goughcompile_internal.h" |
| 32 | #include "gough_internal.h" |
| 33 | #include "grey.h" |
| 34 | #include "util/container.h" |
| 35 | #include "util/flat_containers.h" |
| 36 | #include "util/graph.h" |
| 37 | #include "util/graph_range.h" |
| 38 | #include "util/order_check.h" |
| 39 | |
| 40 | #include "ue2common.h" |
| 41 | |
| 42 | #include <algorithm> |
| 43 | #include <boost/graph/depth_first_search.hpp> |
| 44 | #include <boost/range/adaptor/map.hpp> |
| 45 | |
| 46 | using namespace std; |
| 47 | using boost::adaptors::map_values; |
| 48 | |
| 49 | namespace ue2 { |
| 50 | |
| 51 | template<typename VarP, typename VarQ> |
| 52 | void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) { |
| 53 | for (const auto &var : in) { |
| 54 | out->push_back(var.get()); |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | static |
| 59 | void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) { |
| 60 | for (auto v : vertices_range(g)) { |
| 61 | push_back_all_raw(out, g[v].vars); |
| 62 | } |
| 63 | for (const auto &e : edges_range(g)) { |
| 64 | push_back_all_raw(out, g[e].vars); |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | namespace { |
| 69 | struct GoughGraphAux { |
| 70 | map<const GoughSSAVar *, GoughVertex> containing_v; |
| 71 | map<const GoughSSAVar *, GoughEdge> containing_e; |
| 72 | map<const GoughSSAVar *, set<GoughVertex> > reporters; |
| 73 | }; |
| 74 | } |
| 75 | |
| 76 | static never_inline |
| 77 | void fill_aux(const GoughGraph &g, GoughGraphAux *aux) { |
| 78 | for (auto v : vertices_range(g)) { |
| 79 | for (const auto &var : g[v].vars) { |
| 80 | aux->containing_v[var.get()] = v; |
| 81 | DEBUG_PRINTF("%u is on vertex %u\n" , var->slot, g[v].state_id); |
| 82 | } |
| 83 | |
| 84 | for (GoughSSAVar *var : g[v].reports | map_values) { |
| 85 | aux->reporters[var].insert(v); |
| 86 | } |
| 87 | |
| 88 | for (GoughSSAVar *var : g[v].reports_eod | map_values) { |
| 89 | aux->reporters[var].insert(v); |
| 90 | } |
| 91 | } |
| 92 | for (const auto &e : edges_range(g)) { |
| 93 | for (const auto &var : g[e].vars) { |
| 94 | aux->containing_e[var.get()] = e; |
| 95 | DEBUG_PRINTF("%u is on edge %u->%u\n" , var->slot, |
| 96 | g[source(e, g)].state_id, g[target(e, g)].state_id); |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | static |
| 102 | bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var, |
| 103 | const GoughGraphAux &aux) { |
| 104 | /* if var used as a report, it cannot be considered block local */ |
| 105 | if (contains(aux.reporters, var)) { |
| 106 | return false; |
| 107 | } |
| 108 | |
| 109 | /* (useful) vertex/join vars never local - they are terminal in blocks |
| 110 | * and so should be read by another block. */ |
| 111 | if (!contains(aux.containing_e, var)) { |
| 112 | return false; |
| 113 | } |
| 114 | |
| 115 | /* for other cases, require that all uses of var are later in the same edge |
| 116 | * or on the target AND if on target it is sole on flow coming from the |
| 117 | * edge in question. */ |
| 118 | const GoughEdge &e = aux.containing_e.at(var); |
| 119 | GoughVertex t = target(e, cfg); |
| 120 | |
| 121 | size_t seen_outputs = 0; |
| 122 | const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs(); |
| 123 | bool seen_var = false; |
| 124 | for (const auto &e_var : cfg[e].vars) { |
| 125 | if (seen_var) { |
| 126 | GoughSSAVarWithInputs *w |
| 127 | = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get()); |
| 128 | if (contains(out, w)) { |
| 129 | seen_outputs++; |
| 130 | } |
| 131 | } else { |
| 132 | seen_var = var == e_var.get(); |
| 133 | } |
| 134 | } |
| 135 | assert(seen_var); |
| 136 | |
| 137 | for (const auto &t_var : cfg[t].vars) { |
| 138 | if (contains(out, t_var.get())) { |
| 139 | seen_outputs++; |
| 140 | const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var); |
| 141 | if (flow.size() != 1 || *flow.begin() != e) { |
| 142 | /* this var is used by the target join var BUT on a different |
| 143 | * flow, so this is not a block local variable */ |
| 144 | return false; |
| 145 | } |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | assert(seen_outputs <= out.size()); |
| 150 | return seen_outputs == out.size(); |
| 151 | } |
| 152 | |
| 153 | static |
| 154 | void handle_pending_edge(const GoughGraph &g, const GoughEdge &e, |
| 155 | GoughSSAVar *start, set<GoughVertex> &pending_vertex, |
| 156 | set<const GoughSSAVar *> &rv) { |
| 157 | const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars; |
| 158 | bool marking = !start; |
| 159 | DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n" , g[source(e, g)].state_id, |
| 160 | g[target(e, g)].state_id, marking ? "full" : "partial" , |
| 161 | vars.size()); |
| 162 | for (auto it = vars.rbegin(); it != vars.rend(); ++it) { |
| 163 | GoughSSAVar *var = it->get(); |
| 164 | if (contains(rv, var)) { |
| 165 | DEBUG_PRINTF("somebody has already processed this vertex [%u]\n" , |
| 166 | var->slot); |
| 167 | return; |
| 168 | } |
| 169 | if (var == start) { |
| 170 | assert(!marking); |
| 171 | marking = true; |
| 172 | continue; |
| 173 | } |
| 174 | if (marking) { |
| 175 | rv.insert(var); |
| 176 | } |
| 177 | } |
| 178 | assert(marking); |
| 179 | GoughVertex s = source(e, g); |
| 180 | for (const auto &var : g[s].vars) { |
| 181 | DEBUG_PRINTF("interferes %u\n" , var->slot); |
| 182 | rv.insert(var.get()); |
| 183 | } |
| 184 | pending_vertex.insert(s); |
| 185 | } |
| 186 | |
| 187 | static |
| 188 | void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g, |
| 189 | const GoughGraphAux &aux, |
| 190 | const flat_set<GoughSSAVarWithInputs *> &pending_var, |
| 191 | set<GoughVertex> &pending_vertex, |
| 192 | set<const GoughSSAVar *> &rv) { |
| 193 | for (GoughSSAVarWithInputs *var : pending_var) { |
| 194 | if (contains(aux.containing_v, var)) { |
| 195 | /* def is used by join vertex, value only needs to be live on some |
| 196 | * incoming edges */ |
| 197 | GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var; |
| 198 | const flat_set<GoughEdge> &live_edges |
| 199 | = vj->get_edges_for_input(def); |
| 200 | for (const auto &e : live_edges) { |
| 201 | handle_pending_edge(g, e, nullptr, pending_vertex, rv); |
| 202 | } |
| 203 | continue; |
| 204 | } |
| 205 | const GoughEdge &e = aux.containing_e.at(var); |
| 206 | handle_pending_edge(g, e, var, pending_vertex, rv); |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | static |
| 211 | void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g, |
| 212 | GoughVertex current, |
| 213 | set<GoughVertex> &pending_vertex, |
| 214 | set<const GoughSSAVar *> &rv) { |
| 215 | DEBUG_PRINTF("---checking vertex %u\n" , g[current].state_id); |
| 216 | if (def_v == current) { |
| 217 | DEBUG_PRINTF("contains target vertex\n" ); |
| 218 | return; /* we have reached def */ |
| 219 | } |
| 220 | for (const auto &e : in_edges_range(current, g)) { |
| 221 | handle_pending_edge(g, e, nullptr, pending_vertex, rv); |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | static |
| 226 | void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g, |
| 227 | const GoughGraphAux &aux, |
| 228 | set<GoughVertex> &pending_vertex, |
| 229 | set<const GoughSSAVar *> &rv) { |
| 230 | if (pending_vertex.empty()) { |
| 231 | return; |
| 232 | } |
| 233 | |
| 234 | GoughVertex def_v = GoughGraph::null_vertex(); |
| 235 | if (contains(aux.containing_v, def)) { |
| 236 | def_v = aux.containing_v.at(def); |
| 237 | } |
| 238 | unordered_set<GoughVertex> done; |
| 239 | while (!pending_vertex.empty()) { |
| 240 | GoughVertex current = *pending_vertex.begin(); |
| 241 | pending_vertex.erase(current); |
| 242 | if (contains(done, current)) { |
| 243 | continue; |
| 244 | } |
| 245 | done.insert(current); |
| 246 | handle_pending_vertex(def_v, g, current, pending_vertex, rv); |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | /* returns set of labels that the given def is live at */ |
| 251 | static never_inline |
| 252 | set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g, |
| 253 | const GoughGraphAux &aux) { |
| 254 | DEBUG_PRINTF("checking who is defined during %u lifetime\n" , def->slot); |
| 255 | set<GoughVertex> pending_vertex; |
| 256 | |
| 257 | set<const GoughSSAVar *> rv; |
| 258 | rv.insert(def); |
| 259 | |
| 260 | if (contains(aux.reporters, def)) { |
| 261 | DEBUG_PRINTF("--> gets reported\n" ); |
| 262 | const set<GoughVertex> &reporters = aux.reporters.at(def); |
| 263 | for (auto v : reporters) { |
| 264 | pending_vertex.insert(v); |
| 265 | for (const auto &var : g[v].vars) { |
| 266 | DEBUG_PRINTF("interferes %u\n" , var->slot); |
| 267 | rv.insert(var.get()); |
| 268 | } |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv); |
| 273 | handle_pending_vertices(def, g, aux, pending_vertex, rv); |
| 274 | |
| 275 | rv.erase(def); |
| 276 | return rv; |
| 277 | } |
| 278 | |
| 279 | template<typename VarP> |
| 280 | void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) { |
| 281 | for (auto &var : vars) { |
| 282 | assert(var->slot == INVALID_SLOT); |
| 283 | var->slot = (*next_slot)++; |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | /* crude, deterministic assignment of symbolic register slots. |
| 288 | * returns number of slots given out |
| 289 | */ |
| 290 | static |
| 291 | u32 initial_slots(const GoughGraph &g) { |
| 292 | u32 next_slot = 0; |
| 293 | for (auto v : vertices_range(g)) { |
| 294 | set_initial_slots(g[v].vars, &next_slot); |
| 295 | } |
| 296 | for (const auto &e : edges_range(g)) { |
| 297 | set_initial_slots(g[e].vars, &next_slot); |
| 298 | } |
| 299 | |
| 300 | return next_slot; |
| 301 | } |
| 302 | |
| 303 | #define NO_COLOUR (~0U) |
| 304 | |
| 305 | static |
| 306 | u32 available_colour(const flat_set<u32> &bad_colours) { |
| 307 | u32 rv = 0; |
| 308 | for (const u32 &colour : bad_colours) { |
| 309 | if (colour != rv) { |
| 310 | assert(colour > rv); |
| 311 | break; |
| 312 | } |
| 313 | rv = colour + 1; |
| 314 | } |
| 315 | |
| 316 | assert(rv != NO_COLOUR); |
| 317 | return rv; |
| 318 | } |
| 319 | |
| 320 | static |
| 321 | void poison_colours(const set<const GoughSSAVar *> &live, u32 c, |
| 322 | const vector<u32> &colour_map, |
| 323 | vector<flat_set<u32> > *bad_colour) { |
| 324 | for (const GoughSSAVar *var : live) { |
| 325 | u32 var_index = var->slot; |
| 326 | if (colour_map[var_index] != NO_COLOUR) { |
| 327 | assert(c != colour_map[var_index]); |
| 328 | } else { |
| 329 | (*bad_colour)[var_index].insert(c); |
| 330 | } |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | static |
| 335 | void find_bad_due_to_live(const set<const GoughSSAVar *> &live, |
| 336 | const vector<u32> &colour_map, flat_set<u32> *out) { |
| 337 | for (const GoughSSAVar *var : live) { |
| 338 | u32 var_index = var->slot; |
| 339 | if (colour_map[var_index] != NO_COLOUR) { |
| 340 | out->insert(colour_map[var_index]); |
| 341 | } |
| 342 | } |
| 343 | } |
| 344 | |
| 345 | static |
| 346 | void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux, |
| 347 | const vector<GoughSSAVar *> &order, |
| 348 | vector<u32> &colour_map) { |
| 349 | assert(order.size() < NO_COLOUR); |
| 350 | colour_map.clear(); |
| 351 | colour_map.resize(order.size(), NO_COLOUR); |
| 352 | vector<u32> temp(order.size(), ~0U); |
| 353 | vector<flat_set<u32> > bad_colour(order.size()); |
| 354 | |
| 355 | for (GoughSSAVar *var : order) { |
| 356 | u32 var_index = var->slot; |
| 357 | if (is_block_local(g, var, aux)) { |
| 358 | DEBUG_PRINTF("%u is block local\n" , var_index); |
| 359 | /* ignore variable whose lifetime is limited to their local block |
| 360 | * there is no need to assign stream state to these variables */ |
| 361 | continue; |
| 362 | } |
| 363 | assert(colour_map[var_index] == NO_COLOUR); |
| 364 | set<const GoughSSAVar *> live = live_during(var, g, aux); |
| 365 | flat_set<u32> &local_bad = bad_colour[var_index]; |
| 366 | find_bad_due_to_live(live, colour_map, &local_bad); |
| 367 | DEBUG_PRINTF("colouring %u\n" , var_index); |
| 368 | u32 c = available_colour(local_bad); |
| 369 | colour_map[var_index] = c; |
| 370 | assert(!contains(bad_colour[var_index], c)); |
| 371 | poison_colours(live, c, colour_map, &bad_colour); |
| 372 | |
| 373 | flat_set<u32> temp_set; |
| 374 | local_bad.swap(temp_set); |
| 375 | DEBUG_PRINTF(" %u coloured %u\n" , var_index, c); |
| 376 | } |
| 377 | } |
| 378 | |
| 379 | template<typename VarP> |
| 380 | void add_to_dom_ordering(const vector<VarP> &vars, |
| 381 | vector<GoughSSAVar *> *out) { |
| 382 | for (const auto &var : vars) { |
| 383 | out->push_back(var.get()); |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | namespace { |
| 388 | class FinishVisitor : public boost::default_dfs_visitor { |
| 389 | public: |
| 390 | explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {} |
| 391 | void finish_vertex(const GoughVertex v, const GoughGraph &) { |
| 392 | out->push_back(v); |
| 393 | } |
| 394 | vector<GoughVertex> *out; |
| 395 | }; |
| 396 | } |
| 397 | |
| 398 | static |
| 399 | void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) { |
| 400 | vector<GoughVertex> g_order; |
| 401 | |
| 402 | /* due to construction quirks, default vertex order provides entry points */ |
| 403 | depth_first_search(cfg, visitor(FinishVisitor(&g_order)) |
| 404 | .root_vertex(cfg[boost::graph_bundle].initial_vertex)); |
| 405 | |
| 406 | for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) { |
| 407 | add_to_dom_ordering(cfg[*it].vars, out); |
| 408 | for (const auto &e : out_edges_range(*it, cfg)) { |
| 409 | add_to_dom_ordering(cfg[e].vars, out); |
| 410 | } |
| 411 | } |
| 412 | } |
| 413 | |
| 414 | static |
| 415 | void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count, |
| 416 | vector<u32> *old_new) { |
| 417 | /* Interference graphs from SSA form are chordal -> optimally colourable in |
| 418 | * poly time. |
| 419 | * |
| 420 | * Chordal graphs can be coloured by walking in perfect elimination order. |
| 421 | * If the SSA CFG is iterated over in a way that respects dominance |
| 422 | * relationship, the interference graph will be iterated in a perfect |
| 423 | * elimination order. |
| 424 | * |
| 425 | * We can avoid creating the full interference graph and use liveness |
| 426 | * information as we iterate over the definitions to perform the colouring. |
| 427 | * |
| 428 | * See S Hack various 2006- |
| 429 | */ |
| 430 | vector<GoughSSAVar *> dom_order; |
| 431 | |
| 432 | GoughGraphAux aux; |
| 433 | fill_aux(cfg, &aux); |
| 434 | |
| 435 | find_dom_ordering(cfg, &dom_order); |
| 436 | assert(dom_order.size() == old_slot_count); |
| 437 | sequential_vertex_colouring(cfg, aux, dom_order, *old_new); |
| 438 | } |
| 439 | |
| 440 | static |
| 441 | void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals, |
| 442 | u32 local_base) { |
| 443 | DEBUG_PRINTF("%zu local variables\n" , locals.size()); |
| 444 | /* local variables only occur on edges (joins are never local) */ |
| 445 | |
| 446 | u32 allocated_count = 0; |
| 447 | for (const auto &e : edges_range(g)) { |
| 448 | u32 next_slot = local_base; |
| 449 | for (auto &var : g[e].vars) { |
| 450 | if (contains(locals, var.get())) { |
| 451 | DEBUG_PRINTF("updating slot %u using local %u\n" , var->slot, |
| 452 | next_slot); |
| 453 | var->slot = next_slot++; |
| 454 | allocated_count++; |
| 455 | } |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | assert(allocated_count == locals.size()); |
| 460 | } |
| 461 | |
| 462 | static never_inline |
| 463 | u32 update_slots(GoughGraph &g, const vector<u32> &old_new, |
| 464 | UNUSED u32 old_slot_count) { |
| 465 | vector<GoughSSAVar *> vars; |
| 466 | set<GoughSSAVar *> locals; |
| 467 | all_vars(g, &vars); |
| 468 | u32 slot_count = 0; |
| 469 | for (GoughSSAVar *v : vars) { |
| 470 | assert(v->slot < old_new.size()); |
| 471 | DEBUG_PRINTF("updating slot %u to %u\n" , v->slot, old_new[v->slot]); |
| 472 | if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */ |
| 473 | v->slot = old_new[v->slot]; |
| 474 | ENSURE_AT_LEAST(&slot_count, v->slot + 1); |
| 475 | } else { |
| 476 | locals.insert(v); |
| 477 | } |
| 478 | } |
| 479 | assert(slot_count <= old_slot_count); |
| 480 | DEBUG_PRINTF("reduce stream slots from %u to %u\n" , old_slot_count, |
| 481 | slot_count); |
| 482 | update_local_slots(g, locals, slot_count); |
| 483 | |
| 484 | return slot_count; |
| 485 | } |
| 486 | |
| 487 | u32 assign_slots(GoughGraph &cfg, const Grey &grey) { |
| 488 | u32 slot_count = initial_slots(cfg); |
| 489 | |
| 490 | if (!grey.goughRegisterAllocate) { |
| 491 | return slot_count; |
| 492 | } |
| 493 | dump(cfg, "slots_pre" , grey); |
| 494 | |
| 495 | vector<u32> old_new; |
| 496 | create_slot_mapping(cfg, slot_count, &old_new); |
| 497 | slot_count = update_slots(cfg, old_new, slot_count); |
| 498 | |
| 499 | return slot_count; |
| 500 | } |
| 501 | |
| 502 | } // namespace ue2 |
| 503 | |