| 1 | #include "all.h" |
| 2 | |
| 3 | #ifdef TEST_PMOV |
| 4 | #undef assert |
| 5 | #define assert(x) assert_test(#x, x) |
| 6 | #endif |
| 7 | |
| 8 | typedef struct RMap RMap; |
| 9 | |
| 10 | struct RMap { |
| 11 | int t[Tmp0]; |
| 12 | int r[Tmp0]; |
| 13 | int w[Tmp0]; /* wait list, for unmatched hints */ |
| 14 | BSet b[1]; |
| 15 | int n; |
| 16 | }; |
| 17 | |
| 18 | static bits regu; /* registers used */ |
| 19 | static Tmp *tmp; /* function temporaries */ |
| 20 | static Mem *mem; /* function mem references */ |
| 21 | static struct { |
| 22 | Ref src, dst; |
| 23 | int cls; |
| 24 | } pm[Tmp0]; /* parallel move constructed */ |
| 25 | static int npm; /* size of pm */ |
| 26 | static int loop; /* current loop level */ |
| 27 | |
| 28 | static uint stmov; /* stats: added moves */ |
| 29 | static uint stblk; /* stats: added blocks */ |
| 30 | |
| 31 | static int * |
| 32 | hint(int t) |
| 33 | { |
| 34 | return &tmp[phicls(t, tmp)].hint.r; |
| 35 | } |
| 36 | |
| 37 | static void |
| 38 | sethint(int t, int r) |
| 39 | { |
| 40 | Tmp *p; |
| 41 | |
| 42 | p = &tmp[phicls(t, tmp)]; |
| 43 | if (p->hint.r == -1 || p->hint.w > loop) { |
| 44 | p->hint.r = r; |
| 45 | p->hint.w = loop; |
| 46 | tmp[t].visit = -1; |
| 47 | } |
| 48 | } |
| 49 | |
| 50 | static void |
| 51 | rcopy(RMap *ma, RMap *mb) |
| 52 | { |
| 53 | memcpy(ma->t, mb->t, sizeof ma->t); |
| 54 | memcpy(ma->r, mb->r, sizeof ma->r); |
| 55 | memcpy(ma->w, mb->w, sizeof ma->w); |
| 56 | bscopy(ma->b, mb->b); |
| 57 | ma->n = mb->n; |
| 58 | } |
| 59 | |
| 60 | static int |
| 61 | rfind(RMap *m, int t) |
| 62 | { |
| 63 | int i; |
| 64 | |
| 65 | for (i=0; i<m->n; i++) |
| 66 | if (m->t[i] == t) |
| 67 | return m->r[i]; |
| 68 | return -1; |
| 69 | } |
| 70 | |
| 71 | static Ref |
| 72 | rref(RMap *m, int t) |
| 73 | { |
| 74 | int r, s; |
| 75 | |
| 76 | r = rfind(m, t); |
| 77 | if (r == -1) { |
| 78 | s = tmp[t].slot; |
| 79 | assert(s != -1 && "should have spilled" ); |
| 80 | return SLOT(s); |
| 81 | } else |
| 82 | return TMP(r); |
| 83 | } |
| 84 | |
| 85 | static void |
| 86 | radd(RMap *m, int t, int r) |
| 87 | { |
| 88 | assert((t >= Tmp0 || t == r) && "invalid temporary" ); |
| 89 | assert(((T.gpr0 <= r && r < T.gpr0 + T.ngpr) |
| 90 | || (T.fpr0 <= r && r < T.fpr0 + T.nfpr)) |
| 91 | && "invalid register" ); |
| 92 | assert(!bshas(m->b, t) && "temporary has mapping" ); |
| 93 | assert(!bshas(m->b, r) && "register already allocated" ); |
| 94 | assert(m->n <= T.ngpr+T.nfpr && "too many mappings" ); |
| 95 | bsset(m->b, t); |
| 96 | bsset(m->b, r); |
| 97 | m->t[m->n] = t; |
| 98 | m->r[m->n] = r; |
| 99 | m->n++; |
| 100 | regu |= BIT(r); |
| 101 | } |
| 102 | |
| 103 | static Ref |
| 104 | ralloctry(RMap *m, int t, int try) |
| 105 | { |
| 106 | bits regs; |
| 107 | int h, r, r0, r1; |
| 108 | |
| 109 | if (t < Tmp0) { |
| 110 | assert(bshas(m->b, t)); |
| 111 | return TMP(t); |
| 112 | } |
| 113 | if (bshas(m->b, t)) { |
| 114 | r = rfind(m, t); |
| 115 | assert(r != -1); |
| 116 | return TMP(r); |
| 117 | } |
| 118 | r = tmp[t].visit; |
| 119 | if (r == -1 || bshas(m->b, r)) |
| 120 | r = *hint(t); |
| 121 | if (r == -1 || bshas(m->b, r)) { |
| 122 | if (try) |
| 123 | return R; |
| 124 | regs = tmp[phicls(t, tmp)].hint.m; |
| 125 | regs |= m->b->t[0]; |
| 126 | if (KBASE(tmp[t].cls) == 0) { |
| 127 | r0 = T.gpr0; |
| 128 | r1 = r0 + T.ngpr; |
| 129 | } else { |
| 130 | r0 = T.fpr0; |
| 131 | r1 = r0 + T.nfpr; |
| 132 | } |
| 133 | for (r=r0; r<r1; r++) |
| 134 | if (!(regs & BIT(r))) |
| 135 | goto Found; |
| 136 | for (r=r0; r<r1; r++) |
| 137 | if (!bshas(m->b, r)) |
| 138 | goto Found; |
| 139 | die("no more regs" ); |
| 140 | } |
| 141 | Found: |
| 142 | radd(m, t, r); |
| 143 | sethint(t, r); |
| 144 | tmp[t].visit = r; |
| 145 | h = *hint(t); |
| 146 | if (h != -1 && h != r) |
| 147 | m->w[h] = t; |
| 148 | return TMP(r); |
| 149 | } |
| 150 | |
| 151 | static inline Ref |
| 152 | ralloc(RMap *m, int t) |
| 153 | { |
| 154 | return ralloctry(m, t, 0); |
| 155 | } |
| 156 | |
| 157 | static int |
| 158 | rfree(RMap *m, int t) |
| 159 | { |
| 160 | int i, r; |
| 161 | |
| 162 | assert(t >= Tmp0 || !(BIT(t) & T.rglob)); |
| 163 | if (!bshas(m->b, t)) |
| 164 | return -1; |
| 165 | for (i=0; m->t[i] != t; i++) |
| 166 | assert(i+1 < m->n); |
| 167 | r = m->r[i]; |
| 168 | bsclr(m->b, t); |
| 169 | bsclr(m->b, r); |
| 170 | m->n--; |
| 171 | memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]); |
| 172 | memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]); |
| 173 | assert(t >= Tmp0 || t == r); |
| 174 | return r; |
| 175 | } |
| 176 | |
| 177 | static void |
| 178 | mdump(RMap *m) |
| 179 | { |
| 180 | int i; |
| 181 | |
| 182 | for (i=0; i<m->n; i++) |
| 183 | if (m->t[i] >= Tmp0) |
| 184 | fprintf(stderr, " (%s, R%d)" , |
| 185 | tmp[m->t[i]].name, |
| 186 | m->r[i]); |
| 187 | fprintf(stderr, "\n" ); |
| 188 | } |
| 189 | |
| 190 | static void |
| 191 | pmadd(Ref src, Ref dst, int k) |
| 192 | { |
| 193 | if (npm == Tmp0) |
| 194 | die("cannot have more moves than registers" ); |
| 195 | pm[npm].src = src; |
| 196 | pm[npm].dst = dst; |
| 197 | pm[npm].cls = k; |
| 198 | npm++; |
| 199 | } |
| 200 | |
| 201 | enum PMStat { ToMove, Moving, Moved }; |
| 202 | |
| 203 | static int |
| 204 | pmrec(enum PMStat *status, int i, int *k) |
| 205 | { |
| 206 | int j, c; |
| 207 | |
| 208 | /* note, this routine might emit |
| 209 | * too many large instructions |
| 210 | */ |
| 211 | if (req(pm[i].src, pm[i].dst)) { |
| 212 | status[i] = Moved; |
| 213 | return -1; |
| 214 | } |
| 215 | assert(KBASE(pm[i].cls) == KBASE(*k)); |
| 216 | assert((Kw|Kl) == Kl && (Ks|Kd) == Kd); |
| 217 | *k |= pm[i].cls; |
| 218 | for (j=0; j<npm; j++) |
| 219 | if (req(pm[j].dst, pm[i].src)) |
| 220 | break; |
| 221 | switch (j == npm ? Moved : status[j]) { |
| 222 | case Moving: |
| 223 | c = j; /* start of cycle */ |
| 224 | emit(Oswap, *k, R, pm[i].src, pm[i].dst); |
| 225 | break; |
| 226 | case ToMove: |
| 227 | status[i] = Moving; |
| 228 | c = pmrec(status, j, k); |
| 229 | if (c == i) { |
| 230 | c = -1; /* end of cycle */ |
| 231 | break; |
| 232 | } |
| 233 | if (c != -1) { |
| 234 | emit(Oswap, *k, R, pm[i].src, pm[i].dst); |
| 235 | break; |
| 236 | } |
| 237 | /* fall through */ |
| 238 | case Moved: |
| 239 | c = -1; |
| 240 | emit(Ocopy, pm[i].cls, pm[i].dst, pm[i].src, R); |
| 241 | break; |
| 242 | } |
| 243 | status[i] = Moved; |
| 244 | return c; |
| 245 | } |
| 246 | |
| 247 | static void |
| 248 | pmgen() |
| 249 | { |
| 250 | int i; |
| 251 | enum PMStat *status; |
| 252 | |
| 253 | status = alloc(npm * sizeof status[0]); |
| 254 | assert(!npm || status[npm-1] == ToMove); |
| 255 | for (i=0; i<npm; i++) |
| 256 | if (status[i] == ToMove) |
| 257 | pmrec(status, i, (int[]){pm[i].cls}); |
| 258 | } |
| 259 | |
| 260 | static void |
| 261 | move(int r, Ref to, RMap *m) |
| 262 | { |
| 263 | int n, t, r1; |
| 264 | |
| 265 | r1 = req(to, R) ? -1 : rfree(m, to.val); |
| 266 | if (bshas(m->b, r)) { |
| 267 | /* r is used and not by to */ |
| 268 | assert(r1 != r); |
| 269 | for (n=0; m->r[n] != r; n++) |
| 270 | assert(n+1 < m->n); |
| 271 | t = m->t[n]; |
| 272 | rfree(m, t); |
| 273 | bsset(m->b, r); |
| 274 | ralloc(m, t); |
| 275 | bsclr(m->b, r); |
| 276 | } |
| 277 | t = req(to, R) ? r : to.val; |
| 278 | radd(m, t, r); |
| 279 | } |
| 280 | |
| 281 | static int |
| 282 | regcpy(Ins *i) |
| 283 | { |
| 284 | return i->op == Ocopy && isreg(i->arg[0]); |
| 285 | } |
| 286 | |
| 287 | static Ins * |
| 288 | dopm(Blk *b, Ins *i, RMap *m) |
| 289 | { |
| 290 | RMap m0; |
| 291 | int n, r, r1, t, s; |
| 292 | Ins *i1, *ip; |
| 293 | bits def; |
| 294 | |
| 295 | m0 = *m; /* okay since we don't use m0.b */ |
| 296 | m0.b->t = 0; |
| 297 | i1 = ++i; |
| 298 | do { |
| 299 | i--; |
| 300 | move(i->arg[0].val, i->to, m); |
| 301 | } while (i != b->ins && regcpy(i-1)); |
| 302 | assert(m0.n <= m->n); |
| 303 | if (i != b->ins && (i-1)->op == Ocall) { |
| 304 | def = T.retregs((i-1)->arg[1], 0) | T.rglob; |
| 305 | for (r=0; T.rsave[r]>=0; r++) |
| 306 | if (!(BIT(T.rsave[r]) & def)) |
| 307 | move(T.rsave[r], R, m); |
| 308 | } |
| 309 | for (npm=0, n=0; n<m->n; n++) { |
| 310 | t = m->t[n]; |
| 311 | s = tmp[t].slot; |
| 312 | r1 = m->r[n]; |
| 313 | r = rfind(&m0, t); |
| 314 | if (r != -1) |
| 315 | pmadd(TMP(r1), TMP(r), tmp[t].cls); |
| 316 | else if (s != -1) |
| 317 | pmadd(TMP(r1), SLOT(s), tmp[t].cls); |
| 318 | } |
| 319 | for (ip=i; ip<i1; ip++) { |
| 320 | if (!req(ip->to, R)) |
| 321 | rfree(m, ip->to.val); |
| 322 | r = ip->arg[0].val; |
| 323 | if (rfind(m, r) == -1) |
| 324 | radd(m, r, r); |
| 325 | } |
| 326 | pmgen(); |
| 327 | return i; |
| 328 | } |
| 329 | |
| 330 | static int |
| 331 | prio1(Ref r1, Ref r2) |
| 332 | { |
| 333 | /* trivial heuristic to begin with, |
| 334 | * later we can use the distance to |
| 335 | * the definition instruction |
| 336 | */ |
| 337 | (void) r2; |
| 338 | return *hint(r1.val) != -1; |
| 339 | } |
| 340 | |
| 341 | static void |
| 342 | insert(Ref *r, Ref **rs, int p) |
| 343 | { |
| 344 | int i; |
| 345 | |
| 346 | rs[i = p] = r; |
| 347 | while (i-- > 0 && prio1(*r, *rs[i])) { |
| 348 | rs[i+1] = rs[i]; |
| 349 | rs[i] = r; |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | static void |
| 354 | doblk(Blk *b, RMap *cur) |
| 355 | { |
| 356 | int t, x, r, rf, rt, nr; |
| 357 | bits rs; |
| 358 | Ins *i, *i1; |
| 359 | Mem *m; |
| 360 | Ref *ra[4]; |
| 361 | |
| 362 | for (r=0; bsiter(b->out, &r) && r<Tmp0; r++) |
| 363 | radd(cur, r, r); |
| 364 | if (rtype(b->jmp.arg) == RTmp) |
| 365 | b->jmp.arg = ralloc(cur, b->jmp.arg.val); |
| 366 | curi = &insb[NIns]; |
| 367 | for (i1=&b->ins[b->nins]; i1!=b->ins;) { |
| 368 | emiti(*--i1); |
| 369 | i = curi; |
| 370 | rf = -1; |
| 371 | switch (i->op) { |
| 372 | case Ocall: |
| 373 | rs = T.argregs(i->arg[1], 0) | T.rglob; |
| 374 | for (r=0; T.rsave[r]>=0; r++) |
| 375 | if (!(BIT(T.rsave[r]) & rs)) |
| 376 | rfree(cur, T.rsave[r]); |
| 377 | break; |
| 378 | case Ocopy: |
| 379 | if (regcpy(i)) { |
| 380 | curi++; |
| 381 | i1 = dopm(b, i1, cur); |
| 382 | stmov += i+1 - curi; |
| 383 | continue; |
| 384 | } |
| 385 | if (isreg(i->to)) |
| 386 | if (rtype(i->arg[0]) == RTmp) |
| 387 | sethint(i->arg[0].val, i->to.val); |
| 388 | /* fall through */ |
| 389 | default: |
| 390 | if (!req(i->to, R)) { |
| 391 | assert(rtype(i->to) == RTmp); |
| 392 | r = i->to.val; |
| 393 | if (r < Tmp0 && (BIT(r) & T.rglob)) |
| 394 | break; |
| 395 | rf = rfree(cur, r); |
| 396 | if (rf == -1) { |
| 397 | assert(!isreg(i->to)); |
| 398 | curi++; |
| 399 | continue; |
| 400 | } |
| 401 | i->to = TMP(rf); |
| 402 | } |
| 403 | break; |
| 404 | } |
| 405 | for (x=0, nr=0; x<2; x++) |
| 406 | switch (rtype(i->arg[x])) { |
| 407 | case RMem: |
| 408 | m = &mem[i->arg[x].val]; |
| 409 | if (rtype(m->base) == RTmp) |
| 410 | insert(&m->base, ra, nr++); |
| 411 | if (rtype(m->index) == RTmp) |
| 412 | insert(&m->index, ra, nr++); |
| 413 | break; |
| 414 | case RTmp: |
| 415 | insert(&i->arg[x], ra, nr++); |
| 416 | break; |
| 417 | } |
| 418 | for (r=0; r<nr; r++) |
| 419 | *ra[r] = ralloc(cur, ra[r]->val); |
| 420 | if (i->op == Ocopy && req(i->to, i->arg[0])) |
| 421 | curi++; |
| 422 | |
| 423 | /* try to change the register of a hinted |
| 424 | * temporary if rf is available */ |
| 425 | if (rf != -1 && (t = cur->w[rf]) != 0) |
| 426 | if (!bshas(cur->b, rf) && *hint(t) == rf |
| 427 | && (rt = rfree(cur, t)) != -1) { |
| 428 | tmp[t].visit = -1; |
| 429 | ralloc(cur, t); |
| 430 | assert(bshas(cur->b, rf)); |
| 431 | emit(Ocopy, tmp[t].cls, TMP(rt), TMP(rf), R); |
| 432 | stmov += 1; |
| 433 | cur->w[rf] = 0; |
| 434 | for (r=0; r<nr; r++) |
| 435 | if (req(*ra[r], TMP(rt))) |
| 436 | *ra[r] = TMP(rf); |
| 437 | /* one could iterate this logic with |
| 438 | * the newly freed rt, but in this case |
| 439 | * the above loop must be changed */ |
| 440 | } |
| 441 | } |
| 442 | b->nins = &insb[NIns] - curi; |
| 443 | idup(&b->ins, curi, b->nins); |
| 444 | } |
| 445 | |
| 446 | /* qsort() comparison function to peel |
| 447 | * loop nests from inside out */ |
| 448 | static int |
| 449 | carve(const void *a, const void *b) |
| 450 | { |
| 451 | Blk *ba, *bb; |
| 452 | |
| 453 | /* todo, evaluate if this order is really |
| 454 | * better than the simple postorder */ |
| 455 | ba = *(Blk**)a; |
| 456 | bb = *(Blk**)b; |
| 457 | if (ba->loop == bb->loop) |
| 458 | return ba->id > bb->id ? -1 : ba->id < bb->id; |
| 459 | return ba->loop > bb->loop ? -1 : +1; |
| 460 | } |
| 461 | |
| 462 | /* comparison function to order temporaries |
| 463 | * for allocation at the end of blocks */ |
| 464 | static int |
| 465 | prio2(int t1, int t2) |
| 466 | { |
| 467 | if ((tmp[t1].visit ^ tmp[t2].visit) < 0) /* != signs */ |
| 468 | return tmp[t1].visit != -1 ? +1 : -1; |
| 469 | if ((*hint(t1) ^ *hint(t2)) < 0) |
| 470 | return *hint(t1) != -1 ? +1 : -1; |
| 471 | return tmp[t1].cost - tmp[t2].cost; |
| 472 | } |
| 473 | |
| 474 | /* register allocation |
| 475 | * depends on rpo, phi, cost, (and obviously spill) |
| 476 | */ |
| 477 | void |
| 478 | rega(Fn *fn) |
| 479 | { |
| 480 | int j, t, r, x, rl[Tmp0]; |
| 481 | Blk *b, *b1, *s, ***ps, *blist, **blk, **bp; |
| 482 | RMap *end, *beg, cur, old, *m; |
| 483 | Ins *i; |
| 484 | Phi *p; |
| 485 | uint u, n; |
| 486 | Ref src, dst; |
| 487 | |
| 488 | /* 1. setup */ |
| 489 | stmov = 0; |
| 490 | stblk = 0; |
| 491 | regu = 0; |
| 492 | tmp = fn->tmp; |
| 493 | mem = fn->mem; |
| 494 | blk = alloc(fn->nblk * sizeof blk[0]); |
| 495 | end = alloc(fn->nblk * sizeof end[0]); |
| 496 | beg = alloc(fn->nblk * sizeof beg[0]); |
| 497 | for (n=0; n<fn->nblk; n++) { |
| 498 | bsinit(end[n].b, fn->ntmp); |
| 499 | bsinit(beg[n].b, fn->ntmp); |
| 500 | } |
| 501 | bsinit(cur.b, fn->ntmp); |
| 502 | bsinit(old.b, fn->ntmp); |
| 503 | |
| 504 | loop = INT_MAX; |
| 505 | for (t=0; t<fn->ntmp; t++) { |
| 506 | tmp[t].hint.r = t < Tmp0 ? t : -1; |
| 507 | tmp[t].hint.w = loop; |
| 508 | tmp[t].visit = -1; |
| 509 | } |
| 510 | for (bp=blk, b=fn->start; b; b=b->link) |
| 511 | *bp++ = b; |
| 512 | qsort(blk, fn->nblk, sizeof blk[0], carve); |
| 513 | for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++) |
| 514 | if (i->op != Ocopy || !isreg(i->arg[0])) |
| 515 | break; |
| 516 | else { |
| 517 | assert(rtype(i->to) == RTmp); |
| 518 | sethint(i->to.val, i->arg[0].val); |
| 519 | } |
| 520 | |
| 521 | /* 2. assign registers */ |
| 522 | for (bp=blk; bp<&blk[fn->nblk]; bp++) { |
| 523 | b = *bp; |
| 524 | n = b->id; |
| 525 | loop = b->loop; |
| 526 | cur.n = 0; |
| 527 | bszero(cur.b); |
| 528 | memset(cur.w, 0, sizeof cur.w); |
| 529 | for (x=0, t=Tmp0; bsiter(b->out, &t); t++) { |
| 530 | j = x++; |
| 531 | rl[j] = t; |
| 532 | while (j-- > 0 && prio2(t, rl[j]) > 0) { |
| 533 | rl[j+1] = rl[j]; |
| 534 | rl[j] = t; |
| 535 | } |
| 536 | } |
| 537 | for (j=0; j<x; j++) |
| 538 | ralloctry(&cur, rl[j], 1); |
| 539 | for (j=0; j<x; j++) |
| 540 | ralloc(&cur, rl[j]); |
| 541 | rcopy(&end[n], &cur); |
| 542 | doblk(b, &cur); |
| 543 | bscopy(b->in, cur.b); |
| 544 | for (p=b->phi; p; p=p->link) |
| 545 | if (rtype(p->to) == RTmp) |
| 546 | bsclr(b->in, p->to.val); |
| 547 | rcopy(&beg[n], &cur); |
| 548 | } |
| 549 | |
| 550 | /* 3. emit copies shared by multiple edges |
| 551 | * to the same block */ |
| 552 | for (s=fn->start; s; s=s->link) { |
| 553 | if (s->npred <= 1) |
| 554 | continue; |
| 555 | m = &beg[s->id]; |
| 556 | |
| 557 | /* rl maps a register that is live at the |
| 558 | * beginning of s to the one used in all |
| 559 | * predecessors (if any, -1 otherwise) */ |
| 560 | memset(rl, 0, sizeof rl); |
| 561 | |
| 562 | /* to find the register of a phi in a |
| 563 | * predecessor, we have to find the |
| 564 | * corresponding argument */ |
| 565 | for (p=s->phi; p; p=p->link) { |
| 566 | if (rtype(p->to) != RTmp |
| 567 | || (r=rfind(m, p->to.val)) == -1) |
| 568 | continue; |
| 569 | for (u=0; u<p->narg; u++) { |
| 570 | b = p->blk[u]; |
| 571 | src = p->arg[u]; |
| 572 | if (rtype(src) != RTmp) |
| 573 | continue; |
| 574 | x = rfind(&end[b->id], src.val); |
| 575 | if (x == -1) /* spilled */ |
| 576 | continue; |
| 577 | rl[r] = (!rl[r] || rl[r] == x) ? x : -1; |
| 578 | } |
| 579 | if (rl[r] == 0) |
| 580 | rl[r] = -1; |
| 581 | } |
| 582 | |
| 583 | /* process non-phis temporaries */ |
| 584 | for (j=0; j<m->n; j++) { |
| 585 | t = m->t[j]; |
| 586 | r = m->r[j]; |
| 587 | if (rl[r] || t < Tmp0 /* todo, remove this */) |
| 588 | continue; |
| 589 | for (bp=s->pred; bp<&s->pred[s->npred]; bp++) { |
| 590 | x = rfind(&end[(*bp)->id], t); |
| 591 | if (x == -1) /* spilled */ |
| 592 | continue; |
| 593 | rl[r] = (!rl[r] || rl[r] == x) ? x : -1; |
| 594 | } |
| 595 | if (rl[r] == 0) |
| 596 | rl[r] = -1; |
| 597 | } |
| 598 | |
| 599 | npm = 0; |
| 600 | for (j=0; j<m->n; j++) { |
| 601 | t = m->t[j]; |
| 602 | r = m->r[j]; |
| 603 | x = rl[r]; |
| 604 | assert(x != 0 || t < Tmp0 /* todo, ditto */); |
| 605 | if (x > 0 && !bshas(m->b, x)) { |
| 606 | pmadd(TMP(x), TMP(r), tmp[t].cls); |
| 607 | m->r[j] = x; |
| 608 | bsset(m->b, x); |
| 609 | } |
| 610 | } |
| 611 | curi = &insb[NIns]; |
| 612 | pmgen(); |
| 613 | j = &insb[NIns] - curi; |
| 614 | if (j == 0) |
| 615 | continue; |
| 616 | stmov += j; |
| 617 | s->nins += j; |
| 618 | i = alloc(s->nins * sizeof(Ins)); |
| 619 | icpy(icpy(i, curi, j), s->ins, s->nins-j); |
| 620 | s->ins = i; |
| 621 | } |
| 622 | |
| 623 | if (debug['R']) { |
| 624 | fprintf(stderr, "\n> Register mappings:\n" ); |
| 625 | for (n=0; n<fn->nblk; n++) { |
| 626 | b = fn->rpo[n]; |
| 627 | fprintf(stderr, "\t%-10s beg" , b->name); |
| 628 | mdump(&beg[n]); |
| 629 | fprintf(stderr, "\t end" ); |
| 630 | mdump(&end[n]); |
| 631 | } |
| 632 | fprintf(stderr, "\n" ); |
| 633 | } |
| 634 | |
| 635 | /* 4. emit remaining copies in new blocks */ |
| 636 | blist = 0; |
| 637 | for (b=fn->start;; b=b->link) { |
| 638 | ps = (Blk**[3]){&b->s1, &b->s2, (Blk*[1]){0}}; |
| 639 | for (; (s=**ps); ps++) { |
| 640 | npm = 0; |
| 641 | for (p=s->phi; p; p=p->link) { |
| 642 | dst = p->to; |
| 643 | assert(rtype(dst)==RSlot || rtype(dst)==RTmp); |
| 644 | if (rtype(dst) == RTmp) { |
| 645 | r = rfind(&beg[s->id], dst.val); |
| 646 | if (r == -1) |
| 647 | continue; |
| 648 | dst = TMP(r); |
| 649 | } |
| 650 | for (u=0; p->blk[u]!=b; u++) |
| 651 | assert(u+1 < p->narg); |
| 652 | src = p->arg[u]; |
| 653 | if (rtype(src) == RTmp) |
| 654 | src = rref(&end[b->id], src.val); |
| 655 | pmadd(src, dst, p->cls); |
| 656 | } |
| 657 | for (t=Tmp0; bsiter(s->in, &t); t++) { |
| 658 | src = rref(&end[b->id], t); |
| 659 | dst = rref(&beg[s->id], t); |
| 660 | pmadd(src, dst, tmp[t].cls); |
| 661 | } |
| 662 | curi = &insb[NIns]; |
| 663 | pmgen(); |
| 664 | if (curi == &insb[NIns]) |
| 665 | continue; |
| 666 | b1 = blknew(); |
| 667 | b1->loop = (b->loop+s->loop) / 2; |
| 668 | b1->link = blist; |
| 669 | blist = b1; |
| 670 | fn->nblk++; |
| 671 | (void)!snprintf(b1->name, sizeof(b1->name), "%s_%s" , b->name, s->name); |
| 672 | b1->nins = &insb[NIns] - curi; |
| 673 | stmov += b1->nins; |
| 674 | stblk += 1; |
| 675 | idup(&b1->ins, curi, b1->nins); |
| 676 | b1->jmp.type = Jjmp; |
| 677 | b1->s1 = s; |
| 678 | **ps = b1; |
| 679 | } |
| 680 | if (!b->link) { |
| 681 | b->link = blist; |
| 682 | break; |
| 683 | } |
| 684 | } |
| 685 | for (b=fn->start; b; b=b->link) |
| 686 | b->phi = 0; |
| 687 | fn->reg = regu; |
| 688 | |
| 689 | if (debug['R']) { |
| 690 | fprintf(stderr, "\n> Register allocation statistics:\n" ); |
| 691 | fprintf(stderr, "\tnew moves: %d\n" , stmov); |
| 692 | fprintf(stderr, "\tnew blocks: %d\n" , stblk); |
| 693 | fprintf(stderr, "\n> After register allocation:\n" ); |
| 694 | printfn(fn, stderr); |
| 695 | } |
| 696 | } |
| 697 | |