| 1 | #include "all.h" |
| 2 | |
| 3 | #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */ |
| 4 | |
| 5 | typedef struct Loc Loc; |
| 6 | typedef struct Slice Slice; |
| 7 | typedef struct Insert Insert; |
| 8 | |
| 9 | |
| 10 | struct Loc { |
| 11 | enum { |
| 12 | LRoot, /* right above the original load */ |
| 13 | LLoad, /* inserting a load is allowed */ |
| 14 | LNoLoad, /* only scalar operations allowed */ |
| 15 | } type; |
| 16 | uint off; |
| 17 | Blk *blk; |
| 18 | }; |
| 19 | |
| 20 | struct Slice { |
| 21 | Ref ref; |
| 22 | short sz; |
| 23 | short cls; /* load class */ |
| 24 | }; |
| 25 | |
| 26 | struct Insert { |
| 27 | uint isphi:1; |
| 28 | uint num:31; |
| 29 | uint bid; |
| 30 | uint off; |
| 31 | union { |
| 32 | Ins ins; |
| 33 | struct { |
| 34 | Slice m; |
| 35 | Phi *p; |
| 36 | } phi; |
| 37 | } new; |
| 38 | }; |
| 39 | |
| 40 | static Fn *curf; |
| 41 | static uint inum; /* current insertion number */ |
| 42 | static Insert *ilog; /* global insertion log */ |
| 43 | static uint nlog; /* number of entries in the log */ |
| 44 | |
| 45 | int |
| 46 | loadsz(Ins *l) |
| 47 | { |
| 48 | switch (l->op) { |
| 49 | case Oloadsb: case Oloadub: return 1; |
| 50 | case Oloadsh: case Oloaduh: return 2; |
| 51 | case Oloadsw: case Oloaduw: return 4; |
| 52 | case Oload: return KWIDE(l->cls) ? 8 : 4; |
| 53 | } |
| 54 | die("unreachable" ); |
| 55 | } |
| 56 | |
| 57 | int |
| 58 | storesz(Ins *s) |
| 59 | { |
| 60 | switch (s->op) { |
| 61 | case Ostoreb: return 1; |
| 62 | case Ostoreh: return 2; |
| 63 | case Ostorew: case Ostores: return 4; |
| 64 | case Ostorel: case Ostored: return 8; |
| 65 | } |
| 66 | die("unreachable" ); |
| 67 | } |
| 68 | |
| 69 | static Ref |
| 70 | iins(int cls, int op, Ref a0, Ref a1, Loc *l) |
| 71 | { |
| 72 | Insert *ist; |
| 73 | |
| 74 | vgrow(&ilog, ++nlog); |
| 75 | ist = &ilog[nlog-1]; |
| 76 | ist->isphi = 0; |
| 77 | ist->num = inum++; |
| 78 | ist->bid = l->blk->id; |
| 79 | ist->off = l->off; |
| 80 | ist->new.ins = (Ins){op, cls, R, {a0, a1}}; |
| 81 | return ist->new.ins.to = newtmp("ld" , cls, curf); |
| 82 | } |
| 83 | |
| 84 | static void |
| 85 | cast(Ref *r, int cls, Loc *l) |
| 86 | { |
| 87 | int cls0; |
| 88 | |
| 89 | if (rtype(*r) == RCon) |
| 90 | return; |
| 91 | assert(rtype(*r) == RTmp); |
| 92 | cls0 = curf->tmp[r->val].cls; |
| 93 | if (cls0 == cls || (cls == Kw && cls0 == Kl)) |
| 94 | return; |
| 95 | if (KWIDE(cls0) < KWIDE(cls)) { |
| 96 | if (cls0 == Ks) |
| 97 | *r = iins(Kw, Ocast, *r, R, l); |
| 98 | *r = iins(Kl, Oextuw, *r, R, l); |
| 99 | if (cls == Kd) |
| 100 | *r = iins(Kd, Ocast, *r, R, l); |
| 101 | } else { |
| 102 | if (cls0 == Kd && cls != Kl) |
| 103 | *r = iins(Kl, Ocast, *r, R, l); |
| 104 | if (cls0 != Kd || cls != Kw) |
| 105 | *r = iins(cls, Ocast, *r, R, l); |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | static inline void |
| 110 | mask(int cls, Ref *r, bits msk, Loc *l) |
| 111 | { |
| 112 | cast(r, cls, l); |
| 113 | *r = iins(cls, Oand, *r, getcon(msk, curf), l); |
| 114 | } |
| 115 | |
| 116 | static Ref |
| 117 | load(Slice sl, bits msk, Loc *l) |
| 118 | { |
| 119 | Alias *a; |
| 120 | Ref r, r1; |
| 121 | int ld, cls, all; |
| 122 | Con c; |
| 123 | |
| 124 | ld = (int[]){ |
| 125 | [1] = Oloadub, |
| 126 | [2] = Oloaduh, |
| 127 | [4] = Oloaduw, |
| 128 | [8] = Oload |
| 129 | }[sl.sz]; |
| 130 | all = msk == MASK(sl.sz); |
| 131 | if (all) |
| 132 | cls = sl.cls; |
| 133 | else |
| 134 | cls = sl.sz > 4 ? Kl : Kw; |
| 135 | r = sl.ref; |
| 136 | /* sl.ref might not be live here, |
| 137 | * but its alias base ref will be |
| 138 | * (see killsl() below) */ |
| 139 | if (rtype(r) == RTmp) { |
| 140 | a = &curf->tmp[r.val].alias; |
| 141 | switch (a->type) { |
| 142 | default: |
| 143 | die("unreachable" ); |
| 144 | case ALoc: |
| 145 | case AEsc: |
| 146 | case AUnk: |
| 147 | r = a->base; |
| 148 | if (!a->offset) |
| 149 | break; |
| 150 | r1 = getcon(a->offset, curf); |
| 151 | r = iins(Kl, Oadd, r, r1, l); |
| 152 | break; |
| 153 | case ACon: |
| 154 | case ASym: |
| 155 | c.type = CAddr; |
| 156 | c.label = a->label; |
| 157 | c.bits.i = a->offset; |
| 158 | c.local = 0; |
| 159 | r = newcon(&c, curf); |
| 160 | break; |
| 161 | } |
| 162 | } |
| 163 | r = iins(cls, ld, r, R, l); |
| 164 | if (!all) |
| 165 | mask(cls, &r, msk, l); |
| 166 | return r; |
| 167 | } |
| 168 | |
| 169 | static int |
| 170 | killsl(Ref r, Slice sl) |
| 171 | { |
| 172 | Alias *a; |
| 173 | |
| 174 | if (rtype(sl.ref) != RTmp) |
| 175 | return 0; |
| 176 | a = &curf->tmp[sl.ref.val].alias; |
| 177 | switch (a->type) { |
| 178 | default: die("unreachable" ); |
| 179 | case ALoc: |
| 180 | case AEsc: |
| 181 | case AUnk: return req(a->base, r); |
| 182 | case ACon: |
| 183 | case ASym: return 0; |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | /* returns a ref containing the contents of the slice |
| 188 | * passed as argument, all the bits set to 0 in the |
| 189 | * mask argument are zeroed in the result; |
| 190 | * the returned ref has an integer class when the |
| 191 | * mask does not cover all the bits of the slice, |
| 192 | * otherwise, it has class sl.cls |
| 193 | * the procedure returns R when it fails */ |
| 194 | static Ref |
| 195 | def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) |
| 196 | { |
| 197 | Blk *bp; |
| 198 | bits msk1, msks; |
| 199 | int off, cls, cls1, op, sz, ld; |
| 200 | uint np, oldl, oldt; |
| 201 | Ref r, r1; |
| 202 | Phi *p; |
| 203 | Insert *ist; |
| 204 | Loc l; |
| 205 | |
| 206 | /* invariants: |
| 207 | * -1- b dominates il->blk; so we can use |
| 208 | * temporaries of b in il->blk |
| 209 | * -2- if il->type != LNoLoad, then il->blk |
| 210 | * postdominates the original load; so it |
| 211 | * is safe to load in il->blk |
| 212 | * -3- if il->type != LNoLoad, then b |
| 213 | * postdominates il->blk (and by 2, the |
| 214 | * original load) |
| 215 | */ |
| 216 | assert(dom(b, il->blk)); |
| 217 | oldl = nlog; |
| 218 | oldt = curf->ntmp; |
| 219 | if (0) { |
| 220 | Load: |
| 221 | curf->ntmp = oldt; |
| 222 | nlog = oldl; |
| 223 | if (il->type != LLoad) |
| 224 | return R; |
| 225 | return load(sl, msk, il); |
| 226 | } |
| 227 | |
| 228 | if (!i) |
| 229 | i = &b->ins[b->nins]; |
| 230 | cls = sl.sz > 4 ? Kl : Kw; |
| 231 | msks = MASK(sl.sz); |
| 232 | |
| 233 | while (i > b->ins) { |
| 234 | --i; |
| 235 | if (killsl(i->to, sl) |
| 236 | || (i->op == Ocall && escapes(sl.ref, curf))) |
| 237 | goto Load; |
| 238 | ld = isload(i->op); |
| 239 | if (ld) { |
| 240 | sz = loadsz(i); |
| 241 | r1 = i->arg[0]; |
| 242 | r = i->to; |
| 243 | } else if (isstore(i->op)) { |
| 244 | sz = storesz(i); |
| 245 | r1 = i->arg[1]; |
| 246 | r = i->arg[0]; |
| 247 | } else |
| 248 | continue; |
| 249 | switch (alias(sl.ref, sl.sz, r1, sz, &off, curf)) { |
| 250 | case MustAlias: |
| 251 | if (off < 0) { |
| 252 | off = -off; |
| 253 | msk1 = (MASK(sz) << 8*off) & msks; |
| 254 | op = Oshl; |
| 255 | } else { |
| 256 | msk1 = (MASK(sz) >> 8*off) & msks; |
| 257 | op = Oshr; |
| 258 | } |
| 259 | if ((msk1 & msk) == 0) |
| 260 | break; |
| 261 | if (off) { |
| 262 | cls1 = cls; |
| 263 | if (op == Oshr && off + sl.sz > 4) |
| 264 | cls1 = Kl; |
| 265 | cast(&r, cls1, il); |
| 266 | r1 = getcon(8*off, curf); |
| 267 | r = iins(cls1, op, r, r1, il); |
| 268 | } |
| 269 | if ((msk1 & msk) != msk1 || off + sz < sl.sz) |
| 270 | mask(cls, &r, msk1 & msk, il); |
| 271 | if ((msk & ~msk1) != 0) { |
| 272 | r1 = def(sl, msk & ~msk1, b, i, il); |
| 273 | if (req(r1, R)) |
| 274 | goto Load; |
| 275 | r = iins(cls, Oor, r, r1, il); |
| 276 | } |
| 277 | if (msk == msks) |
| 278 | cast(&r, sl.cls, il); |
| 279 | return r; |
| 280 | case MayAlias: |
| 281 | if (ld) |
| 282 | break; |
| 283 | else |
| 284 | goto Load; |
| 285 | case NoAlias: |
| 286 | break; |
| 287 | default: |
| 288 | die("unreachable" ); |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | for (ist=ilog; ist<&ilog[nlog]; ++ist) |
| 293 | if (ist->isphi && ist->bid == b->id) |
| 294 | if (req(ist->new.phi.m.ref, sl.ref)) |
| 295 | if (ist->new.phi.m.sz == sl.sz) { |
| 296 | r = ist->new.phi.p->to; |
| 297 | if (msk != msks) |
| 298 | mask(cls, &r, msk, il); |
| 299 | else |
| 300 | cast(&r, sl.cls, il); |
| 301 | return r; |
| 302 | } |
| 303 | |
| 304 | for (p=b->phi; p; p=p->link) |
| 305 | if (killsl(p->to, sl)) |
| 306 | /* scanning predecessors in that |
| 307 | * case would be unsafe */ |
| 308 | goto Load; |
| 309 | |
| 310 | if (b->npred == 0) |
| 311 | goto Load; |
| 312 | if (b->npred == 1) { |
| 313 | bp = b->pred[0]; |
| 314 | assert(bp->loop >= il->blk->loop); |
| 315 | l = *il; |
| 316 | if (bp->s2) |
| 317 | l.type = LNoLoad; |
| 318 | r1 = def(sl, msk, bp, 0, &l); |
| 319 | if (req(r1, R)) |
| 320 | goto Load; |
| 321 | return r1; |
| 322 | } |
| 323 | |
| 324 | r = newtmp("ld" , sl.cls, curf); |
| 325 | p = alloc(sizeof *p); |
| 326 | vgrow(&ilog, ++nlog); |
| 327 | ist = &ilog[nlog-1]; |
| 328 | ist->isphi = 1; |
| 329 | ist->bid = b->id; |
| 330 | ist->new.phi.m = sl; |
| 331 | ist->new.phi.p = p; |
| 332 | p->to = r; |
| 333 | p->cls = sl.cls; |
| 334 | p->narg = b->npred; |
| 335 | p->arg = vnew(p->narg, sizeof p->arg[0], Pfn); |
| 336 | p->blk = vnew(p->narg, sizeof p->blk[0], Pfn); |
| 337 | for (np=0; np<b->npred; ++np) { |
| 338 | bp = b->pred[np]; |
| 339 | if (!bp->s2 |
| 340 | && il->type != LNoLoad |
| 341 | && bp->loop < il->blk->loop) |
| 342 | l.type = LLoad; |
| 343 | else |
| 344 | l.type = LNoLoad; |
| 345 | l.blk = bp; |
| 346 | l.off = bp->nins; |
| 347 | r1 = def(sl, msks, bp, 0, &l); |
| 348 | if (req(r1, R)) |
| 349 | goto Load; |
| 350 | p->arg[np] = r1; |
| 351 | p->blk[np] = bp; |
| 352 | } |
| 353 | if (msk != msks) |
| 354 | mask(cls, &r, msk, il); |
| 355 | return r; |
| 356 | } |
| 357 | |
| 358 | static int |
| 359 | icmp(const void *pa, const void *pb) |
| 360 | { |
| 361 | Insert *a, *b; |
| 362 | int c; |
| 363 | |
| 364 | a = (Insert *)pa; |
| 365 | b = (Insert *)pb; |
| 366 | if ((c = a->bid - b->bid)) |
| 367 | return c; |
| 368 | if (a->isphi && b->isphi) |
| 369 | return 0; |
| 370 | if (a->isphi) |
| 371 | return -1; |
| 372 | if (b->isphi) |
| 373 | return +1; |
| 374 | if ((c = a->off - b->off)) |
| 375 | return c; |
| 376 | return a->num - b->num; |
| 377 | } |
| 378 | |
| 379 | /* require rpo ssa alias */ |
| 380 | void |
| 381 | loadopt(Fn *fn) |
| 382 | { |
| 383 | Ins *i, *ib; |
| 384 | Blk *b; |
| 385 | int sz; |
| 386 | uint n, ni, ext, nt; |
| 387 | Insert *ist; |
| 388 | Slice sl; |
| 389 | Loc l; |
| 390 | |
| 391 | curf = fn; |
| 392 | ilog = vnew(0, sizeof ilog[0], Pheap); |
| 393 | nlog = 0; |
| 394 | inum = 0; |
| 395 | for (b=fn->start; b; b=b->link) |
| 396 | for (i=b->ins; i<&b->ins[b->nins]; ++i) { |
| 397 | if (!isload(i->op)) |
| 398 | continue; |
| 399 | sz = loadsz(i); |
| 400 | sl = (Slice){i->arg[0], sz, i->cls}; |
| 401 | l = (Loc){LRoot, i-b->ins, b}; |
| 402 | i->arg[1] = def(sl, MASK(sz), b, i, &l); |
| 403 | } |
| 404 | qsort(ilog, nlog, sizeof ilog[0], icmp); |
| 405 | vgrow(&ilog, nlog+1); |
| 406 | ilog[nlog].bid = fn->nblk; /* add a sentinel */ |
| 407 | ib = vnew(0, sizeof(Ins), Pheap); |
| 408 | for (ist=ilog, n=0; n<fn->nblk; ++n) { |
| 409 | b = fn->rpo[n]; |
| 410 | for (; ist->bid == n && ist->isphi; ++ist) { |
| 411 | ist->new.phi.p->link = b->phi; |
| 412 | b->phi = ist->new.phi.p; |
| 413 | } |
| 414 | ni = 0; |
| 415 | nt = 0; |
| 416 | for (;;) { |
| 417 | if (ist->bid == n && ist->off == ni) |
| 418 | i = &ist++->new.ins; |
| 419 | else { |
| 420 | if (ni == b->nins) |
| 421 | break; |
| 422 | i = &b->ins[ni++]; |
| 423 | if (isload(i->op) |
| 424 | && !req(i->arg[1], R)) { |
| 425 | ext = Oextsb + i->op - Oloadsb; |
| 426 | switch (i->op) { |
| 427 | default: |
| 428 | die("unreachable" ); |
| 429 | case Oloadsb: |
| 430 | case Oloadub: |
| 431 | case Oloadsh: |
| 432 | case Oloaduh: |
| 433 | i->op = ext; |
| 434 | break; |
| 435 | case Oloadsw: |
| 436 | case Oloaduw: |
| 437 | if (i->cls == Kl) { |
| 438 | i->op = ext; |
| 439 | break; |
| 440 | } |
| 441 | /* fall through */ |
| 442 | case Oload: |
| 443 | i->op = Ocopy; |
| 444 | break; |
| 445 | } |
| 446 | i->arg[0] = i->arg[1]; |
| 447 | i->arg[1] = R; |
| 448 | } |
| 449 | } |
| 450 | vgrow(&ib, ++nt); |
| 451 | ib[nt-1] = *i; |
| 452 | } |
| 453 | b->nins = nt; |
| 454 | idup(&b->ins, ib, nt); |
| 455 | } |
| 456 | vfree(ib); |
| 457 | vfree(ilog); |
| 458 | if (debug['M']) { |
| 459 | fprintf(stderr, "\n> After load elimination:\n" ); |
| 460 | printfn(fn, stderr); |
| 461 | } |
| 462 | } |
| 463 | |