| 1 | #include "all.h" | 
|---|
| 2 |  | 
|---|
| 3 | #define MASK(w) (BIT(8*(w)-1)*2-1) /* must work when w==8 */ | 
|---|
| 4 |  | 
|---|
| 5 | typedef struct Loc Loc; | 
|---|
| 6 | typedef struct Slice Slice; | 
|---|
| 7 | typedef struct Insert Insert; | 
|---|
| 8 |  | 
|---|
| 9 |  | 
|---|
| 10 | struct Loc { | 
|---|
| 11 | enum { | 
|---|
| 12 | LRoot,   /* right above the original load */ | 
|---|
| 13 | LLoad,   /* inserting a load is allowed */ | 
|---|
| 14 | LNoLoad, /* only scalar operations allowed */ | 
|---|
| 15 | } type; | 
|---|
| 16 | uint off; | 
|---|
| 17 | Blk *blk; | 
|---|
| 18 | }; | 
|---|
| 19 |  | 
|---|
| 20 | struct Slice { | 
|---|
| 21 | Ref ref; | 
|---|
| 22 | short sz; | 
|---|
| 23 | short cls; /* load class */ | 
|---|
| 24 | }; | 
|---|
| 25 |  | 
|---|
| 26 | struct Insert { | 
|---|
| 27 | uint isphi:1; | 
|---|
| 28 | uint num:31; | 
|---|
| 29 | uint bid; | 
|---|
| 30 | uint off; | 
|---|
| 31 | union { | 
|---|
| 32 | Ins ins; | 
|---|
| 33 | struct { | 
|---|
| 34 | Slice m; | 
|---|
| 35 | Phi *p; | 
|---|
| 36 | } phi; | 
|---|
| 37 | } new; | 
|---|
| 38 | }; | 
|---|
| 39 |  | 
|---|
| 40 | static Fn *curf; | 
|---|
| 41 | static uint inum;    /* current insertion number */ | 
|---|
| 42 | static Insert *ilog; /* global insertion log */ | 
|---|
| 43 | static uint nlog;    /* number of entries in the log */ | 
|---|
| 44 |  | 
|---|
| 45 | int | 
|---|
| 46 | loadsz(Ins *l) | 
|---|
| 47 | { | 
|---|
| 48 | switch (l->op) { | 
|---|
| 49 | case Oloadsb: case Oloadub: return 1; | 
|---|
| 50 | case Oloadsh: case Oloaduh: return 2; | 
|---|
| 51 | case Oloadsw: case Oloaduw: return 4; | 
|---|
| 52 | case Oload: return KWIDE(l->cls) ? 8 : 4; | 
|---|
| 53 | } | 
|---|
| 54 | die( "unreachable"); | 
|---|
| 55 | } | 
|---|
| 56 |  | 
|---|
| 57 | int | 
|---|
| 58 | storesz(Ins *s) | 
|---|
| 59 | { | 
|---|
| 60 | switch (s->op) { | 
|---|
| 61 | case Ostoreb: return 1; | 
|---|
| 62 | case Ostoreh: return 2; | 
|---|
| 63 | case Ostorew: case Ostores: return 4; | 
|---|
| 64 | case Ostorel: case Ostored: return 8; | 
|---|
| 65 | } | 
|---|
| 66 | die( "unreachable"); | 
|---|
| 67 | } | 
|---|
| 68 |  | 
|---|
| 69 | static Ref | 
|---|
| 70 | iins(int cls, int op, Ref a0, Ref a1, Loc *l) | 
|---|
| 71 | { | 
|---|
| 72 | Insert *ist; | 
|---|
| 73 |  | 
|---|
| 74 | vgrow(&ilog, ++nlog); | 
|---|
| 75 | ist = &ilog[nlog-1]; | 
|---|
| 76 | ist->isphi = 0; | 
|---|
| 77 | ist->num = inum++; | 
|---|
| 78 | ist->bid = l->blk->id; | 
|---|
| 79 | ist->off = l->off; | 
|---|
| 80 | ist->new.ins = (Ins){op, cls, R, {a0, a1}}; | 
|---|
| 81 | return ist->new.ins.to = newtmp( "ld", cls, curf); | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 | static void | 
|---|
| 85 | cast(Ref *r, int cls, Loc *l) | 
|---|
| 86 | { | 
|---|
| 87 | int cls0; | 
|---|
| 88 |  | 
|---|
| 89 | if (rtype(*r) == RCon) | 
|---|
| 90 | return; | 
|---|
| 91 | assert(rtype(*r) == RTmp); | 
|---|
| 92 | cls0 = curf->tmp[r->val].cls; | 
|---|
| 93 | if (cls0 == cls || (cls == Kw && cls0 == Kl)) | 
|---|
| 94 | return; | 
|---|
| 95 | if (KWIDE(cls0) < KWIDE(cls)) { | 
|---|
| 96 | if (cls0 == Ks) | 
|---|
| 97 | *r = iins(Kw, Ocast, *r, R, l); | 
|---|
| 98 | *r = iins(Kl, Oextuw, *r, R, l); | 
|---|
| 99 | if (cls == Kd) | 
|---|
| 100 | *r = iins(Kd, Ocast, *r, R, l); | 
|---|
| 101 | } else { | 
|---|
| 102 | if (cls0 == Kd && cls != Kl) | 
|---|
| 103 | *r = iins(Kl, Ocast, *r, R, l); | 
|---|
| 104 | if (cls0 != Kd || cls != Kw) | 
|---|
| 105 | *r = iins(cls, Ocast, *r, R, l); | 
|---|
| 106 | } | 
|---|
| 107 | } | 
|---|
| 108 |  | 
|---|
| 109 | static inline void | 
|---|
| 110 | mask(int cls, Ref *r, bits msk, Loc *l) | 
|---|
| 111 | { | 
|---|
| 112 | cast(r, cls, l); | 
|---|
| 113 | *r = iins(cls, Oand, *r, getcon(msk, curf), l); | 
|---|
| 114 | } | 
|---|
| 115 |  | 
|---|
| 116 | static Ref | 
|---|
| 117 | load(Slice sl, bits msk, Loc *l) | 
|---|
| 118 | { | 
|---|
| 119 | Alias *a; | 
|---|
| 120 | Ref r, r1; | 
|---|
| 121 | int ld, cls, all; | 
|---|
| 122 | Con c; | 
|---|
| 123 |  | 
|---|
| 124 | ld = (int[]){ | 
|---|
| 125 | [1] = Oloadub, | 
|---|
| 126 | [2] = Oloaduh, | 
|---|
| 127 | [4] = Oloaduw, | 
|---|
| 128 | [8] = Oload | 
|---|
| 129 | }[sl.sz]; | 
|---|
| 130 | all = msk == MASK(sl.sz); | 
|---|
| 131 | if (all) | 
|---|
| 132 | cls = sl.cls; | 
|---|
| 133 | else | 
|---|
| 134 | cls = sl.sz > 4 ? Kl : Kw; | 
|---|
| 135 | r = sl.ref; | 
|---|
| 136 | /* sl.ref might not be live here, | 
|---|
| 137 | * but its alias base ref will be | 
|---|
| 138 | * (see killsl() below) */ | 
|---|
| 139 | if (rtype(r) == RTmp) { | 
|---|
| 140 | a = &curf->tmp[r.val].alias; | 
|---|
| 141 | switch (a->type) { | 
|---|
| 142 | default: | 
|---|
| 143 | die( "unreachable"); | 
|---|
| 144 | case ALoc: | 
|---|
| 145 | case AEsc: | 
|---|
| 146 | case AUnk: | 
|---|
| 147 | r = a->base; | 
|---|
| 148 | if (!a->offset) | 
|---|
| 149 | break; | 
|---|
| 150 | r1 = getcon(a->offset, curf); | 
|---|
| 151 | r = iins(Kl, Oadd, r, r1, l); | 
|---|
| 152 | break; | 
|---|
| 153 | case ACon: | 
|---|
| 154 | case ASym: | 
|---|
| 155 | c.type = CAddr; | 
|---|
| 156 | c.label = a->label; | 
|---|
| 157 | c.bits.i = a->offset; | 
|---|
| 158 | c.local = 0; | 
|---|
| 159 | r = newcon(&c, curf); | 
|---|
| 160 | break; | 
|---|
| 161 | } | 
|---|
| 162 | } | 
|---|
| 163 | r = iins(cls, ld, r, R, l); | 
|---|
| 164 | if (!all) | 
|---|
| 165 | mask(cls, &r, msk, l); | 
|---|
| 166 | return r; | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | static int | 
|---|
| 170 | killsl(Ref r, Slice sl) | 
|---|
| 171 | { | 
|---|
| 172 | Alias *a; | 
|---|
| 173 |  | 
|---|
| 174 | if (rtype(sl.ref) != RTmp) | 
|---|
| 175 | return 0; | 
|---|
| 176 | a = &curf->tmp[sl.ref.val].alias; | 
|---|
| 177 | switch (a->type) { | 
|---|
| 178 | default:   die( "unreachable"); | 
|---|
| 179 | case ALoc: | 
|---|
| 180 | case AEsc: | 
|---|
| 181 | case AUnk: return req(a->base, r); | 
|---|
| 182 | case ACon: | 
|---|
| 183 | case ASym: return 0; | 
|---|
| 184 | } | 
|---|
| 185 | } | 
|---|
| 186 |  | 
|---|
| 187 | /* returns a ref containing the contents of the slice | 
|---|
| 188 | * passed as argument, all the bits set to 0 in the | 
|---|
| 189 | * mask argument are zeroed in the result; | 
|---|
| 190 | * the returned ref has an integer class when the | 
|---|
| 191 | * mask does not cover all the bits of the slice, | 
|---|
| 192 | * otherwise, it has class sl.cls | 
|---|
| 193 | * the procedure returns R when it fails */ | 
|---|
| 194 | static Ref | 
|---|
| 195 | def(Slice sl, bits msk, Blk *b, Ins *i, Loc *il) | 
|---|
| 196 | { | 
|---|
| 197 | Blk *bp; | 
|---|
| 198 | bits msk1, msks; | 
|---|
| 199 | int off, cls, cls1, op, sz, ld; | 
|---|
| 200 | uint np, oldl, oldt; | 
|---|
| 201 | Ref r, r1; | 
|---|
| 202 | Phi *p; | 
|---|
| 203 | Insert *ist; | 
|---|
| 204 | Loc l; | 
|---|
| 205 |  | 
|---|
| 206 | /* invariants: | 
|---|
| 207 | * -1- b dominates il->blk; so we can use | 
|---|
| 208 | *     temporaries of b in il->blk | 
|---|
| 209 | * -2- if il->type != LNoLoad, then il->blk | 
|---|
| 210 | *     postdominates the original load; so it | 
|---|
| 211 | *     is safe to load in il->blk | 
|---|
| 212 | * -3- if il->type != LNoLoad, then b | 
|---|
| 213 | *     postdominates il->blk (and by 2, the | 
|---|
| 214 | *     original load) | 
|---|
| 215 | */ | 
|---|
| 216 | assert(dom(b, il->blk)); | 
|---|
| 217 | oldl = nlog; | 
|---|
| 218 | oldt = curf->ntmp; | 
|---|
| 219 | if (0) { | 
|---|
| 220 | Load: | 
|---|
| 221 | curf->ntmp = oldt; | 
|---|
| 222 | nlog = oldl; | 
|---|
| 223 | if (il->type != LLoad) | 
|---|
| 224 | return R; | 
|---|
| 225 | return load(sl, msk, il); | 
|---|
| 226 | } | 
|---|
| 227 |  | 
|---|
| 228 | if (!i) | 
|---|
| 229 | i = &b->ins[b->nins]; | 
|---|
| 230 | cls = sl.sz > 4 ? Kl : Kw; | 
|---|
| 231 | msks = MASK(sl.sz); | 
|---|
| 232 |  | 
|---|
| 233 | while (i > b->ins) { | 
|---|
| 234 | --i; | 
|---|
| 235 | if (killsl(i->to, sl) | 
|---|
| 236 | || (i->op == Ocall && escapes(sl.ref, curf))) | 
|---|
| 237 | goto Load; | 
|---|
| 238 | ld = isload(i->op); | 
|---|
| 239 | if (ld) { | 
|---|
| 240 | sz = loadsz(i); | 
|---|
| 241 | r1 = i->arg[0]; | 
|---|
| 242 | r = i->to; | 
|---|
| 243 | } else if (isstore(i->op)) { | 
|---|
| 244 | sz = storesz(i); | 
|---|
| 245 | r1 = i->arg[1]; | 
|---|
| 246 | r = i->arg[0]; | 
|---|
| 247 | } else | 
|---|
| 248 | continue; | 
|---|
| 249 | switch (alias(sl.ref, sl.sz, r1, sz, &off, curf)) { | 
|---|
| 250 | case MustAlias: | 
|---|
| 251 | if (off < 0) { | 
|---|
| 252 | off = -off; | 
|---|
| 253 | msk1 = (MASK(sz) << 8*off) & msks; | 
|---|
| 254 | op = Oshl; | 
|---|
| 255 | } else { | 
|---|
| 256 | msk1 = (MASK(sz) >> 8*off) & msks; | 
|---|
| 257 | op = Oshr; | 
|---|
| 258 | } | 
|---|
| 259 | if ((msk1 & msk) == 0) | 
|---|
| 260 | break; | 
|---|
| 261 | if (off) { | 
|---|
| 262 | cls1 = cls; | 
|---|
| 263 | if (op == Oshr && off + sl.sz > 4) | 
|---|
| 264 | cls1 = Kl; | 
|---|
| 265 | cast(&r, cls1, il); | 
|---|
| 266 | r1 = getcon(8*off, curf); | 
|---|
| 267 | r = iins(cls1, op, r, r1, il); | 
|---|
| 268 | } | 
|---|
| 269 | if ((msk1 & msk) != msk1 || off + sz < sl.sz) | 
|---|
| 270 | mask(cls, &r, msk1 & msk, il); | 
|---|
| 271 | if ((msk & ~msk1) != 0) { | 
|---|
| 272 | r1 = def(sl, msk & ~msk1, b, i, il); | 
|---|
| 273 | if (req(r1, R)) | 
|---|
| 274 | goto Load; | 
|---|
| 275 | r = iins(cls, Oor, r, r1, il); | 
|---|
| 276 | } | 
|---|
| 277 | if (msk == msks) | 
|---|
| 278 | cast(&r, sl.cls, il); | 
|---|
| 279 | return r; | 
|---|
| 280 | case MayAlias: | 
|---|
| 281 | if (ld) | 
|---|
| 282 | break; | 
|---|
| 283 | else | 
|---|
| 284 | goto Load; | 
|---|
| 285 | case NoAlias: | 
|---|
| 286 | break; | 
|---|
| 287 | default: | 
|---|
| 288 | die( "unreachable"); | 
|---|
| 289 | } | 
|---|
| 290 | } | 
|---|
| 291 |  | 
|---|
| 292 | for (ist=ilog; ist<&ilog[nlog]; ++ist) | 
|---|
| 293 | if (ist->isphi && ist->bid == b->id) | 
|---|
| 294 | if (req(ist->new.phi.m.ref, sl.ref)) | 
|---|
| 295 | if (ist->new.phi.m.sz == sl.sz) { | 
|---|
| 296 | r = ist->new.phi.p->to; | 
|---|
| 297 | if (msk != msks) | 
|---|
| 298 | mask(cls, &r, msk, il); | 
|---|
| 299 | else | 
|---|
| 300 | cast(&r, sl.cls, il); | 
|---|
| 301 | return r; | 
|---|
| 302 | } | 
|---|
| 303 |  | 
|---|
| 304 | for (p=b->phi; p; p=p->link) | 
|---|
| 305 | if (killsl(p->to, sl)) | 
|---|
| 306 | /* scanning predecessors in that | 
|---|
| 307 | * case would be unsafe */ | 
|---|
| 308 | goto Load; | 
|---|
| 309 |  | 
|---|
| 310 | if (b->npred == 0) | 
|---|
| 311 | goto Load; | 
|---|
| 312 | if (b->npred == 1) { | 
|---|
| 313 | bp = b->pred[0]; | 
|---|
| 314 | assert(bp->loop >= il->blk->loop); | 
|---|
| 315 | l = *il; | 
|---|
| 316 | if (bp->s2) | 
|---|
| 317 | l.type = LNoLoad; | 
|---|
| 318 | r1 = def(sl, msk, bp, 0, &l); | 
|---|
| 319 | if (req(r1, R)) | 
|---|
| 320 | goto Load; | 
|---|
| 321 | return r1; | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | r = newtmp( "ld", sl.cls, curf); | 
|---|
| 325 | p = alloc(sizeof *p); | 
|---|
| 326 | vgrow(&ilog, ++nlog); | 
|---|
| 327 | ist = &ilog[nlog-1]; | 
|---|
| 328 | ist->isphi = 1; | 
|---|
| 329 | ist->bid = b->id; | 
|---|
| 330 | ist->new.phi.m = sl; | 
|---|
| 331 | ist->new.phi.p = p; | 
|---|
| 332 | p->to = r; | 
|---|
| 333 | p->cls = sl.cls; | 
|---|
| 334 | p->narg = b->npred; | 
|---|
| 335 | p->arg = vnew(p->narg, sizeof p->arg[0], Pfn); | 
|---|
| 336 | p->blk = vnew(p->narg, sizeof p->blk[0], Pfn); | 
|---|
| 337 | for (np=0; np<b->npred; ++np) { | 
|---|
| 338 | bp = b->pred[np]; | 
|---|
| 339 | if (!bp->s2 | 
|---|
| 340 | && il->type != LNoLoad | 
|---|
| 341 | && bp->loop < il->blk->loop) | 
|---|
| 342 | l.type = LLoad; | 
|---|
| 343 | else | 
|---|
| 344 | l.type = LNoLoad; | 
|---|
| 345 | l.blk = bp; | 
|---|
| 346 | l.off = bp->nins; | 
|---|
| 347 | r1 = def(sl, msks, bp, 0, &l); | 
|---|
| 348 | if (req(r1, R)) | 
|---|
| 349 | goto Load; | 
|---|
| 350 | p->arg[np] = r1; | 
|---|
| 351 | p->blk[np] = bp; | 
|---|
| 352 | } | 
|---|
| 353 | if (msk != msks) | 
|---|
| 354 | mask(cls, &r, msk, il); | 
|---|
| 355 | return r; | 
|---|
| 356 | } | 
|---|
| 357 |  | 
|---|
| 358 | static int | 
|---|
| 359 | icmp(const void *pa, const void *pb) | 
|---|
| 360 | { | 
|---|
| 361 | Insert *a, *b; | 
|---|
| 362 | int c; | 
|---|
| 363 |  | 
|---|
| 364 | a = (Insert *)pa; | 
|---|
| 365 | b = (Insert *)pb; | 
|---|
| 366 | if ((c = a->bid - b->bid)) | 
|---|
| 367 | return c; | 
|---|
| 368 | if (a->isphi && b->isphi) | 
|---|
| 369 | return 0; | 
|---|
| 370 | if (a->isphi) | 
|---|
| 371 | return -1; | 
|---|
| 372 | if (b->isphi) | 
|---|
| 373 | return +1; | 
|---|
| 374 | if ((c = a->off - b->off)) | 
|---|
| 375 | return c; | 
|---|
| 376 | return a->num - b->num; | 
|---|
| 377 | } | 
|---|
| 378 |  | 
|---|
| 379 | /* require rpo ssa alias */ | 
|---|
| 380 | void | 
|---|
| 381 | loadopt(Fn *fn) | 
|---|
| 382 | { | 
|---|
| 383 | Ins *i, *ib; | 
|---|
| 384 | Blk *b; | 
|---|
| 385 | int sz; | 
|---|
| 386 | uint n, ni, ext, nt; | 
|---|
| 387 | Insert *ist; | 
|---|
| 388 | Slice sl; | 
|---|
| 389 | Loc l; | 
|---|
| 390 |  | 
|---|
| 391 | curf = fn; | 
|---|
| 392 | ilog = vnew(0, sizeof ilog[0], Pheap); | 
|---|
| 393 | nlog = 0; | 
|---|
| 394 | inum = 0; | 
|---|
| 395 | for (b=fn->start; b; b=b->link) | 
|---|
| 396 | for (i=b->ins; i<&b->ins[b->nins]; ++i) { | 
|---|
| 397 | if (!isload(i->op)) | 
|---|
| 398 | continue; | 
|---|
| 399 | sz = loadsz(i); | 
|---|
| 400 | sl = (Slice){i->arg[0], sz, i->cls}; | 
|---|
| 401 | l = (Loc){LRoot, i-b->ins, b}; | 
|---|
| 402 | i->arg[1] = def(sl, MASK(sz), b, i, &l); | 
|---|
| 403 | } | 
|---|
| 404 | qsort(ilog, nlog, sizeof ilog[0], icmp); | 
|---|
| 405 | vgrow(&ilog, nlog+1); | 
|---|
| 406 | ilog[nlog].bid = fn->nblk; /* add a sentinel */ | 
|---|
| 407 | ib = vnew(0, sizeof(Ins), Pheap); | 
|---|
| 408 | for (ist=ilog, n=0; n<fn->nblk; ++n) { | 
|---|
| 409 | b = fn->rpo[n]; | 
|---|
| 410 | for (; ist->bid == n && ist->isphi; ++ist) { | 
|---|
| 411 | ist->new.phi.p->link = b->phi; | 
|---|
| 412 | b->phi = ist->new.phi.p; | 
|---|
| 413 | } | 
|---|
| 414 | ni = 0; | 
|---|
| 415 | nt = 0; | 
|---|
| 416 | for (;;) { | 
|---|
| 417 | if (ist->bid == n && ist->off == ni) | 
|---|
| 418 | i = &ist++->new.ins; | 
|---|
| 419 | else { | 
|---|
| 420 | if (ni == b->nins) | 
|---|
| 421 | break; | 
|---|
| 422 | i = &b->ins[ni++]; | 
|---|
| 423 | if (isload(i->op) | 
|---|
| 424 | && !req(i->arg[1], R)) { | 
|---|
| 425 | ext = Oextsb + i->op - Oloadsb; | 
|---|
| 426 | switch (i->op) { | 
|---|
| 427 | default: | 
|---|
| 428 | die( "unreachable"); | 
|---|
| 429 | case Oloadsb: | 
|---|
| 430 | case Oloadub: | 
|---|
| 431 | case Oloadsh: | 
|---|
| 432 | case Oloaduh: | 
|---|
| 433 | i->op = ext; | 
|---|
| 434 | break; | 
|---|
| 435 | case Oloadsw: | 
|---|
| 436 | case Oloaduw: | 
|---|
| 437 | if (i->cls == Kl) { | 
|---|
| 438 | i->op = ext; | 
|---|
| 439 | break; | 
|---|
| 440 | } | 
|---|
| 441 | /* fall through */ | 
|---|
| 442 | case Oload: | 
|---|
| 443 | i->op = Ocopy; | 
|---|
| 444 | break; | 
|---|
| 445 | } | 
|---|
| 446 | i->arg[0] = i->arg[1]; | 
|---|
| 447 | i->arg[1] = R; | 
|---|
| 448 | } | 
|---|
| 449 | } | 
|---|
| 450 | vgrow(&ib, ++nt); | 
|---|
| 451 | ib[nt-1] = *i; | 
|---|
| 452 | } | 
|---|
| 453 | b->nins = nt; | 
|---|
| 454 | idup(&b->ins, ib, nt); | 
|---|
| 455 | } | 
|---|
| 456 | vfree(ib); | 
|---|
| 457 | vfree(ilog); | 
|---|
| 458 | if (debug['M']) { | 
|---|
| 459 | fprintf(stderr, "\n> After load elimination:\n"); | 
|---|
| 460 | printfn(fn, stderr); | 
|---|
| 461 | } | 
|---|
| 462 | } | 
|---|
| 463 |  | 
|---|