| 1 | #include "all.h" |
| 2 | #include <limits.h> |
| 3 | |
| 4 | /* For x86_64, do the following: |
| 5 | * |
| 6 | * - check that constants are used only in |
| 7 | * places allowed |
| 8 | * - ensure immediates always fit in 32b |
| 9 | * - expose machine register contraints |
| 10 | * on instructions like division. |
| 11 | * - implement fast locals (the streak of |
| 12 | * constant allocX in the first basic block) |
| 13 | * - recognize complex addressing modes |
| 14 | * |
| 15 | * Invariant: the use counts that are used |
| 16 | * in sel() must be sound. This |
| 17 | * is not so trivial, maybe the |
| 18 | * dce should be moved out... |
| 19 | */ |
| 20 | |
| 21 | typedef struct ANum ANum; |
| 22 | |
| 23 | struct ANum { |
| 24 | char n, l, r; |
| 25 | Ins *i; |
| 26 | }; |
| 27 | |
| 28 | static int amatch(Addr *, Ref, int, ANum *, Fn *); |
| 29 | |
| 30 | static int |
| 31 | noimm(Ref r, Fn *fn) |
| 32 | { |
| 33 | int64_t val; |
| 34 | |
| 35 | if (rtype(r) != RCon) |
| 36 | return 0; |
| 37 | switch (fn->con[r.val].type) { |
| 38 | case CAddr: |
| 39 | /* we only support the 'small' |
| 40 | * code model of the ABI, this |
| 41 | * means that we can always |
| 42 | * address data with 32bits |
| 43 | */ |
| 44 | return 0; |
| 45 | case CBits: |
| 46 | val = fn->con[r.val].bits.i; |
| 47 | return (val < INT32_MIN || val > INT32_MAX); |
| 48 | default: |
| 49 | die("invalid constant" ); |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | static int |
| 54 | rslot(Ref r, Fn *fn) |
| 55 | { |
| 56 | if (rtype(r) != RTmp) |
| 57 | return -1; |
| 58 | return fn->tmp[r.val].slot; |
| 59 | } |
| 60 | |
| 61 | static void |
| 62 | fixarg(Ref *r, int k, Ins *i, Fn *fn) |
| 63 | { |
| 64 | char buf[32]; |
| 65 | Addr a, *m; |
| 66 | Ref r0, r1; |
| 67 | int s, n, op; |
| 68 | |
| 69 | r1 = r0 = *r; |
| 70 | s = rslot(r0, fn); |
| 71 | op = i ? i->op : Ocopy; |
| 72 | if (KBASE(k) == 1 && rtype(r0) == RCon) { |
| 73 | /* load floating points from memory |
| 74 | * slots, they can't be used as |
| 75 | * immediates |
| 76 | */ |
| 77 | r1 = MEM(fn->nmem); |
| 78 | vgrow(&fn->mem, ++fn->nmem); |
| 79 | memset(&a, 0, sizeof a); |
| 80 | a.offset.type = CAddr; |
| 81 | a.offset.local = 1; |
| 82 | n = gasstash(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4); |
| 83 | sprintf(buf, "fp%d" , n); |
| 84 | a.offset.label = intern(buf); |
| 85 | fn->mem[fn->nmem-1] = a; |
| 86 | } |
| 87 | else if (op != Ocopy && k == Kl && noimm(r0, fn)) { |
| 88 | /* load constants that do not fit in |
| 89 | * a 32bit signed integer into a |
| 90 | * long temporary |
| 91 | */ |
| 92 | r1 = newtmp("isel" , Kl, fn); |
| 93 | emit(Ocopy, Kl, r1, r0, R); |
| 94 | } |
| 95 | else if (s != -1) { |
| 96 | /* load fast locals' addresses into |
| 97 | * temporaries right before the |
| 98 | * instruction |
| 99 | */ |
| 100 | r1 = newtmp("isel" , Kl, fn); |
| 101 | emit(Oaddr, Kl, r1, SLOT(s), R); |
| 102 | } |
| 103 | else if (!(isstore(op) && r == &i->arg[1]) |
| 104 | && !isload(op) && op != Ocall && rtype(r0) == RCon |
| 105 | && fn->con[r0.val].type == CAddr) { |
| 106 | /* apple as does not support 32-bit |
| 107 | * absolute addressing, use a rip- |
| 108 | * relative leaq instead |
| 109 | */ |
| 110 | r1 = newtmp("isel" , Kl, fn); |
| 111 | emit(Oaddr, Kl, r1, r0, R); |
| 112 | } |
| 113 | else if (rtype(r0) == RMem) { |
| 114 | /* eliminate memory operands of |
| 115 | * the form $foo(%rip, ...) |
| 116 | */ |
| 117 | m = &fn->mem[r0.val]; |
| 118 | if (req(m->base, R)) |
| 119 | if (m->offset.type == CAddr) { |
| 120 | r0 = newtmp("isel" , Kl, fn); |
| 121 | emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R); |
| 122 | m->offset.type = CUndef; |
| 123 | m->base = r0; |
| 124 | } |
| 125 | } |
| 126 | *r = r1; |
| 127 | } |
| 128 | |
| 129 | static void |
| 130 | seladdr(Ref *r, ANum *an, Fn *fn) |
| 131 | { |
| 132 | Addr a; |
| 133 | Ref r0; |
| 134 | |
| 135 | r0 = *r; |
| 136 | if (rtype(r0) == RTmp) { |
| 137 | memset(&a, 0, sizeof a); |
| 138 | if (!amatch(&a, r0, an[r0.val].n, an, fn)) |
| 139 | return; |
| 140 | if (!req(a.base, R)) |
| 141 | if (a.offset.type == CAddr) { |
| 142 | /* apple as does not support |
| 143 | * $foo(%r0, %r1, M); try to |
| 144 | * rewrite it or bail out if |
| 145 | * impossible |
| 146 | */ |
| 147 | if (!req(a.index, R) || rtype(a.base) != RTmp) |
| 148 | return; |
| 149 | else { |
| 150 | a.index = a.base; |
| 151 | a.scale = 1; |
| 152 | a.base = R; |
| 153 | } |
| 154 | } |
| 155 | chuse(r0, -1, fn); |
| 156 | vgrow(&fn->mem, ++fn->nmem); |
| 157 | fn->mem[fn->nmem-1] = a; |
| 158 | chuse(a.base, +1, fn); |
| 159 | chuse(a.index, +1, fn); |
| 160 | *r = MEM(fn->nmem-1); |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | static int |
| 165 | cmpswap(Ref arg[2], int op) |
| 166 | { |
| 167 | switch (op) { |
| 168 | case NCmpI+Cflt: |
| 169 | case NCmpI+Cfle: |
| 170 | return 1; |
| 171 | case NCmpI+Cfgt: |
| 172 | case NCmpI+Cfge: |
| 173 | return 0; |
| 174 | } |
| 175 | return rtype(arg[0]) == RCon; |
| 176 | } |
| 177 | |
| 178 | static void |
| 179 | selcmp(Ref arg[2], int k, int swap, Fn *fn) |
| 180 | { |
| 181 | Ref r; |
| 182 | Ins *icmp; |
| 183 | |
| 184 | if (swap) { |
| 185 | r = arg[1]; |
| 186 | arg[1] = arg[0]; |
| 187 | arg[0] = r; |
| 188 | } |
| 189 | emit(Oxcmp, k, R, arg[1], arg[0]); |
| 190 | icmp = curi; |
| 191 | if (rtype(arg[0]) == RCon) { |
| 192 | assert(k != Kw); |
| 193 | icmp->arg[1] = newtmp("isel" , k, fn); |
| 194 | emit(Ocopy, k, icmp->arg[1], arg[0], R); |
| 195 | fixarg(&curi->arg[0], k, curi, fn); |
| 196 | } |
| 197 | fixarg(&icmp->arg[0], k, icmp, fn); |
| 198 | fixarg(&icmp->arg[1], k, icmp, fn); |
| 199 | } |
| 200 | |
| 201 | static void |
| 202 | sel(Ins i, ANum *an, Fn *fn) |
| 203 | { |
| 204 | Ref r0, r1, tmp[7]; |
| 205 | int x, j, k, kc, sh, swap; |
| 206 | Ins *i0, *i1; |
| 207 | |
| 208 | if (rtype(i.to) == RTmp) |
| 209 | if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) |
| 210 | if (fn->tmp[i.to.val].nuse == 0) { |
| 211 | chuse(i.arg[0], -1, fn); |
| 212 | chuse(i.arg[1], -1, fn); |
| 213 | return; |
| 214 | } |
| 215 | i0 = curi; |
| 216 | k = i.cls; |
| 217 | switch (i.op) { |
| 218 | case Odiv: |
| 219 | case Orem: |
| 220 | case Oudiv: |
| 221 | case Ourem: |
| 222 | if (KBASE(k) == 1) |
| 223 | goto Emit; |
| 224 | if (i.op == Odiv || i.op == Oudiv) |
| 225 | r0 = TMP(RAX), r1 = TMP(RDX); |
| 226 | else |
| 227 | r0 = TMP(RDX), r1 = TMP(RAX); |
| 228 | emit(Ocopy, k, i.to, r0, R); |
| 229 | emit(Ocopy, k, R, r1, R); |
| 230 | if (rtype(i.arg[1]) == RCon) { |
| 231 | /* immediates not allowed for |
| 232 | * divisions in x86 |
| 233 | */ |
| 234 | r0 = newtmp("isel" , k, fn); |
| 235 | } else |
| 236 | r0 = i.arg[1]; |
| 237 | if (fn->tmp[r0.val].slot != -1) |
| 238 | err("unlikely argument %%%s in %s" , |
| 239 | fn->tmp[r0.val].name, optab[i.op].name); |
| 240 | if (i.op == Odiv || i.op == Orem) { |
| 241 | emit(Oxidiv, k, R, r0, R); |
| 242 | emit(Osign, k, TMP(RDX), TMP(RAX), R); |
| 243 | } else { |
| 244 | emit(Oxdiv, k, R, r0, R); |
| 245 | emit(Ocopy, k, TMP(RDX), CON_Z, R); |
| 246 | } |
| 247 | emit(Ocopy, k, TMP(RAX), i.arg[0], R); |
| 248 | fixarg(&curi->arg[0], k, curi, fn); |
| 249 | if (rtype(i.arg[1]) == RCon) |
| 250 | emit(Ocopy, k, r0, i.arg[1], R); |
| 251 | break; |
| 252 | case Osar: |
| 253 | case Oshr: |
| 254 | case Oshl: |
| 255 | r0 = i.arg[1]; |
| 256 | if (rtype(r0) == RCon) |
| 257 | goto Emit; |
| 258 | if (fn->tmp[r0.val].slot != -1) |
| 259 | err("unlikely argument %%%s in %s" , |
| 260 | fn->tmp[r0.val].name, optab[i.op].name); |
| 261 | i.arg[1] = TMP(RCX); |
| 262 | emit(Ocopy, Kw, R, TMP(RCX), R); |
| 263 | emiti(i); |
| 264 | i1 = curi; |
| 265 | emit(Ocopy, Kw, TMP(RCX), r0, R); |
| 266 | fixarg(&i1->arg[0], argcls(&i, 0), i1, fn); |
| 267 | break; |
| 268 | case Ouwtof: |
| 269 | r0 = newtmp("utof" , Kl, fn); |
| 270 | emit(Osltof, k, i.to, r0, R); |
| 271 | emit(Oextuw, Kl, r0, i.arg[0], R); |
| 272 | fixarg(&curi->arg[0], k, curi, fn); |
| 273 | break; |
| 274 | case Oultof: |
| 275 | /* %mask =l and %arg.0, 1 |
| 276 | %isbig =l shr %arg.0, 63 |
| 277 | %divided =l shr %arg.0, %isbig |
| 278 | %or =l or %mask, %divided |
| 279 | %float =d sltof %or |
| 280 | %cast =l cast %float |
| 281 | %addend =l shl %isbig, 52 |
| 282 | %sum =l add %cast, %addend |
| 283 | %result =d cast %sum |
| 284 | */ |
| 285 | r0 = newtmp("utof" , k, fn); |
| 286 | if (k == Ks) |
| 287 | kc = Kw, sh = 23; |
| 288 | else |
| 289 | kc = Kl, sh = 52; |
| 290 | for (j=0; j<4; j++) |
| 291 | tmp[j] = newtmp("utof" , Kl, fn); |
| 292 | for (; j<7; j++) |
| 293 | tmp[j] = newtmp("utof" , kc, fn); |
| 294 | emit(Ocast, k, i.to, tmp[6], R); |
| 295 | emit(Oadd, kc, tmp[6], tmp[4], tmp[5]); |
| 296 | emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn)); |
| 297 | emit(Ocast, kc, tmp[4], r0, R); |
| 298 | emit(Osltof, k, r0, tmp[3], R); |
| 299 | emit(Oor, Kl, tmp[3], tmp[0], tmp[2]); |
| 300 | emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]); |
| 301 | sel(*curi++, 0, fn); |
| 302 | emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn)); |
| 303 | fixarg(&curi->arg[0], Kl, curi, fn); |
| 304 | emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn)); |
| 305 | fixarg(&curi->arg[0], Kl, curi, fn); |
| 306 | break; |
| 307 | case Ostoui: |
| 308 | i.op = Ostosi; |
| 309 | kc = Ks; |
| 310 | tmp[4] = getcon(0xdf000000, fn); |
| 311 | goto Oftoui; |
| 312 | case Odtoui: |
| 313 | i.op = Odtosi; |
| 314 | kc = Kd; |
| 315 | tmp[4] = getcon(0xc3e0000000000000, fn); |
| 316 | Oftoui: |
| 317 | if (k == Kw) |
| 318 | goto Emit; |
| 319 | r0 = newtmp("ftou" , kc, fn); |
| 320 | for (j=0; j<4; j++) |
| 321 | tmp[j] = newtmp("ftou" , Kl, fn); |
| 322 | emit(Oor, Kl, i.to, tmp[0], tmp[3]); |
| 323 | emit(Oand, Kl, tmp[3], tmp[2], tmp[1]); |
| 324 | emit(i.op, Kl, tmp[2], r0, R); |
| 325 | emit(Oadd, kc, r0, tmp[4], i.arg[0]); |
| 326 | i1 = curi; /* fixarg() can change curi */ |
| 327 | fixarg(&i1->arg[0], kc, i1, fn); |
| 328 | fixarg(&i1->arg[1], kc, i1, fn); |
| 329 | emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn)); |
| 330 | emit(i.op, Kl, tmp[0], i.arg[0], R); |
| 331 | fixarg(&curi->arg[0], Kl, curi, fn); |
| 332 | break; |
| 333 | case Onop: |
| 334 | break; |
| 335 | case Ostored: |
| 336 | case Ostores: |
| 337 | case Ostorel: |
| 338 | case Ostorew: |
| 339 | case Ostoreh: |
| 340 | case Ostoreb: |
| 341 | if (rtype(i.arg[0]) == RCon) { |
| 342 | if (i.op == Ostored) |
| 343 | i.op = Ostorel; |
| 344 | if (i.op == Ostores) |
| 345 | i.op = Ostorew; |
| 346 | } |
| 347 | seladdr(&i.arg[1], an, fn); |
| 348 | goto Emit; |
| 349 | case_Oload: |
| 350 | seladdr(&i.arg[0], an, fn); |
| 351 | goto Emit; |
| 352 | case Ocall: |
| 353 | case Osalloc: |
| 354 | case Ocopy: |
| 355 | case Oadd: |
| 356 | case Osub: |
| 357 | case Oneg: |
| 358 | case Omul: |
| 359 | case Oand: |
| 360 | case Oor: |
| 361 | case Oxor: |
| 362 | case Oxtest: |
| 363 | case Ostosi: |
| 364 | case Odtosi: |
| 365 | case Oswtof: |
| 366 | case Osltof: |
| 367 | case Oexts: |
| 368 | case Otruncd: |
| 369 | case Ocast: |
| 370 | case_OExt: |
| 371 | Emit: |
| 372 | emiti(i); |
| 373 | i1 = curi; /* fixarg() can change curi */ |
| 374 | fixarg(&i1->arg[0], argcls(&i, 0), i1, fn); |
| 375 | fixarg(&i1->arg[1], argcls(&i, 1), i1, fn); |
| 376 | break; |
| 377 | case Oalloc4: |
| 378 | case Oalloc8: |
| 379 | case Oalloc16: |
| 380 | salloc(i.to, i.arg[0], fn); |
| 381 | break; |
| 382 | default: |
| 383 | if (isext(i.op)) |
| 384 | goto case_OExt; |
| 385 | if (isload(i.op)) |
| 386 | goto case_Oload; |
| 387 | if (iscmp(i.op, &kc, &x)) { |
| 388 | switch (x) { |
| 389 | case NCmpI+Cfeq: |
| 390 | /* zf is set when operands are |
| 391 | * unordered, so we may have to |
| 392 | * check pf |
| 393 | */ |
| 394 | r0 = newtmp("isel" , Kw, fn); |
| 395 | r1 = newtmp("isel" , Kw, fn); |
| 396 | emit(Oand, Kw, i.to, r0, r1); |
| 397 | emit(Oflagfo, k, r1, R, R); |
| 398 | i.to = r0; |
| 399 | break; |
| 400 | case NCmpI+Cfne: |
| 401 | r0 = newtmp("isel" , Kw, fn); |
| 402 | r1 = newtmp("isel" , Kw, fn); |
| 403 | emit(Oor, Kw, i.to, r0, r1); |
| 404 | emit(Oflagfuo, k, r1, R, R); |
| 405 | i.to = r0; |
| 406 | break; |
| 407 | } |
| 408 | swap = cmpswap(i.arg, x); |
| 409 | if (swap) |
| 410 | x = cmpop(x); |
| 411 | emit(Oflag+x, k, i.to, R, R); |
| 412 | selcmp(i.arg, kc, swap, fn); |
| 413 | break; |
| 414 | } |
| 415 | die("unknown instruction %s" , optab[i.op].name); |
| 416 | } |
| 417 | |
| 418 | while (i0>curi && --i0) { |
| 419 | assert(rslot(i0->arg[0], fn) == -1); |
| 420 | assert(rslot(i0->arg[1], fn) == -1); |
| 421 | } |
| 422 | } |
| 423 | |
| 424 | static Ins * |
| 425 | flagi(Ins *i0, Ins *i) |
| 426 | { |
| 427 | while (i>i0) { |
| 428 | i--; |
| 429 | if (amd64_op[i->op].zflag) |
| 430 | return i; |
| 431 | if (amd64_op[i->op].lflag) |
| 432 | continue; |
| 433 | return 0; |
| 434 | } |
| 435 | return 0; |
| 436 | } |
| 437 | |
| 438 | static void |
| 439 | seljmp(Blk *b, Fn *fn) |
| 440 | { |
| 441 | Ref r; |
| 442 | int c, k, swap; |
| 443 | Ins *fi; |
| 444 | Tmp *t; |
| 445 | |
| 446 | if (b->jmp.type == Jret0 || b->jmp.type == Jjmp) |
| 447 | return; |
| 448 | assert(b->jmp.type == Jjnz); |
| 449 | r = b->jmp.arg; |
| 450 | t = &fn->tmp[r.val]; |
| 451 | b->jmp.arg = R; |
| 452 | assert(rtype(r) == RTmp); |
| 453 | if (b->s1 == b->s2) { |
| 454 | chuse(r, -1, fn); |
| 455 | b->jmp.type = Jjmp; |
| 456 | b->s2 = 0; |
| 457 | return; |
| 458 | } |
| 459 | fi = flagi(b->ins, &b->ins[b->nins]); |
| 460 | if (!fi || !req(fi->to, r)) { |
| 461 | selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */ |
| 462 | b->jmp.type = Jjf + Cine; |
| 463 | } |
| 464 | else if (iscmp(fi->op, &k, &c) |
| 465 | && c != NCmpI+Cfeq /* see sel() */ |
| 466 | && c != NCmpI+Cfne) { |
| 467 | swap = cmpswap(fi->arg, c); |
| 468 | if (swap) |
| 469 | c = cmpop(c); |
| 470 | if (t->nuse == 1) { |
| 471 | selcmp(fi->arg, k, swap, fn); |
| 472 | *fi = (Ins){.op = Onop}; |
| 473 | } |
| 474 | b->jmp.type = Jjf + c; |
| 475 | } |
| 476 | else if (fi->op == Oand && t->nuse == 1 |
| 477 | && (rtype(fi->arg[0]) == RTmp || |
| 478 | rtype(fi->arg[1]) == RTmp)) { |
| 479 | fi->op = Oxtest; |
| 480 | fi->to = R; |
| 481 | b->jmp.type = Jjf + Cine; |
| 482 | if (rtype(fi->arg[1]) == RCon) { |
| 483 | r = fi->arg[1]; |
| 484 | fi->arg[1] = fi->arg[0]; |
| 485 | fi->arg[0] = r; |
| 486 | } |
| 487 | } |
| 488 | else { |
| 489 | /* since flags are not tracked in liveness, |
| 490 | * the result of the flag-setting instruction |
| 491 | * has to be marked as live |
| 492 | */ |
| 493 | if (t->nuse == 1) |
| 494 | emit(Ocopy, Kw, R, r, R); |
| 495 | b->jmp.type = Jjf + Cine; |
| 496 | } |
| 497 | } |
| 498 | |
| 499 | static int |
| 500 | aref(Ref r, ANum *ai) |
| 501 | { |
| 502 | switch (rtype(r)) { |
| 503 | case RCon: |
| 504 | return 2; |
| 505 | case RTmp: |
| 506 | return ai[r.val].n; |
| 507 | default: |
| 508 | die("constant or temporary expected" ); |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | static int |
| 513 | ascale(Ref r, Con *con) |
| 514 | { |
| 515 | int64_t n; |
| 516 | |
| 517 | if (rtype(r) != RCon) |
| 518 | return 0; |
| 519 | if (con[r.val].type != CBits) |
| 520 | return 0; |
| 521 | n = con[r.val].bits.i; |
| 522 | return n == 1 || n == 2 || n == 4 || n == 8; |
| 523 | } |
| 524 | |
| 525 | static void |
| 526 | anumber(ANum *ai, Blk *b, Con *con) |
| 527 | { |
| 528 | /* This should be made obsolete by a proper |
| 529 | * reassoc pass. |
| 530 | * |
| 531 | * Rules: |
| 532 | * |
| 533 | * RTmp(_) -> 0 tmp |
| 534 | * ( RTmp(_) -> 1 slot ) |
| 535 | * RCon(_) -> 2 con |
| 536 | * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) |
| 537 | */ |
| 538 | static char add[10][10] = { |
| 539 | [2] [4] = 4, [4] [2] = 4, |
| 540 | [2] [6] = 6, [6] [2] = 6, |
| 541 | [2] [7] = 7, [7] [2] = 7, |
| 542 | [0] [2] = 4, [2] [0] = 4, /* 4: o + b */ |
| 543 | [0] [0] = 5, /* 5: b + s * i */ |
| 544 | [0] [3] = 5, [3] [0] = 5, |
| 545 | [2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */ |
| 546 | [2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */ |
| 547 | [0] [6] = 7, [6] [0] = 7, |
| 548 | [4] [3] = 7, [3] [4] = 7, |
| 549 | }; |
| 550 | int a, a1, a2, n1, n2, t1, t2; |
| 551 | Ins *i; |
| 552 | |
| 553 | for (i=b->ins; i<&b->ins[b->nins]; i++) { |
| 554 | if (rtype(i->to) == RTmp) |
| 555 | ai[i->to.val].i = i; |
| 556 | if (i->op != Oadd && i->op != Omul) |
| 557 | continue; |
| 558 | a1 = aref(i->arg[0], ai); |
| 559 | a2 = aref(i->arg[1], ai); |
| 560 | t1 = a1 != 1 && a1 != 2; |
| 561 | t2 = a2 != 1 && a2 != 2; |
| 562 | if (i->op == Oadd) { |
| 563 | a = add[n1 = a1][n2 = a2]; |
| 564 | if (t1 && a < add[0][a2]) |
| 565 | a = add[n1 = 0][n2 = a2]; |
| 566 | if (t2 && a < add[a1][0]) |
| 567 | a = add[n1 = a1][n2 = 0]; |
| 568 | if (t1 && t2 && a < add[0][0]) |
| 569 | a = add[n1 = 0][n2 = 0]; |
| 570 | } else { |
| 571 | n1 = n2 = a = 0; |
| 572 | if (ascale(i->arg[0], con) && t2) |
| 573 | a = 3, n1 = 2, n2 = 0; |
| 574 | if (t1 && ascale(i->arg[1], con)) |
| 575 | a = 3, n1 = 0, n2 = 2; |
| 576 | } |
| 577 | ai[i->to.val].n = a; |
| 578 | ai[i->to.val].l = n1; |
| 579 | ai[i->to.val].r = n2; |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | static int |
| 584 | amatch(Addr *a, Ref r, int n, ANum *ai, Fn *fn) |
| 585 | { |
| 586 | Ins *i; |
| 587 | int nl, nr, t, s; |
| 588 | Ref al, ar; |
| 589 | |
| 590 | if (rtype(r) == RCon) { |
| 591 | if (!addcon(&a->offset, &fn->con[r.val])) |
| 592 | err("unlikely sum of $%s and $%s" , |
| 593 | str(a->offset.label), str(fn->con[r.val].label)); |
| 594 | return 1; |
| 595 | } |
| 596 | assert(rtype(r) == RTmp); |
| 597 | i = ai[r.val].i; |
| 598 | nl = ai[r.val].l; |
| 599 | nr = ai[r.val].r; |
| 600 | if (i) { |
| 601 | if (nl > nr) { |
| 602 | al = i->arg[1]; |
| 603 | ar = i->arg[0]; |
| 604 | t = nl, nl = nr, nr = t; |
| 605 | } else { |
| 606 | al = i->arg[0]; |
| 607 | ar = i->arg[1]; |
| 608 | } |
| 609 | } |
| 610 | switch (n) { |
| 611 | case 3: /* s * i */ |
| 612 | a->index = al; |
| 613 | a->scale = fn->con[ar.val].bits.i; |
| 614 | return 0; |
| 615 | case 5: /* b + s * i */ |
| 616 | switch (nr) { |
| 617 | case 0: |
| 618 | if (fn->tmp[ar.val].slot != -1) { |
| 619 | al = i->arg[1]; |
| 620 | ar = i->arg[0]; |
| 621 | } |
| 622 | a->index = ar; |
| 623 | a->scale = 1; |
| 624 | break; |
| 625 | case 3: |
| 626 | amatch(a, ar, nr, ai, fn); |
| 627 | break; |
| 628 | } |
| 629 | r = al; |
| 630 | /* fall through */ |
| 631 | case 0: |
| 632 | s = fn->tmp[r.val].slot; |
| 633 | if (s != -1) |
| 634 | r = SLOT(s); |
| 635 | a->base = r; |
| 636 | return n || s != -1; |
| 637 | case 2: /* constants */ |
| 638 | case 4: /* o + b */ |
| 639 | case 6: /* o + s * i */ |
| 640 | case 7: /* o + b + s * i */ |
| 641 | amatch(a, ar, nr, ai, fn); |
| 642 | amatch(a, al, nl, ai, fn); |
| 643 | return 1; |
| 644 | default: |
| 645 | die("unreachable" ); |
| 646 | } |
| 647 | } |
| 648 | |
| 649 | /* instruction selection |
| 650 | * requires use counts (as given by parsing) |
| 651 | */ |
| 652 | void |
| 653 | amd64_isel(Fn *fn) |
| 654 | { |
| 655 | Blk *b, **sb; |
| 656 | Ins *i; |
| 657 | Phi *p; |
| 658 | uint a; |
| 659 | int n, al; |
| 660 | int64_t sz; |
| 661 | ANum *ainfo; |
| 662 | |
| 663 | /* assign slots to fast allocs */ |
| 664 | b = fn->start; |
| 665 | /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ |
| 666 | for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) |
| 667 | for (i=b->ins; i<&b->ins[b->nins]; i++) |
| 668 | if (i->op == al) { |
| 669 | if (rtype(i->arg[0]) != RCon) |
| 670 | break; |
| 671 | sz = fn->con[i->arg[0].val].bits.i; |
| 672 | if (sz < 0 || sz >= INT_MAX-15) |
| 673 | err("invalid alloc size %" PRId64, sz); |
| 674 | sz = (sz + n-1) & -n; |
| 675 | sz /= 4; |
| 676 | if (sz > INT_MAX - fn->slot) |
| 677 | die("alloc too large" ); |
| 678 | fn->tmp[i->to.val].slot = fn->slot; |
| 679 | fn->slot += sz; |
| 680 | *i = (Ins){.op = Onop}; |
| 681 | } |
| 682 | |
| 683 | /* process basic blocks */ |
| 684 | n = fn->ntmp; |
| 685 | ainfo = emalloc(n * sizeof ainfo[0]); |
| 686 | for (b=fn->start; b; b=b->link) { |
| 687 | curi = &insb[NIns]; |
| 688 | for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) |
| 689 | for (p=(*sb)->phi; p; p=p->link) { |
| 690 | for (a=0; p->blk[a] != b; a++) |
| 691 | assert(a+1 < p->narg); |
| 692 | fixarg(&p->arg[a], p->cls, 0, fn); |
| 693 | } |
| 694 | memset(ainfo, 0, n * sizeof ainfo[0]); |
| 695 | anumber(ainfo, b, fn->con); |
| 696 | seljmp(b, fn); |
| 697 | for (i=&b->ins[b->nins]; i!=b->ins;) |
| 698 | sel(*--i, ainfo, fn); |
| 699 | b->nins = &insb[NIns] - curi; |
| 700 | idup(&b->ins, curi, b->nins); |
| 701 | } |
| 702 | free(ainfo); |
| 703 | |
| 704 | if (debug['I']) { |
| 705 | fprintf(stderr, "\n> After instruction selection:\n" ); |
| 706 | printfn(fn, stderr); |
| 707 | } |
| 708 | } |
| 709 | |