| 1 | #include "all.h" |
| 2 | |
| 3 | static int |
| 4 | memarg(Ref *r, int op, Ins *i) |
| 5 | { |
| 6 | if (isload(op) || op == Ocall) |
| 7 | return r == &i->arg[0]; |
| 8 | if (isstore(op)) |
| 9 | return r == &i->arg[1]; |
| 10 | return 0; |
| 11 | } |
| 12 | |
| 13 | static int |
| 14 | immarg(Ref *r, int op, Ins *i) |
| 15 | { |
| 16 | return rv64_op[op].imm && r == &i->arg[1]; |
| 17 | } |
| 18 | |
| 19 | static void |
| 20 | fixarg(Ref *r, int k, Ins *i, Fn *fn) |
| 21 | { |
| 22 | char buf[32]; |
| 23 | Ref r0, r1; |
| 24 | int s, n, op; |
| 25 | Con *c; |
| 26 | |
| 27 | r0 = r1 = *r; |
| 28 | op = i ? i->op : Ocopy; |
| 29 | switch (rtype(r0)) { |
| 30 | case RCon: |
| 31 | c = &fn->con[r0.val]; |
| 32 | if (c->type == CAddr && memarg(r, op, i)) |
| 33 | break; |
| 34 | if (c->type == CBits && immarg(r, op, i)) |
| 35 | if (-2048 <= c->bits.i && c->bits.i < 2048) |
| 36 | break; |
| 37 | r1 = newtmp("isel" , k, fn); |
| 38 | if (KBASE(k) == 1) { |
| 39 | /* load floating points from memory |
| 40 | * slots, they can't be used as |
| 41 | * immediates |
| 42 | */ |
| 43 | assert(c->type == CBits); |
| 44 | n = gasstash(&c->bits, KWIDE(k) ? 8 : 4); |
| 45 | vgrow(&fn->con, ++fn->ncon); |
| 46 | c = &fn->con[fn->ncon-1]; |
| 47 | sprintf(buf, "fp%d" , n); |
| 48 | *c = (Con){.type = CAddr, .local = 1}; |
| 49 | c->label = intern(buf); |
| 50 | emit(Oload, k, r1, CON(c-fn->con), R); |
| 51 | break; |
| 52 | } |
| 53 | emit(Ocopy, k, r1, r0, R); |
| 54 | break; |
| 55 | case RTmp: |
| 56 | if (isreg(r0)) |
| 57 | break; |
| 58 | s = fn->tmp[r0.val].slot; |
| 59 | if (s != -1) { |
| 60 | /* aggregate passed by value on |
| 61 | * stack, or fast local address, |
| 62 | * replace with slot if we can |
| 63 | */ |
| 64 | if (memarg(r, op, i)) { |
| 65 | r1 = SLOT(s); |
| 66 | break; |
| 67 | } |
| 68 | r1 = newtmp("isel" , k, fn); |
| 69 | emit(Oaddr, k, r1, SLOT(s), R); |
| 70 | break; |
| 71 | } |
| 72 | if (k == Kw && fn->tmp[r0.val].cls == Kl) { |
| 73 | /* TODO: this sign extension isn't needed |
| 74 | * for 32-bit arithmetic instructions |
| 75 | */ |
| 76 | r1 = newtmp("isel" , k, fn); |
| 77 | emit(Oextsw, Kl, r1, r0, R); |
| 78 | } else { |
| 79 | assert(k == fn->tmp[r0.val].cls); |
| 80 | } |
| 81 | break; |
| 82 | } |
| 83 | *r = r1; |
| 84 | } |
| 85 | |
| 86 | static void |
| 87 | negate(Ref *pr, Fn *fn) |
| 88 | { |
| 89 | Ref r; |
| 90 | |
| 91 | r = newtmp("isel" , Kw, fn); |
| 92 | emit(Oxor, Kw, *pr, r, getcon(1, fn)); |
| 93 | *pr = r; |
| 94 | } |
| 95 | |
| 96 | static void |
| 97 | selcmp(Ins i, int k, int op, Fn *fn) |
| 98 | { |
| 99 | Ins *icmp; |
| 100 | Ref r, r0, r1; |
| 101 | int sign, swap, neg; |
| 102 | |
| 103 | switch (op) { |
| 104 | case Cieq: |
| 105 | r = newtmp("isel" , k, fn); |
| 106 | emit(Oreqz, i.cls, i.to, r, R); |
| 107 | emit(Oxor, k, r, i.arg[0], i.arg[1]); |
| 108 | icmp = curi; |
| 109 | fixarg(&icmp->arg[0], k, icmp, fn); |
| 110 | fixarg(&icmp->arg[1], k, icmp, fn); |
| 111 | return; |
| 112 | case Cine: |
| 113 | r = newtmp("isel" , k, fn); |
| 114 | emit(Ornez, i.cls, i.to, r, R); |
| 115 | emit(Oxor, k, r, i.arg[0], i.arg[1]); |
| 116 | icmp = curi; |
| 117 | fixarg(&icmp->arg[0], k, icmp, fn); |
| 118 | fixarg(&icmp->arg[1], k, icmp, fn); |
| 119 | return; |
| 120 | case Cisge: sign = 1, swap = 0, neg = 1; break; |
| 121 | case Cisgt: sign = 1, swap = 1, neg = 0; break; |
| 122 | case Cisle: sign = 1, swap = 1, neg = 1; break; |
| 123 | case Cislt: sign = 1, swap = 0, neg = 0; break; |
| 124 | case Ciuge: sign = 0, swap = 0, neg = 1; break; |
| 125 | case Ciugt: sign = 0, swap = 1, neg = 0; break; |
| 126 | case Ciule: sign = 0, swap = 1, neg = 1; break; |
| 127 | case Ciult: sign = 0, swap = 0, neg = 0; break; |
| 128 | case NCmpI+Cfeq: |
| 129 | case NCmpI+Cfge: |
| 130 | case NCmpI+Cfgt: |
| 131 | case NCmpI+Cfle: |
| 132 | case NCmpI+Cflt: |
| 133 | swap = 0, neg = 0; |
| 134 | break; |
| 135 | case NCmpI+Cfuo: |
| 136 | negate(&i.to, fn); |
| 137 | /* fall through */ |
| 138 | case NCmpI+Cfo: |
| 139 | r0 = newtmp("isel" , i.cls, fn); |
| 140 | r1 = newtmp("isel" , i.cls, fn); |
| 141 | emit(Oand, i.cls, i.to, r0, r1); |
| 142 | op = KWIDE(k) ? Oceqd : Oceqs; |
| 143 | emit(op, i.cls, r0, i.arg[0], i.arg[0]); |
| 144 | icmp = curi; |
| 145 | fixarg(&icmp->arg[0], k, icmp, fn); |
| 146 | fixarg(&icmp->arg[1], k, icmp, fn); |
| 147 | emit(op, i.cls, r1, i.arg[1], i.arg[1]); |
| 148 | icmp = curi; |
| 149 | fixarg(&icmp->arg[0], k, icmp, fn); |
| 150 | fixarg(&icmp->arg[1], k, icmp, fn); |
| 151 | return; |
| 152 | case NCmpI+Cfne: |
| 153 | swap = 0, neg = 1; |
| 154 | i.op = KWIDE(k) ? Oceqd : Oceqs; |
| 155 | break; |
| 156 | default: |
| 157 | assert(0 && "unknown comparison" ); |
| 158 | } |
| 159 | if (op < NCmpI) |
| 160 | i.op = sign ? Ocsltl : Ocultl; |
| 161 | if (swap) { |
| 162 | r = i.arg[0]; |
| 163 | i.arg[0] = i.arg[1]; |
| 164 | i.arg[1] = r; |
| 165 | } |
| 166 | if (neg) |
| 167 | negate(&i.to, fn); |
| 168 | emiti(i); |
| 169 | icmp = curi; |
| 170 | fixarg(&icmp->arg[0], k, icmp, fn); |
| 171 | fixarg(&icmp->arg[1], k, icmp, fn); |
| 172 | } |
| 173 | |
| 174 | static void |
| 175 | sel(Ins i, Fn *fn) |
| 176 | { |
| 177 | Ins *i0; |
| 178 | int ck, cc; |
| 179 | |
| 180 | if (INRANGE(i.op, Oalloc, Oalloc1)) { |
| 181 | i0 = curi - 1; |
| 182 | salloc(i.to, i.arg[0], fn); |
| 183 | fixarg(&i0->arg[0], Kl, i0, fn); |
| 184 | return; |
| 185 | } |
| 186 | if (iscmp(i.op, &ck, &cc)) { |
| 187 | selcmp(i, ck, cc, fn); |
| 188 | return; |
| 189 | } |
| 190 | if (i.op != Onop) { |
| 191 | emiti(i); |
| 192 | i0 = curi; /* fixarg() can change curi */ |
| 193 | fixarg(&i0->arg[0], argcls(&i, 0), i0, fn); |
| 194 | fixarg(&i0->arg[1], argcls(&i, 1), i0, fn); |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | static void |
| 199 | seljmp(Blk *b, Fn *fn) |
| 200 | { |
| 201 | /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */ |
| 202 | if (b->jmp.type == Jjnz) |
| 203 | fixarg(&b->jmp.arg, Kw, 0, fn); |
| 204 | } |
| 205 | |
| 206 | void |
| 207 | rv64_isel(Fn *fn) |
| 208 | { |
| 209 | Blk *b, **sb; |
| 210 | Ins *i; |
| 211 | Phi *p; |
| 212 | uint n; |
| 213 | int al; |
| 214 | int64_t sz; |
| 215 | |
| 216 | /* assign slots to fast allocs */ |
| 217 | b = fn->start; |
| 218 | /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ |
| 219 | for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) |
| 220 | for (i=b->ins; i<&b->ins[b->nins]; i++) |
| 221 | if (i->op == al) { |
| 222 | if (rtype(i->arg[0]) != RCon) |
| 223 | break; |
| 224 | sz = fn->con[i->arg[0].val].bits.i; |
| 225 | if (sz < 0 || sz >= INT_MAX-15) |
| 226 | err("invalid alloc size %" PRId64, sz); |
| 227 | sz = (sz + n-1) & -n; |
| 228 | sz /= 4; |
| 229 | if (sz > INT_MAX - fn->slot) |
| 230 | die("alloc too large" ); |
| 231 | fn->tmp[i->to.val].slot = fn->slot; |
| 232 | fn->slot += sz; |
| 233 | *i = (Ins){.op = Onop}; |
| 234 | } |
| 235 | |
| 236 | for (b=fn->start; b; b=b->link) { |
| 237 | curi = &insb[NIns]; |
| 238 | for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) |
| 239 | for (p=(*sb)->phi; p; p=p->link) { |
| 240 | for (n=0; p->blk[n] != b; n++) |
| 241 | assert(n+1 < p->narg); |
| 242 | fixarg(&p->arg[n], p->cls, 0, fn); |
| 243 | } |
| 244 | seljmp(b, fn); |
| 245 | for (i=&b->ins[b->nins]; i!=b->ins;) |
| 246 | sel(*--i, fn); |
| 247 | b->nins = &insb[NIns] - curi; |
| 248 | idup(&b->ins, curi, b->nins); |
| 249 | } |
| 250 | |
| 251 | if (debug['I']) { |
| 252 | fprintf(stderr, "\n> After instruction selection:\n" ); |
| 253 | printfn(fn, stderr); |
| 254 | } |
| 255 | } |
| 256 | |