| 1 | #include "all.h" |
| 2 | #include <stdarg.h> |
| 3 | |
| 4 | typedef struct Bitset Bitset; |
| 5 | typedef struct Vec Vec; |
| 6 | typedef struct Bucket Bucket; |
| 7 | |
| 8 | struct Vec { |
| 9 | ulong mag; |
| 10 | Pool pool; |
| 11 | size_t esz; |
| 12 | ulong cap; |
| 13 | union { |
| 14 | long long ll; |
| 15 | long double ld; |
| 16 | void *ptr; |
| 17 | } align[]; |
| 18 | }; |
| 19 | |
| 20 | struct Bucket { |
| 21 | uint nstr; |
| 22 | char **str; |
| 23 | }; |
| 24 | |
| 25 | enum { |
| 26 | VMin = 2, |
| 27 | VMag = 0xcabba9e, |
| 28 | NPtr = 256, |
| 29 | IBits = 12, |
| 30 | IMask = (1<<IBits) - 1, |
| 31 | }; |
| 32 | |
| 33 | Typ *typ; |
| 34 | Ins insb[NIns], *curi; |
| 35 | |
| 36 | static void *ptr[NPtr]; |
| 37 | static void **pool = ptr; |
| 38 | static int nptr = 1; |
| 39 | |
| 40 | static Bucket itbl[IMask+1]; /* string interning table */ |
| 41 | |
| 42 | uint32_t |
| 43 | hash(char *s) |
| 44 | { |
| 45 | uint32_t h; |
| 46 | |
| 47 | for (h=0; *s; ++s) |
| 48 | h = *s + 17*h; |
| 49 | return h; |
| 50 | } |
| 51 | |
| 52 | void |
| 53 | die_(char *file, char *s, ...) |
| 54 | { |
| 55 | va_list ap; |
| 56 | |
| 57 | fprintf(stderr, "%s: dying: " , file); |
| 58 | va_start(ap, s); |
| 59 | vfprintf(stderr, s, ap); |
| 60 | va_end(ap); |
| 61 | fputc('\n', stderr); |
| 62 | abort(); |
| 63 | } |
| 64 | |
| 65 | void * |
| 66 | emalloc(size_t n) |
| 67 | { |
| 68 | void *p; |
| 69 | |
| 70 | p = calloc(1, n); |
| 71 | if (!p) |
| 72 | die("emalloc, out of memory" ); |
| 73 | return p; |
| 74 | } |
| 75 | |
| 76 | void * |
| 77 | alloc(size_t n) |
| 78 | { |
| 79 | void **pp; |
| 80 | |
| 81 | if (n == 0) |
| 82 | return 0; |
| 83 | if (nptr >= NPtr) { |
| 84 | pp = emalloc(NPtr * sizeof(void *)); |
| 85 | pp[0] = pool; |
| 86 | pool = pp; |
| 87 | nptr = 1; |
| 88 | } |
| 89 | return pool[nptr++] = emalloc(n); |
| 90 | } |
| 91 | |
| 92 | void |
| 93 | freeall() |
| 94 | { |
| 95 | void **pp; |
| 96 | |
| 97 | for (;;) { |
| 98 | for (pp = &pool[1]; pp < &pool[nptr]; pp++) |
| 99 | free(*pp); |
| 100 | pp = pool[0]; |
| 101 | if (!pp) |
| 102 | break; |
| 103 | free(pool); |
| 104 | pool = pp; |
| 105 | nptr = NPtr; |
| 106 | } |
| 107 | nptr = 1; |
| 108 | } |
| 109 | |
| 110 | void * |
| 111 | vnew(ulong len, size_t esz, Pool pool) |
| 112 | { |
| 113 | void *(*f)(size_t); |
| 114 | ulong cap; |
| 115 | Vec *v; |
| 116 | |
| 117 | for (cap=VMin; cap<len; cap*=2) |
| 118 | ; |
| 119 | f = pool == Pheap ? emalloc : alloc; |
| 120 | v = f(cap * esz + sizeof(Vec)); |
| 121 | v->mag = VMag; |
| 122 | v->cap = cap; |
| 123 | v->esz = esz; |
| 124 | v->pool = pool; |
| 125 | return v + 1; |
| 126 | } |
| 127 | |
| 128 | void |
| 129 | vfree(void *p) |
| 130 | { |
| 131 | Vec *v; |
| 132 | |
| 133 | v = (Vec *)p - 1; |
| 134 | assert(v->mag == VMag); |
| 135 | if (v->pool == Pheap) { |
| 136 | v->mag = 0; |
| 137 | free(v); |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | void |
| 142 | vgrow(void *vp, ulong len) |
| 143 | { |
| 144 | Vec *v; |
| 145 | void *v1; |
| 146 | |
| 147 | v = *(Vec **)vp - 1; |
| 148 | assert(v+1 && v->mag == VMag); |
| 149 | if (v->cap >= len) |
| 150 | return; |
| 151 | v1 = vnew(len, v->esz, v->pool); |
| 152 | memcpy(v1, v+1, v->cap * v->esz); |
| 153 | vfree(v+1); |
| 154 | *(Vec **)vp = v1; |
| 155 | } |
| 156 | |
| 157 | uint32_t |
| 158 | intern(char *s) |
| 159 | { |
| 160 | Bucket *b; |
| 161 | uint32_t h; |
| 162 | uint i, n; |
| 163 | |
| 164 | h = hash(s) & IMask; |
| 165 | b = &itbl[h]; |
| 166 | n = b->nstr; |
| 167 | |
| 168 | for (i=0; i<n; i++) |
| 169 | if (strcmp(s, b->str[i]) == 0) |
| 170 | return h + (i<<IBits); |
| 171 | |
| 172 | if (n == 1<<(32-IBits)) |
| 173 | die("interning table overflow" ); |
| 174 | if (n == 0) |
| 175 | b->str = vnew(1, sizeof b->str[0], Pheap); |
| 176 | else if ((n & (n-1)) == 0) |
| 177 | vgrow(&b->str, n+n); |
| 178 | |
| 179 | b->str[n] = emalloc(strlen(s)+1); |
| 180 | b->nstr = n + 1; |
| 181 | strcpy(b->str[n], s); |
| 182 | return h + (n<<IBits); |
| 183 | } |
| 184 | |
| 185 | char * |
| 186 | str(uint32_t id) |
| 187 | { |
| 188 | assert(id>>IBits < itbl[id&IMask].nstr); |
| 189 | return itbl[id&IMask].str[id>>IBits]; |
| 190 | } |
| 191 | |
| 192 | int |
| 193 | isreg(Ref r) |
| 194 | { |
| 195 | return rtype(r) == RTmp && r.val < Tmp0; |
| 196 | } |
| 197 | |
| 198 | int |
| 199 | iscmp(int op, int *pk, int *pc) |
| 200 | { |
| 201 | if (Ocmpw <= op && op <= Ocmpw1) { |
| 202 | *pc = op - Ocmpw; |
| 203 | *pk = Kw; |
| 204 | } |
| 205 | else if (Ocmpl <= op && op <= Ocmpl1) { |
| 206 | *pc = op - Ocmpl; |
| 207 | *pk = Kl; |
| 208 | } |
| 209 | else if (Ocmps <= op && op <= Ocmps1) { |
| 210 | *pc = NCmpI + op - Ocmps; |
| 211 | *pk = Ks; |
| 212 | } |
| 213 | else if (Ocmpd <= op && op <= Ocmpd1) { |
| 214 | *pc = NCmpI + op - Ocmpd; |
| 215 | *pk = Kd; |
| 216 | } |
| 217 | else |
| 218 | return 0; |
| 219 | return 1; |
| 220 | } |
| 221 | |
| 222 | int |
| 223 | argcls(Ins *i, int n) |
| 224 | { |
| 225 | return optab[i->op].argcls[n][i->cls]; |
| 226 | } |
| 227 | |
| 228 | void |
| 229 | emit(int op, int k, Ref to, Ref arg0, Ref arg1) |
| 230 | { |
| 231 | if (curi == insb) |
| 232 | die("emit, too many instructions" ); |
| 233 | *--curi = (Ins){ |
| 234 | .op = op, .cls = k, |
| 235 | .to = to, .arg = {arg0, arg1} |
| 236 | }; |
| 237 | } |
| 238 | |
| 239 | void |
| 240 | emiti(Ins i) |
| 241 | { |
| 242 | emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]); |
| 243 | } |
| 244 | |
| 245 | void |
| 246 | idup(Ins **pd, Ins *s, ulong n) |
| 247 | { |
| 248 | *pd = alloc(n * sizeof(Ins)); |
| 249 | if (n) |
| 250 | memcpy(*pd, s, n * sizeof(Ins)); |
| 251 | } |
| 252 | |
| 253 | Ins * |
| 254 | icpy(Ins *d, Ins *s, ulong n) |
| 255 | { |
| 256 | if (n) |
| 257 | memcpy(d, s, n * sizeof(Ins)); |
| 258 | return d + n; |
| 259 | } |
| 260 | |
| 261 | static int cmptab[][2] ={ |
| 262 | /* negation swap */ |
| 263 | [Ciule] = {Ciugt, Ciuge}, |
| 264 | [Ciult] = {Ciuge, Ciugt}, |
| 265 | [Ciugt] = {Ciule, Ciult}, |
| 266 | [Ciuge] = {Ciult, Ciule}, |
| 267 | [Cisle] = {Cisgt, Cisge}, |
| 268 | [Cislt] = {Cisge, Cisgt}, |
| 269 | [Cisgt] = {Cisle, Cislt}, |
| 270 | [Cisge] = {Cislt, Cisle}, |
| 271 | [Cieq] = {Cine, Cieq}, |
| 272 | [Cine] = {Cieq, Cine}, |
| 273 | [NCmpI+Cfle] = {NCmpI+Cfgt, NCmpI+Cfge}, |
| 274 | [NCmpI+Cflt] = {NCmpI+Cfge, NCmpI+Cfgt}, |
| 275 | [NCmpI+Cfgt] = {NCmpI+Cfle, NCmpI+Cflt}, |
| 276 | [NCmpI+Cfge] = {NCmpI+Cflt, NCmpI+Cfle}, |
| 277 | [NCmpI+Cfeq] = {NCmpI+Cfne, NCmpI+Cfeq}, |
| 278 | [NCmpI+Cfne] = {NCmpI+Cfeq, NCmpI+Cfne}, |
| 279 | [NCmpI+Cfo] = {NCmpI+Cfuo, NCmpI+Cfo}, |
| 280 | [NCmpI+Cfuo] = {NCmpI+Cfo, NCmpI+Cfuo}, |
| 281 | }; |
| 282 | |
| 283 | int |
| 284 | cmpneg(int c) |
| 285 | { |
| 286 | assert(0 <= c && c < NCmp); |
| 287 | return cmptab[c][0]; |
| 288 | } |
| 289 | |
| 290 | int |
| 291 | cmpop(int c) |
| 292 | { |
| 293 | assert(0 <= c && c < NCmp); |
| 294 | return cmptab[c][1]; |
| 295 | } |
| 296 | |
| 297 | int |
| 298 | clsmerge(short *pk, short k) |
| 299 | { |
| 300 | short k1; |
| 301 | |
| 302 | k1 = *pk; |
| 303 | if (k1 == Kx) { |
| 304 | *pk = k; |
| 305 | return 0; |
| 306 | } |
| 307 | if ((k1 == Kw && k == Kl) || (k1 == Kl && k == Kw)) { |
| 308 | *pk = Kw; |
| 309 | return 0; |
| 310 | } |
| 311 | return k1 != k; |
| 312 | } |
| 313 | |
| 314 | int |
| 315 | phicls(int t, Tmp *tmp) |
| 316 | { |
| 317 | int t1; |
| 318 | |
| 319 | t1 = tmp[t].phi; |
| 320 | if (!t1) |
| 321 | return t; |
| 322 | t1 = phicls(t1, tmp); |
| 323 | tmp[t].phi = t1; |
| 324 | return t1; |
| 325 | } |
| 326 | |
| 327 | Ref |
| 328 | newtmp(char *prfx, int k, Fn *fn) |
| 329 | { |
| 330 | static int n; |
| 331 | int t; |
| 332 | |
| 333 | t = fn->ntmp++; |
| 334 | vgrow(&fn->tmp, fn->ntmp); |
| 335 | memset(&fn->tmp[t], 0, sizeof(Tmp)); |
| 336 | if (prfx) |
| 337 | sprintf(fn->tmp[t].name, "%s.%d" , prfx, ++n); |
| 338 | fn->tmp[t].cls = k; |
| 339 | fn->tmp[t].slot = -1; |
| 340 | fn->tmp[t].nuse = +1; |
| 341 | fn->tmp[t].ndef = +1; |
| 342 | return TMP(t); |
| 343 | } |
| 344 | |
| 345 | void |
| 346 | chuse(Ref r, int du, Fn *fn) |
| 347 | { |
| 348 | if (rtype(r) == RTmp) |
| 349 | fn->tmp[r.val].nuse += du; |
| 350 | } |
| 351 | |
| 352 | Ref |
| 353 | newcon(Con *c0, Fn *fn) |
| 354 | { |
| 355 | Con *c1; |
| 356 | int i; |
| 357 | |
| 358 | for (i=0; i<fn->ncon; i++) { |
| 359 | c1 = &fn->con[i]; |
| 360 | if (c0->type == c1->type |
| 361 | && c0->bits.i == c1->bits.i |
| 362 | && c0->label == c1->label |
| 363 | && c0->local == c1->local) |
| 364 | return CON(i); |
| 365 | } |
| 366 | vgrow(&fn->con, ++fn->ncon); |
| 367 | fn->con[i] = *c0; |
| 368 | return CON(i); |
| 369 | } |
| 370 | |
| 371 | Ref |
| 372 | getcon(int64_t val, Fn *fn) |
| 373 | { |
| 374 | int c; |
| 375 | |
| 376 | for (c=0; c<fn->ncon; c++) |
| 377 | if (fn->con[c].type == CBits && fn->con[c].bits.i == val) |
| 378 | return CON(c); |
| 379 | vgrow(&fn->con, ++fn->ncon); |
| 380 | fn->con[c] = (Con){.type = CBits, .bits.i = val}; |
| 381 | return CON(c); |
| 382 | } |
| 383 | |
| 384 | int |
| 385 | addcon(Con *c0, Con *c1) |
| 386 | { |
| 387 | if (c0->type == CUndef) |
| 388 | *c0 = *c1; |
| 389 | else { |
| 390 | if (c1->type == CAddr) { |
| 391 | if (c0->type == CAddr) |
| 392 | return 0; |
| 393 | c0->type = CAddr; |
| 394 | c0->label = c1->label; |
| 395 | } |
| 396 | c0->bits.i += c1->bits.i; |
| 397 | } |
| 398 | return 1; |
| 399 | } |
| 400 | |
| 401 | void |
| 402 | blit(Ref rdst, uint doff, Ref rsrc, uint boff, uint sz, Fn *fn) |
| 403 | { |
| 404 | struct { int st, ld, cls, size; } *p, tbl[] = { |
| 405 | { Ostorel, Oload, Kl, 8 }, |
| 406 | { Ostorew, Oload, Kw, 4 }, |
| 407 | { Ostoreh, Oloaduh, Kw, 2 }, |
| 408 | { Ostoreb, Oloadub, Kw, 1 } |
| 409 | }; |
| 410 | Ref r, r1; |
| 411 | uint s; |
| 412 | |
| 413 | for (p=tbl; sz; p++) |
| 414 | for (s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) { |
| 415 | r = newtmp("blt" , Kl, fn); |
| 416 | r1 = newtmp("blt" , Kl, fn); |
| 417 | emit(p->st, 0, R, r, r1); |
| 418 | emit(Oadd, Kl, r1, rdst, getcon(doff, fn)); |
| 419 | r1 = newtmp("blt" , Kl, fn); |
| 420 | emit(p->ld, p->cls, r, r1, R); |
| 421 | emit(Oadd, Kl, r1, rsrc, getcon(boff, fn)); |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | void |
| 426 | blit0(Ref rdst, Ref rsrc, uint sz, Fn *fn) |
| 427 | { |
| 428 | blit(rdst, 0, rsrc, 0, sz, fn); |
| 429 | } |
| 430 | |
| 431 | void |
| 432 | salloc(Ref rt, Ref rs, Fn *fn) |
| 433 | { |
| 434 | Ref r0, r1; |
| 435 | int64_t sz; |
| 436 | |
| 437 | /* we need to make sure |
| 438 | * the stack remains aligned |
| 439 | * (rsp = 0) mod 16 |
| 440 | */ |
| 441 | fn->dynalloc = 1; |
| 442 | if (rtype(rs) == RCon) { |
| 443 | sz = fn->con[rs.val].bits.i; |
| 444 | if (sz < 0 || sz >= INT_MAX-15) |
| 445 | err("invalid alloc size %" PRId64, sz); |
| 446 | sz = (sz + 15) & -16; |
| 447 | emit(Osalloc, Kl, rt, getcon(sz, fn), R); |
| 448 | } else { |
| 449 | /* r0 = (r + 15) & -16 */ |
| 450 | r0 = newtmp("isel" , Kl, fn); |
| 451 | r1 = newtmp("isel" , Kl, fn); |
| 452 | emit(Osalloc, Kl, rt, r0, R); |
| 453 | emit(Oand, Kl, r0, r1, getcon(-16, fn)); |
| 454 | emit(Oadd, Kl, r1, rs, getcon(15, fn)); |
| 455 | if (fn->tmp[rs.val].slot != -1) |
| 456 | err("unlikely alloc argument %%%s for %%%s" , |
| 457 | fn->tmp[rs.val].name, fn->tmp[rt.val].name); |
| 458 | } |
| 459 | } |
| 460 | |
| 461 | void |
| 462 | bsinit(BSet *bs, uint n) |
| 463 | { |
| 464 | n = (n + NBit-1) / NBit; |
| 465 | bs->nt = n; |
| 466 | bs->t = alloc(n * sizeof bs->t[0]); |
| 467 | } |
| 468 | |
| 469 | MAKESURE(NBit_is_64, NBit == 64); |
| 470 | inline static uint |
| 471 | popcnt(bits b) |
| 472 | { |
| 473 | b = (b & 0x5555555555555555) + ((b>>1) & 0x5555555555555555); |
| 474 | b = (b & 0x3333333333333333) + ((b>>2) & 0x3333333333333333); |
| 475 | b = (b & 0x0f0f0f0f0f0f0f0f) + ((b>>4) & 0x0f0f0f0f0f0f0f0f); |
| 476 | b += (b>>8); |
| 477 | b += (b>>16); |
| 478 | b += (b>>32); |
| 479 | return b & 0xff; |
| 480 | } |
| 481 | |
| 482 | inline static int |
| 483 | firstbit(bits b) |
| 484 | { |
| 485 | int n; |
| 486 | |
| 487 | n = 0; |
| 488 | if (!(b & 0xffffffff)) { |
| 489 | n += 32; |
| 490 | b >>= 32; |
| 491 | } |
| 492 | if (!(b & 0xffff)) { |
| 493 | n += 16; |
| 494 | b >>= 16; |
| 495 | } |
| 496 | if (!(b & 0xff)) { |
| 497 | n += 8; |
| 498 | b >>= 8; |
| 499 | } |
| 500 | if (!(b & 0xf)) { |
| 501 | n += 4; |
| 502 | b >>= 4; |
| 503 | } |
| 504 | n += (char[16]){4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}[b & 0xf]; |
| 505 | return n; |
| 506 | } |
| 507 | |
| 508 | uint |
| 509 | bscount(BSet *bs) |
| 510 | { |
| 511 | uint i, n; |
| 512 | |
| 513 | n = 0; |
| 514 | for (i=0; i<bs->nt; i++) |
| 515 | n += popcnt(bs->t[i]); |
| 516 | return n; |
| 517 | } |
| 518 | |
| 519 | static inline uint |
| 520 | bsmax(BSet *bs) |
| 521 | { |
| 522 | return bs->nt * NBit; |
| 523 | } |
| 524 | |
| 525 | void |
| 526 | bsset(BSet *bs, uint elt) |
| 527 | { |
| 528 | assert(elt < bsmax(bs)); |
| 529 | bs->t[elt/NBit] |= BIT(elt%NBit); |
| 530 | } |
| 531 | |
| 532 | void |
| 533 | bsclr(BSet *bs, uint elt) |
| 534 | { |
| 535 | assert(elt < bsmax(bs)); |
| 536 | bs->t[elt/NBit] &= ~BIT(elt%NBit); |
| 537 | } |
| 538 | |
| 539 | #define BSOP(f, op) \ |
| 540 | void \ |
| 541 | f(BSet *a, BSet *b) \ |
| 542 | { \ |
| 543 | uint i; \ |
| 544 | \ |
| 545 | assert(a->nt == b->nt); \ |
| 546 | for (i=0; i<a->nt; i++) \ |
| 547 | a->t[i] op b->t[i]; \ |
| 548 | } |
| 549 | |
| 550 | BSOP(bscopy, =) |
| 551 | BSOP(bsunion, |=) |
| 552 | BSOP(bsinter, &=) |
| 553 | BSOP(bsdiff, &= ~) |
| 554 | |
| 555 | int |
| 556 | bsequal(BSet *a, BSet *b) |
| 557 | { |
| 558 | uint i; |
| 559 | |
| 560 | assert(a->nt == b->nt); |
| 561 | for (i=0; i<a->nt; i++) |
| 562 | if (a->t[i] != b->t[i]) |
| 563 | return 0; |
| 564 | return 1; |
| 565 | } |
| 566 | |
| 567 | void |
| 568 | bszero(BSet *bs) |
| 569 | { |
| 570 | memset(bs->t, 0, bs->nt * sizeof bs->t[0]); |
| 571 | } |
| 572 | |
| 573 | /* iterates on a bitset, use as follows |
| 574 | * |
| 575 | * for (i=0; bsiter(set, &i); i++) |
| 576 | * use(i); |
| 577 | * |
| 578 | */ |
| 579 | int |
| 580 | bsiter(BSet *bs, int *elt) |
| 581 | { |
| 582 | bits b; |
| 583 | uint t, i; |
| 584 | |
| 585 | i = *elt; |
| 586 | t = i/NBit; |
| 587 | if (t >= bs->nt) |
| 588 | return 0; |
| 589 | b = bs->t[t]; |
| 590 | b &= ~(BIT(i%NBit) - 1); |
| 591 | while (!b) { |
| 592 | ++t; |
| 593 | if (t >= bs->nt) |
| 594 | return 0; |
| 595 | b = bs->t[t]; |
| 596 | } |
| 597 | *elt = NBit*t + firstbit(b); |
| 598 | return 1; |
| 599 | } |
| 600 | |
| 601 | void |
| 602 | dumpts(BSet *bs, Tmp *tmp, FILE *f) |
| 603 | { |
| 604 | int t; |
| 605 | |
| 606 | fprintf(f, "[" ); |
| 607 | for (t=Tmp0; bsiter(bs, &t); t++) |
| 608 | fprintf(f, " %s" , tmp[t].name); |
| 609 | fprintf(f, " ]\n" ); |
| 610 | } |
| 611 | |