1 | #include "all.h" |
2 | #include <stdarg.h> |
3 | |
4 | typedef struct Bitset Bitset; |
5 | typedef struct Vec Vec; |
6 | typedef struct Bucket Bucket; |
7 | |
8 | struct Vec { |
9 | ulong mag; |
10 | Pool pool; |
11 | size_t esz; |
12 | ulong cap; |
13 | union { |
14 | long long ll; |
15 | long double ld; |
16 | void *ptr; |
17 | } align[]; |
18 | }; |
19 | |
20 | struct Bucket { |
21 | uint nstr; |
22 | char **str; |
23 | }; |
24 | |
25 | enum { |
26 | VMin = 2, |
27 | VMag = 0xcabba9e, |
28 | NPtr = 256, |
29 | IBits = 12, |
30 | IMask = (1<<IBits) - 1, |
31 | }; |
32 | |
33 | Typ *typ; |
34 | Ins insb[NIns], *curi; |
35 | |
36 | static void *ptr[NPtr]; |
37 | static void **pool = ptr; |
38 | static int nptr = 1; |
39 | |
40 | static Bucket itbl[IMask+1]; /* string interning table */ |
41 | |
42 | uint32_t |
43 | hash(char *s) |
44 | { |
45 | uint32_t h; |
46 | |
47 | for (h=0; *s; ++s) |
48 | h = *s + 17*h; |
49 | return h; |
50 | } |
51 | |
52 | void |
53 | die_(char *file, char *s, ...) |
54 | { |
55 | va_list ap; |
56 | |
57 | fprintf(stderr, "%s: dying: " , file); |
58 | va_start(ap, s); |
59 | vfprintf(stderr, s, ap); |
60 | va_end(ap); |
61 | fputc('\n', stderr); |
62 | abort(); |
63 | } |
64 | |
65 | void * |
66 | emalloc(size_t n) |
67 | { |
68 | void *p; |
69 | |
70 | p = calloc(1, n); |
71 | if (!p) |
72 | die("emalloc, out of memory" ); |
73 | return p; |
74 | } |
75 | |
76 | void * |
77 | alloc(size_t n) |
78 | { |
79 | void **pp; |
80 | |
81 | if (n == 0) |
82 | return 0; |
83 | if (nptr >= NPtr) { |
84 | pp = emalloc(NPtr * sizeof(void *)); |
85 | pp[0] = pool; |
86 | pool = pp; |
87 | nptr = 1; |
88 | } |
89 | return pool[nptr++] = emalloc(n); |
90 | } |
91 | |
92 | void |
93 | freeall() |
94 | { |
95 | void **pp; |
96 | |
97 | for (;;) { |
98 | for (pp = &pool[1]; pp < &pool[nptr]; pp++) |
99 | free(*pp); |
100 | pp = pool[0]; |
101 | if (!pp) |
102 | break; |
103 | free(pool); |
104 | pool = pp; |
105 | nptr = NPtr; |
106 | } |
107 | nptr = 1; |
108 | } |
109 | |
110 | void * |
111 | vnew(ulong len, size_t esz, Pool pool) |
112 | { |
113 | void *(*f)(size_t); |
114 | ulong cap; |
115 | Vec *v; |
116 | |
117 | for (cap=VMin; cap<len; cap*=2) |
118 | ; |
119 | f = pool == Pheap ? emalloc : alloc; |
120 | v = f(cap * esz + sizeof(Vec)); |
121 | v->mag = VMag; |
122 | v->cap = cap; |
123 | v->esz = esz; |
124 | v->pool = pool; |
125 | return v + 1; |
126 | } |
127 | |
128 | void |
129 | vfree(void *p) |
130 | { |
131 | Vec *v; |
132 | |
133 | v = (Vec *)p - 1; |
134 | assert(v->mag == VMag); |
135 | if (v->pool == Pheap) { |
136 | v->mag = 0; |
137 | free(v); |
138 | } |
139 | } |
140 | |
141 | void |
142 | vgrow(void *vp, ulong len) |
143 | { |
144 | Vec *v; |
145 | void *v1; |
146 | |
147 | v = *(Vec **)vp - 1; |
148 | assert(v+1 && v->mag == VMag); |
149 | if (v->cap >= len) |
150 | return; |
151 | v1 = vnew(len, v->esz, v->pool); |
152 | memcpy(v1, v+1, v->cap * v->esz); |
153 | vfree(v+1); |
154 | *(Vec **)vp = v1; |
155 | } |
156 | |
157 | uint32_t |
158 | intern(char *s) |
159 | { |
160 | Bucket *b; |
161 | uint32_t h; |
162 | uint i, n; |
163 | |
164 | h = hash(s) & IMask; |
165 | b = &itbl[h]; |
166 | n = b->nstr; |
167 | |
168 | for (i=0; i<n; i++) |
169 | if (strcmp(s, b->str[i]) == 0) |
170 | return h + (i<<IBits); |
171 | |
172 | if (n == 1<<(32-IBits)) |
173 | die("interning table overflow" ); |
174 | if (n == 0) |
175 | b->str = vnew(1, sizeof b->str[0], Pheap); |
176 | else if ((n & (n-1)) == 0) |
177 | vgrow(&b->str, n+n); |
178 | |
179 | b->str[n] = emalloc(strlen(s)+1); |
180 | b->nstr = n + 1; |
181 | strcpy(b->str[n], s); |
182 | return h + (n<<IBits); |
183 | } |
184 | |
185 | char * |
186 | str(uint32_t id) |
187 | { |
188 | assert(id>>IBits < itbl[id&IMask].nstr); |
189 | return itbl[id&IMask].str[id>>IBits]; |
190 | } |
191 | |
192 | int |
193 | isreg(Ref r) |
194 | { |
195 | return rtype(r) == RTmp && r.val < Tmp0; |
196 | } |
197 | |
198 | int |
199 | iscmp(int op, int *pk, int *pc) |
200 | { |
201 | if (Ocmpw <= op && op <= Ocmpw1) { |
202 | *pc = op - Ocmpw; |
203 | *pk = Kw; |
204 | } |
205 | else if (Ocmpl <= op && op <= Ocmpl1) { |
206 | *pc = op - Ocmpl; |
207 | *pk = Kl; |
208 | } |
209 | else if (Ocmps <= op && op <= Ocmps1) { |
210 | *pc = NCmpI + op - Ocmps; |
211 | *pk = Ks; |
212 | } |
213 | else if (Ocmpd <= op && op <= Ocmpd1) { |
214 | *pc = NCmpI + op - Ocmpd; |
215 | *pk = Kd; |
216 | } |
217 | else |
218 | return 0; |
219 | return 1; |
220 | } |
221 | |
222 | int |
223 | argcls(Ins *i, int n) |
224 | { |
225 | return optab[i->op].argcls[n][i->cls]; |
226 | } |
227 | |
228 | void |
229 | emit(int op, int k, Ref to, Ref arg0, Ref arg1) |
230 | { |
231 | if (curi == insb) |
232 | die("emit, too many instructions" ); |
233 | *--curi = (Ins){ |
234 | .op = op, .cls = k, |
235 | .to = to, .arg = {arg0, arg1} |
236 | }; |
237 | } |
238 | |
239 | void |
240 | emiti(Ins i) |
241 | { |
242 | emit(i.op, i.cls, i.to, i.arg[0], i.arg[1]); |
243 | } |
244 | |
245 | void |
246 | idup(Ins **pd, Ins *s, ulong n) |
247 | { |
248 | *pd = alloc(n * sizeof(Ins)); |
249 | if (n) |
250 | memcpy(*pd, s, n * sizeof(Ins)); |
251 | } |
252 | |
253 | Ins * |
254 | icpy(Ins *d, Ins *s, ulong n) |
255 | { |
256 | if (n) |
257 | memcpy(d, s, n * sizeof(Ins)); |
258 | return d + n; |
259 | } |
260 | |
261 | static int cmptab[][2] ={ |
262 | /* negation swap */ |
263 | [Ciule] = {Ciugt, Ciuge}, |
264 | [Ciult] = {Ciuge, Ciugt}, |
265 | [Ciugt] = {Ciule, Ciult}, |
266 | [Ciuge] = {Ciult, Ciule}, |
267 | [Cisle] = {Cisgt, Cisge}, |
268 | [Cislt] = {Cisge, Cisgt}, |
269 | [Cisgt] = {Cisle, Cislt}, |
270 | [Cisge] = {Cislt, Cisle}, |
271 | [Cieq] = {Cine, Cieq}, |
272 | [Cine] = {Cieq, Cine}, |
273 | [NCmpI+Cfle] = {NCmpI+Cfgt, NCmpI+Cfge}, |
274 | [NCmpI+Cflt] = {NCmpI+Cfge, NCmpI+Cfgt}, |
275 | [NCmpI+Cfgt] = {NCmpI+Cfle, NCmpI+Cflt}, |
276 | [NCmpI+Cfge] = {NCmpI+Cflt, NCmpI+Cfle}, |
277 | [NCmpI+Cfeq] = {NCmpI+Cfne, NCmpI+Cfeq}, |
278 | [NCmpI+Cfne] = {NCmpI+Cfeq, NCmpI+Cfne}, |
279 | [NCmpI+Cfo] = {NCmpI+Cfuo, NCmpI+Cfo}, |
280 | [NCmpI+Cfuo] = {NCmpI+Cfo, NCmpI+Cfuo}, |
281 | }; |
282 | |
283 | int |
284 | cmpneg(int c) |
285 | { |
286 | assert(0 <= c && c < NCmp); |
287 | return cmptab[c][0]; |
288 | } |
289 | |
290 | int |
291 | cmpop(int c) |
292 | { |
293 | assert(0 <= c && c < NCmp); |
294 | return cmptab[c][1]; |
295 | } |
296 | |
297 | int |
298 | clsmerge(short *pk, short k) |
299 | { |
300 | short k1; |
301 | |
302 | k1 = *pk; |
303 | if (k1 == Kx) { |
304 | *pk = k; |
305 | return 0; |
306 | } |
307 | if ((k1 == Kw && k == Kl) || (k1 == Kl && k == Kw)) { |
308 | *pk = Kw; |
309 | return 0; |
310 | } |
311 | return k1 != k; |
312 | } |
313 | |
314 | int |
315 | phicls(int t, Tmp *tmp) |
316 | { |
317 | int t1; |
318 | |
319 | t1 = tmp[t].phi; |
320 | if (!t1) |
321 | return t; |
322 | t1 = phicls(t1, tmp); |
323 | tmp[t].phi = t1; |
324 | return t1; |
325 | } |
326 | |
327 | Ref |
328 | newtmp(char *prfx, int k, Fn *fn) |
329 | { |
330 | static int n; |
331 | int t; |
332 | |
333 | t = fn->ntmp++; |
334 | vgrow(&fn->tmp, fn->ntmp); |
335 | memset(&fn->tmp[t], 0, sizeof(Tmp)); |
336 | if (prfx) |
337 | sprintf(fn->tmp[t].name, "%s.%d" , prfx, ++n); |
338 | fn->tmp[t].cls = k; |
339 | fn->tmp[t].slot = -1; |
340 | fn->tmp[t].nuse = +1; |
341 | fn->tmp[t].ndef = +1; |
342 | return TMP(t); |
343 | } |
344 | |
345 | void |
346 | chuse(Ref r, int du, Fn *fn) |
347 | { |
348 | if (rtype(r) == RTmp) |
349 | fn->tmp[r.val].nuse += du; |
350 | } |
351 | |
352 | Ref |
353 | newcon(Con *c0, Fn *fn) |
354 | { |
355 | Con *c1; |
356 | int i; |
357 | |
358 | for (i=0; i<fn->ncon; i++) { |
359 | c1 = &fn->con[i]; |
360 | if (c0->type == c1->type |
361 | && c0->bits.i == c1->bits.i |
362 | && c0->label == c1->label |
363 | && c0->local == c1->local) |
364 | return CON(i); |
365 | } |
366 | vgrow(&fn->con, ++fn->ncon); |
367 | fn->con[i] = *c0; |
368 | return CON(i); |
369 | } |
370 | |
371 | Ref |
372 | getcon(int64_t val, Fn *fn) |
373 | { |
374 | int c; |
375 | |
376 | for (c=0; c<fn->ncon; c++) |
377 | if (fn->con[c].type == CBits && fn->con[c].bits.i == val) |
378 | return CON(c); |
379 | vgrow(&fn->con, ++fn->ncon); |
380 | fn->con[c] = (Con){.type = CBits, .bits.i = val}; |
381 | return CON(c); |
382 | } |
383 | |
384 | int |
385 | addcon(Con *c0, Con *c1) |
386 | { |
387 | if (c0->type == CUndef) |
388 | *c0 = *c1; |
389 | else { |
390 | if (c1->type == CAddr) { |
391 | if (c0->type == CAddr) |
392 | return 0; |
393 | c0->type = CAddr; |
394 | c0->label = c1->label; |
395 | } |
396 | c0->bits.i += c1->bits.i; |
397 | } |
398 | return 1; |
399 | } |
400 | |
401 | void |
402 | blit(Ref rdst, uint doff, Ref rsrc, uint boff, uint sz, Fn *fn) |
403 | { |
404 | struct { int st, ld, cls, size; } *p, tbl[] = { |
405 | { Ostorel, Oload, Kl, 8 }, |
406 | { Ostorew, Oload, Kw, 4 }, |
407 | { Ostoreh, Oloaduh, Kw, 2 }, |
408 | { Ostoreb, Oloadub, Kw, 1 } |
409 | }; |
410 | Ref r, r1; |
411 | uint s; |
412 | |
413 | for (p=tbl; sz; p++) |
414 | for (s=p->size; sz>=s; sz-=s, doff+=s, boff+=s) { |
415 | r = newtmp("blt" , Kl, fn); |
416 | r1 = newtmp("blt" , Kl, fn); |
417 | emit(p->st, 0, R, r, r1); |
418 | emit(Oadd, Kl, r1, rdst, getcon(doff, fn)); |
419 | r1 = newtmp("blt" , Kl, fn); |
420 | emit(p->ld, p->cls, r, r1, R); |
421 | emit(Oadd, Kl, r1, rsrc, getcon(boff, fn)); |
422 | } |
423 | } |
424 | |
425 | void |
426 | blit0(Ref rdst, Ref rsrc, uint sz, Fn *fn) |
427 | { |
428 | blit(rdst, 0, rsrc, 0, sz, fn); |
429 | } |
430 | |
431 | void |
432 | salloc(Ref rt, Ref rs, Fn *fn) |
433 | { |
434 | Ref r0, r1; |
435 | int64_t sz; |
436 | |
437 | /* we need to make sure |
438 | * the stack remains aligned |
439 | * (rsp = 0) mod 16 |
440 | */ |
441 | fn->dynalloc = 1; |
442 | if (rtype(rs) == RCon) { |
443 | sz = fn->con[rs.val].bits.i; |
444 | if (sz < 0 || sz >= INT_MAX-15) |
445 | err("invalid alloc size %" PRId64, sz); |
446 | sz = (sz + 15) & -16; |
447 | emit(Osalloc, Kl, rt, getcon(sz, fn), R); |
448 | } else { |
449 | /* r0 = (r + 15) & -16 */ |
450 | r0 = newtmp("isel" , Kl, fn); |
451 | r1 = newtmp("isel" , Kl, fn); |
452 | emit(Osalloc, Kl, rt, r0, R); |
453 | emit(Oand, Kl, r0, r1, getcon(-16, fn)); |
454 | emit(Oadd, Kl, r1, rs, getcon(15, fn)); |
455 | if (fn->tmp[rs.val].slot != -1) |
456 | err("unlikely alloc argument %%%s for %%%s" , |
457 | fn->tmp[rs.val].name, fn->tmp[rt.val].name); |
458 | } |
459 | } |
460 | |
461 | void |
462 | bsinit(BSet *bs, uint n) |
463 | { |
464 | n = (n + NBit-1) / NBit; |
465 | bs->nt = n; |
466 | bs->t = alloc(n * sizeof bs->t[0]); |
467 | } |
468 | |
469 | MAKESURE(NBit_is_64, NBit == 64); |
470 | inline static uint |
471 | popcnt(bits b) |
472 | { |
473 | b = (b & 0x5555555555555555) + ((b>>1) & 0x5555555555555555); |
474 | b = (b & 0x3333333333333333) + ((b>>2) & 0x3333333333333333); |
475 | b = (b & 0x0f0f0f0f0f0f0f0f) + ((b>>4) & 0x0f0f0f0f0f0f0f0f); |
476 | b += (b>>8); |
477 | b += (b>>16); |
478 | b += (b>>32); |
479 | return b & 0xff; |
480 | } |
481 | |
482 | inline static int |
483 | firstbit(bits b) |
484 | { |
485 | int n; |
486 | |
487 | n = 0; |
488 | if (!(b & 0xffffffff)) { |
489 | n += 32; |
490 | b >>= 32; |
491 | } |
492 | if (!(b & 0xffff)) { |
493 | n += 16; |
494 | b >>= 16; |
495 | } |
496 | if (!(b & 0xff)) { |
497 | n += 8; |
498 | b >>= 8; |
499 | } |
500 | if (!(b & 0xf)) { |
501 | n += 4; |
502 | b >>= 4; |
503 | } |
504 | n += (char[16]){4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}[b & 0xf]; |
505 | return n; |
506 | } |
507 | |
508 | uint |
509 | bscount(BSet *bs) |
510 | { |
511 | uint i, n; |
512 | |
513 | n = 0; |
514 | for (i=0; i<bs->nt; i++) |
515 | n += popcnt(bs->t[i]); |
516 | return n; |
517 | } |
518 | |
519 | static inline uint |
520 | bsmax(BSet *bs) |
521 | { |
522 | return bs->nt * NBit; |
523 | } |
524 | |
525 | void |
526 | bsset(BSet *bs, uint elt) |
527 | { |
528 | assert(elt < bsmax(bs)); |
529 | bs->t[elt/NBit] |= BIT(elt%NBit); |
530 | } |
531 | |
532 | void |
533 | bsclr(BSet *bs, uint elt) |
534 | { |
535 | assert(elt < bsmax(bs)); |
536 | bs->t[elt/NBit] &= ~BIT(elt%NBit); |
537 | } |
538 | |
539 | #define BSOP(f, op) \ |
540 | void \ |
541 | f(BSet *a, BSet *b) \ |
542 | { \ |
543 | uint i; \ |
544 | \ |
545 | assert(a->nt == b->nt); \ |
546 | for (i=0; i<a->nt; i++) \ |
547 | a->t[i] op b->t[i]; \ |
548 | } |
549 | |
550 | BSOP(bscopy, =) |
551 | BSOP(bsunion, |=) |
552 | BSOP(bsinter, &=) |
553 | BSOP(bsdiff, &= ~) |
554 | |
555 | int |
556 | bsequal(BSet *a, BSet *b) |
557 | { |
558 | uint i; |
559 | |
560 | assert(a->nt == b->nt); |
561 | for (i=0; i<a->nt; i++) |
562 | if (a->t[i] != b->t[i]) |
563 | return 0; |
564 | return 1; |
565 | } |
566 | |
567 | void |
568 | bszero(BSet *bs) |
569 | { |
570 | memset(bs->t, 0, bs->nt * sizeof bs->t[0]); |
571 | } |
572 | |
573 | /* iterates on a bitset, use as follows |
574 | * |
575 | * for (i=0; bsiter(set, &i); i++) |
576 | * use(i); |
577 | * |
578 | */ |
579 | int |
580 | bsiter(BSet *bs, int *elt) |
581 | { |
582 | bits b; |
583 | uint t, i; |
584 | |
585 | i = *elt; |
586 | t = i/NBit; |
587 | if (t >= bs->nt) |
588 | return 0; |
589 | b = bs->t[t]; |
590 | b &= ~(BIT(i%NBit) - 1); |
591 | while (!b) { |
592 | ++t; |
593 | if (t >= bs->nt) |
594 | return 0; |
595 | b = bs->t[t]; |
596 | } |
597 | *elt = NBit*t + firstbit(b); |
598 | return 1; |
599 | } |
600 | |
601 | void |
602 | dumpts(BSet *bs, Tmp *tmp, FILE *f) |
603 | { |
604 | int t; |
605 | |
606 | fprintf(f, "[" ); |
607 | for (t=Tmp0; bsiter(bs, &t); t++) |
608 | fprintf(f, " %s" , tmp[t].name); |
609 | fprintf(f, " ]\n" ); |
610 | } |
611 | |