1#include "all.h"
2
/* forward declarations of the helper types defined below */
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
6
/* flag bits stored in Class.class */
enum {
	Cstk = 1 << 0, /* pass on the stack */
	Cptr = 1 << 1, /* replaced by a pointer */
};
11
/* classification of one argument, parameter, or return value */
struct Class {
	char class; /* bitwise or of Cstk and Cptr */
	char ishfa; /* result of isfloatv() on the aggregate */
	struct {
		char base; /* Kx until isfloatv() sets it to Ks or Kd */
		uchar size; /* type size divided by the element width (8 or 4) */
	} hfa;
	uint size; /* bytes: 8 for scalars and pointers, else the type size rounded up to 8 */
	Typ *t; /* aggregate type, set by typclass() */
	uchar nreg; /* entries used in reg[] and cls[]; not set on the Cptr path */
	uchar ngp; /* general-purpose registers needed */
	uchar nfp; /* floating-point registers needed */
	int reg[4]; /* registers assigned to each piece */
	int cls[4]; /* class of each piece */
};
27
/* singly linked list of instructions; stkblob() pushes the
 * stack allocations here and the callers emit them later
 */
struct Insl {
	Ins i;
	Insl *link;
};
32
/* summary of the named parameters, computed by selpar()
 * and consumed by selvastart()
 */
struct Params {
	uint ngp; /* general-purpose registers taken by parameters */
	uint nfp; /* floating-point registers taken by parameters */
	uint nstk; /* 8-byte stack slots taken by parameters */
};
38
/* argument registers, in allocation order; only the first
 * eight entries are real registers. typclass() reads up to
 * four entries ahead of the cursor before argsclass() checks
 * how many registers remain, so the arrays carry four extra
 * zero-initialized slots to keep those reads in bounds
 */
static int gpreg[12] = {R0, R1, R2, R3, R4, R5, R6, R7};
static int fpreg[12] = {V0, V1, V2, V3, V4, V5, V6, V7};
41
/* layout of call's second argument (RCall)
 *
 *         13
 *  29   14 |    9    5   2  0
 *  |0.00|x|x|xxxx|xxxx|xxx|xx|                 range
 *        | |    |    |   |  ` gp regs returned (0..2)
 *        | |    |    |   ` fp regs returned    (0..4)
 *        | |    |    ` gp regs passed          (0..8)
 *        | |    ` fp regs passed               (0..8)
 *        | ` indirect result register x8 used  (0..1)
 *        ` env pointer passed in x9            (0..1)
 */
54
55static int
56isfloatv(Typ *t, char *cls)
57{
58 Field *f;
59 uint n;
60
61 for (n=0; n<t->nunion; n++)
62 for (f=t->fields[n]; f->type != FEnd; f++)
63 switch (f->type) {
64 case Fs:
65 if (*cls == Kd)
66 return 0;
67 *cls = Ks;
68 break;
69 case Fd:
70 if (*cls == Ks)
71 return 0;
72 *cls = Kd;
73 break;
74 case FTyp:
75 if (isfloatv(&typ[f->len], cls))
76 break;
77 /* fall through */
78 default:
79 return 0;
80 }
81 return 1;
82}
83
84static void
85typclass(Class *c, Typ *t, int *gp, int *fp)
86{
87 uint64_t sz;
88 uint n;
89
90 sz = (t->size + 7) & -8;
91 c->t = t;
92 c->class = 0;
93 c->ngp = 0;
94 c->nfp = 0;
95
96 if (t->align > 4)
97 err("alignments larger than 16 are not supported");
98
99 if (t->isdark || sz > 16 || sz == 0) {
100 /* large structs are replaced by a
101 * pointer to some caller-allocated
102 * memory */
103 c->class |= Cptr;
104 c->size = 8;
105 c->ngp = 1;
106 *c->reg = *gp;
107 *c->cls = Kl;
108 return;
109 }
110
111 c->size = sz;
112 c->hfa.base = Kx;
113 c->ishfa = isfloatv(t, &c->hfa.base);
114 c->hfa.size = t->size/(KWIDE(c->hfa.base) ? 8 : 4);
115
116 if (c->ishfa)
117 for (n=0; n<c->hfa.size; n++, c->nfp++) {
118 c->reg[n] = *fp++;
119 c->cls[n] = c->hfa.base;
120 }
121 else
122 for (n=0; n<sz/8; n++, c->ngp++) {
123 c->reg[n] = *gp++;
124 c->cls[n] = Kl;
125 }
126
127 c->nreg = n;
128}
129
130static void
131sttmps(Ref tmp[], int cls[], uint nreg, Ref mem, Fn *fn)
132{
133 static int st[] = {
134 [Kw] = Ostorew, [Kl] = Ostorel,
135 [Ks] = Ostores, [Kd] = Ostored
136 };
137 uint n;
138 uint64_t off;
139 Ref r;
140
141 assert(nreg <= 4);
142 off = 0;
143 for (n=0; n<nreg; n++) {
144 tmp[n] = newtmp("abi", cls[n], fn);
145 r = newtmp("abi", Kl, fn);
146 emit(st[cls[n]], 0, R, tmp[n], r);
147 emit(Oadd, Kl, r, mem, getcon(off, fn));
148 off += KWIDE(cls[n]) ? 8 : 4;
149 }
150}
151
152/* todo, may read out of bounds */
153static void
154ldregs(int reg[], int cls[], int n, Ref mem, Fn *fn)
155{
156 int i;
157 uint64_t off;
158 Ref r;
159
160 off = 0;
161 for (i=0; i<n; i++) {
162 r = newtmp("abi", Kl, fn);
163 emit(Oload, cls[i], TMP(reg[i]), r, R);
164 emit(Oadd, Kl, r, mem, getcon(off, fn));
165 off += KWIDE(cls[i]) ? 8 : 4;
166 }
167}
168
169static void
170selret(Blk *b, Fn *fn)
171{
172 int j, k, cty;
173 Ref r;
174 Class cr;
175
176 j = b->jmp.type;
177
178 if (!isret(j) || j == Jret0)
179 return;
180
181 r = b->jmp.arg;
182 b->jmp.type = Jret0;
183
184 if (j == Jretc) {
185 typclass(&cr, &typ[fn->retty], gpreg, fpreg);
186 if (cr.class & Cptr) {
187 assert(rtype(fn->retr) == RTmp);
188 blit0(fn->retr, r, cr.t->size, fn);
189 cty = 0;
190 } else {
191 ldregs(cr.reg, cr.cls, cr.nreg, r, fn);
192 cty = (cr.nfp << 2) | cr.ngp;
193 }
194 } else {
195 k = j - Jretw;
196 if (KBASE(k) == 0) {
197 emit(Ocopy, k, TMP(R0), r, R);
198 cty = 1;
199 } else {
200 emit(Ocopy, k, TMP(V0), r, R);
201 cty = 1 << 2;
202 }
203 }
204
205 b->jmp.arg = CALL(cty);
206}
207
208static int
209argsclass(Ins *i0, Ins *i1, Class *carg)
210{
211 int envc, ngp, nfp, *gp, *fp;
212 Class *c;
213 Ins *i;
214
215 envc = 0;
216 gp = gpreg;
217 fp = fpreg;
218 ngp = 8;
219 nfp = 8;
220 for (i=i0, c=carg; i<i1; i++, c++)
221 switch (i->op) {
222 case Opar:
223 case Oarg:
224 *c->cls = i->cls;
225 c->size = 8;
226 if (KBASE(i->cls) == 0 && ngp > 0) {
227 ngp--;
228 *c->reg = *gp++;
229 break;
230 }
231 if (KBASE(i->cls) == 1 && nfp > 0) {
232 nfp--;
233 *c->reg = *fp++;
234 break;
235 }
236 c->class |= Cstk;
237 break;
238 case Oparc:
239 case Oargc:
240 typclass(c, &typ[i->arg[0].val], gp, fp);
241 if (c->ngp <= ngp) {
242 if (c->nfp <= nfp) {
243 ngp -= c->ngp;
244 nfp -= c->nfp;
245 gp += c->ngp;
246 fp += c->nfp;
247 break;
248 } else
249 nfp = 0;
250 } else
251 ngp = 0;
252 c->class |= Cstk;
253 break;
254 case Opare:
255 case Oarge:
256 *c->reg = R9;
257 *c->cls = Kl;
258 envc = 1;
259 break;
260 case Oargv:
261 break;
262 default:
263 die("unreachable");
264 }
265
266 return envc << 14 | (gp-gpreg) << 5 | (fp-fpreg) << 9;
267}
268
269bits
270arm64_retregs(Ref r, int p[2])
271{
272 bits b;
273 int ngp, nfp;
274
275 assert(rtype(r) == RCall);
276 ngp = r.val & 3;
277 nfp = (r.val >> 2) & 7;
278 if (p) {
279 p[0] = ngp;
280 p[1] = nfp;
281 }
282 b = 0;
283 while (ngp--)
284 b |= BIT(R0+ngp);
285 while (nfp--)
286 b |= BIT(V0+nfp);
287 return b;
288}
289
290bits
291arm64_argregs(Ref r, int p[2])
292{
293 bits b;
294 int ngp, nfp, x8, x9;
295
296 assert(rtype(r) == RCall);
297 ngp = (r.val >> 5) & 15;
298 nfp = (r.val >> 9) & 15;
299 x8 = (r.val >> 13) & 1;
300 x9 = (r.val >> 14) & 1;
301 if (p) {
302 p[0] = ngp + x8 + x9;
303 p[1] = nfp;
304 }
305 b = 0;
306 while (ngp--)
307 b |= BIT(R0+ngp);
308 while (nfp--)
309 b |= BIT(V0+nfp);
310 return b | ((bits)x8 << R8) | ((bits)x9 << R9);
311}
312
313static void
314stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
315{
316 Insl *il;
317 int al;
318 uint64_t sz;
319
320 il = alloc(sizeof *il);
321 al = c->t->align - 2; /* NAlign == 3 */
322 if (al < 0)
323 al = 0;
324 sz = c->class & Cptr ? c->t->size : c->size;
325 il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}};
326 il->link = *ilp;
327 *ilp = il;
328}
329
/* Lower one call. i0..i1-1 are the argument instructions
 * and i1 is the Ocall itself. Stack allocations needed for
 * aggregates are pushed on *ilp for the caller to emit.
 *
 * The instruction buffer is filled backwards (see the uses
 * of curi and insb in this file), so the emit() calls below
 * appear in the reverse of their execution order.
 */
static void
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
{
	Ins *i;
	Class *ca, *c, cr;
	int cty;
	uint n;
	uint64_t stk, off;
	Ref r, rstk, tmp[4];

	ca = alloc((i1-i0) * sizeof ca[0]);
	cty = argsclass(i0, i1, ca);

	stk = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (c->class & Cptr) {
			/* pass the address of a fresh stack blob
			 * as a plain argument; the blob is filled
			 * by the blit0 at the end of this function
			 */
			i->arg[0] = newtmp("abi", Kl, fn);
			stkblob(i->arg[0], c, fn, ilp);
			i->op = Oarg;
		}
		if (c->class & Cstk)
			stk += c->size;
	}
	/* every size added above is a multiple of 8, so
	 * this rounds stk up to a multiple of 16
	 */
	stk += stk & 15;
	rstk = getcon(stk, fn);
	if (stk)
		/* runs after the call: release the argument area */
		emit(Oadd, Kl, TMP(SP), TMP(SP), rstk);

	if (!req(i1->arg[1], R)) {
		/* aggregate return; i1->arg[1] names its type and
		 * i1->to becomes a stack blob holding the result
		 */
		typclass(&cr, &typ[i1->arg[1].val], gpreg, fpreg);
		stkblob(i1->to, &cr, fn, ilp);
		cty |= (cr.nfp << 2) | cr.ngp;
		if (cr.class & Cptr) {
			/* spill & rega expect calls to be
			 * followed by copies from regs,
			 * so we emit a dummy
			 */
			cty |= 1 << 13 | 1;
			emit(Ocopy, Kw, R, TMP(R0), R);
		} else {
			/* copy the result registers to temporaries,
			 * then store those into the blob
			 */
			sttmps(tmp, cr.cls, cr.nreg, i1->to, fn);
			for (n=0; n<cr.nreg; n++) {
				r = TMP(cr.reg[n]);
				emit(Ocopy, cr.cls[n], tmp[n], r, R);
			}
		}
	} else {
		/* scalar return in R0 or V0 */
		if (KBASE(i1->cls) == 0) {
			emit(Ocopy, i1->cls, i1->to, TMP(R0), R);
			cty |= 1;
		} else {
			emit(Ocopy, i1->cls, i1->to, TMP(V0), R);
			cty |= 1 << 2;
		}
	}

	emit(Ocall, 0, R, i1->arg[0], CALL(cty));

	if (cty & (1 << 13))
		/* struct return argument */
		emit(Ocopy, Kl, TMP(R8), i1->to, R);

	/* move register arguments into place */
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) != 0)
			continue;
		if (i->op == Oarg || i->op == Oarge)
			emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
		if (i->op == Oargc)
			ldregs(c->reg, c->cls, c->nreg, i->arg[1], fn);
	}

	/* populate the stack */
	off = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if ((c->class & Cstk) == 0)
			continue;
		if (i->op == Oarg) {
			r = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, i->arg[0], r);
			emit(Oadd, Kl, r, TMP(SP), getcon(off, fn));
		}
		if (i->op == Oargc)
			blit(TMP(SP), off, i->arg[1], 0, c->size, fn);
		off += c->size;
	}
	if (stk)
		/* runs before the stores above: reserve the
		 * argument area
		 */
		emit(Osub, Kl, TMP(SP), TMP(SP), rstk);

	/* emitted last, hence executed first: fill the blobs
	 * of Cptr arguments from the original aggregates
	 * (presumably blit0 takes destination, source, size)
	 */
	for (i=i0, c=ca; i<i1; i++, c++)
		if (c->class & Cptr)
			blit0(i->arg[0], i->arg[1], c->t->size, fn);
}
422
/* Lower the parameter instructions i0..i1-1 of fn. The new
 * instructions are left in the instruction buffer (between
 * curi and &insb[NIns]) for the caller to copy out. Sets
 * fn->reg to the registers carrying parameters and, when
 * the function returns a Cptr aggregate, fn->retr to a
 * temporary holding the incoming R8.
 *
 * The buffer is filled backwards, so what is emitted last
 * below runs first.
 */
static Params
selpar(Fn *fn, Ins *i0, Ins *i1)
{
	Class *ca, *c, cr;
	Insl *il;
	Ins *i;
	int n, s, cty;
	Ref r, tmp[16], *t;

	ca = alloc((i1-i0) * sizeof ca[0]);
	curi = &insb[NIns];

	cty = argsclass(i0, i1, ca);
	fn->reg = arm64_argregs(CALL(cty), 0);

	/* aggregates passed in registers: allocate a stack
	 * blob for each and store the pieces into it; tmp[]
	 * collects the temporaries that will receive the
	 * register values
	 */
	il = 0;
	t = tmp;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op != Oparc || (c->class & (Cptr|Cstk)))
			continue;
		sttmps(t, c->cls, c->nreg, i->to, fn);
		stkblob(i->to, c, fn, &il);
		t += c->nreg;
	}
	for (; il; il=il->link)
		emiti(il->i);

	if (fn->retty >= 0) {
		typclass(&cr, &typ[fn->retty], gpreg, fpreg);
		if (cr.class & Cptr) {
			/* remember where the caller wants the result */
			fn->retr = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, fn->retr, TMP(R8), R);
			fn->reg |= BIT(R8);
		}
	}

	/* second walk over tmp[], in the same order as the
	 * first loop; s numbers the 8-byte stack slots taken
	 * by parameters, starting at 2
	 */
	t = tmp;
	s = 2;
	for (i=i0, c=ca; i<i1; i++, c++)
		if (i->op == Oparc && !(c->class & Cptr)) {
			if (c->class & Cstk) {
				/* aggregate already in memory: bind the
				 * temporary to its stack slot
				 */
				fn->tmp[i->to.val].slot = -s;
				s += c->size / 8;
			} else
				for (n=0; n<c->nreg; n++) {
					r = TMP(c->reg[n]);
					emit(Ocopy, c->cls[n], *t++, r, R);
				}
		} else if (c->class & Cstk) {
			/* scalar, or pointer to a large aggregate,
			 * passed on the stack
			 */
			emit(Oload, *c->cls, i->to, SLOT(-s), R);
			s++;
		} else {
			emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
		}

	return (Params){
		.nstk = s - 2,
		.ngp = (cty >> 5) & 15,
		.nfp = (cty >> 9) & 15
	};
}
484
485static Blk *
486split(Fn *fn, Blk *b)
487{
488 Blk *bn;
489
490 ++fn->nblk;
491 bn = blknew();
492 bn->nins = &insb[NIns] - curi;
493 idup(&bn->ins, curi, bn->nins);
494 curi = &insb[NIns];
495 bn->visit = ++b->visit;
496 (void)!snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
497 bn->loop = b->loop;
498 bn->link = b->link;
499 b->link = bn;
500 return bn;
501}
502
503static void
504chpred(Blk *b, Blk *bp, Blk *bp1)
505{
506 Phi *p;
507 uint a;
508
509 for (p=b->phi; p; p=p->link) {
510 for (a=0; p->blk[a]!=bp; a++)
511 assert(a+1<p->narg);
512 p->blk[a] = bp1;
513 }
514}
515
/* Lower the vaarg instruction i found in block b. The block
 * is cut in four: b keeps what precedes the vaarg and tests
 * the register offset, breg and bstk fetch the argument
 * address from the register save area or from the stack,
 * and b0 receives what was already in the buffer (the
 * instructions following the vaarg), b's jump and its
 * successors.
 *
 * The word at ap+24 is used for integer classes and the one
 * at ap+28 for float classes; the matching save-area
 * pointers are read from ap+8 and ap+16, and the register
 * offset advances by 8 and 16 respectively.
 *
 * The buffer is filled backwards, so within each block the
 * emit() calls below are in reverse execution order.
 */
static void
selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref loc, lreg, lstk, nr, r0, r1, c8, c16, c24, c28, ap;
	Blk *b0, *bstk, *breg;
	int isgp;

	c8 = getcon(8, fn);
	c16 = getcon(16, fn);
	c24 = getcon(24, fn);
	c28 = getcon(28, fn);
	ap = i->arg[0];
	isgp = KBASE(i->cls) == 0;

	/* @b [...]
	       r0 =l add ap, (24 or 28)
	       nr =l loadsw r0
	       r1 =w csltw nr, 0
	       jnz r1, @breg, @bstk
	   @breg
	       r0 =l add ap, (8 or 16)
	       r1 =l loadl r0
	       lreg =l add r1, nr
	       r0 =w add nr, (8 or 16)
	       r1 =l add ap, (24 or 28)
	       storew r0, r1
	   @bstk
	       lstk =l loadl ap
	       r0 =l add lstk, 8
	       storel r0, ap
	   @b0
	       %loc =l phi @breg %lreg, @bstk %lstk
	       i->to =(i->cls) load %loc
	*/

	/* @b0: the final load, followed by the rest of b */
	loc = newtmp("abi", Kl, fn);
	emit(Oload, i->cls, i->to, loc, R);
	b0 = split(fn, b);
	b0->jmp = b->jmp;
	b0->s1 = b->s1;
	b0->s2 = b->s2;
	/* b0 now precedes b's old successors; fix their phis */
	if (b->s1)
		chpred(b->s1, b, b0);
	if (b->s2 && b->s2 != b->s1)
		chpred(b->s2, b, b0);

	/* @breg: argument is in the register save area */
	lreg = newtmp("abi", Kl, fn);
	nr = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kw, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, r0, r1);
	emit(Oadd, Kl, r1, ap, isgp ? c24 : c28);
	emit(Oadd, Kw, r0, nr, isgp ? c8 : c16);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Oadd, Kl, lreg, r1, nr);
	emit(Oload, Kl, r1, r0, R);
	emit(Oadd, Kl, r0, ap, isgp ? c8 : c16);
	breg = split(fn, b);
	breg->jmp.type = Jjmp;
	breg->s1 = b0;

	/* @bstk: argument is on the stack */
	lstk = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r0, ap);
	emit(Oadd, Kl, r0, lstk, c8);
	emit(Oload, Kl, lstk, ap, R);
	bstk = split(fn, b);
	bstk->jmp.type = Jjmp;
	bstk->s1 = b0;

	/* merge the two candidate addresses in b0 */
	b0->phi = alloc(sizeof *b0->phi);
	*b0->phi = (Phi){
		.cls = Kl, .to = loc,
		.narg = 2,
		.blk = vnew(2, sizeof b0->phi->blk[0], Pfn),
		.arg = vnew(2, sizeof b0->phi->arg[0], Pfn),
	};
	b0->phi->blk[0] = bstk;
	b0->phi->blk[1] = breg;
	b0->phi->arg[0] = lstk;
	b0->phi->arg[1] = lreg;
	/* @b: branch on the sign of the register offset; these
	 * instructions stay in the buffer and end up at the
	 * tail of b
	 */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kw, fn);
	b->jmp.type = Jjnz;
	b->jmp.arg = r1;
	b->s1 = breg;
	b->s2 = bstk;
	emit(Ocmpw+Cislt, Kw, r1, nr, CON_Z);
	emit(Oloadsw, Kl, nr, r0, R);
	emit(Oadd, Kl, r0, ap, isgp ? c24 : c28);
}
608
/* Lower vastart: initialize the variable argument list at
 * ap from the parameter summary p. With rsave the address
 * of SLOT(-1), the following is stored:
 *
 *   ap+0   (long)  rsave + 192 + 8*p.nstk
 *   ap+8   (long)  rsave + 64
 *   ap+16  (long)  rsave + 192
 *   ap+24  (word)  (p.ngp - 8) * 8
 *   ap+28  (word)  (p.nfp - 8) * 16
 *
 * NOTE(review): the constants suggest a save area of 64
 * bytes of gp registers followed by 128 bytes of fp
 * registers located at rsave; confirm against the code that
 * emits the function prologue.
 *
 * The buffer is filled backwards, so each store is emitted
 * before the instructions computing its operands.
 */
static void
selvastart(Fn *fn, Params p, Ref ap)
{
	Ref r0, r1, rsave;

	rsave = newtmp("abi", Kl, fn);

	/* ap+0: first stack argument past the named ones */
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r0, ap);
	emit(Oadd, Kl, r0, rsave, getcon(p.nstk*8 + 192, fn));

	/* ap+8 */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, rsave, getcon(64, fn));
	emit(Oadd, Kl, r0, ap, getcon(8, fn));

	/* ap+16; the definition of rsave is emitted in this
	 * group, after all its uses, so that it runs first
	 */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, rsave, getcon(192, fn));
	emit(Oaddr, Kl, rsave, SLOT(-1), R);
	emit(Oadd, Kl, r0, ap, getcon(16, fn));

	/* ap+24: p.ngp is unsigned, the subtraction wraps and
	 * the result is stored as a word
	 */
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, getcon((p.ngp-8)*8, fn), r0);
	emit(Oadd, Kl, r0, ap, getcon(24, fn));

	/* ap+28: same for the fp registers */
	r0 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, getcon((p.nfp-8)*16, fn), r0);
	emit(Oadd, Kl, r0, ap, getcon(28, fn));
}
641
/* Entry point of the pass: lower parameters, calls,
 * returns, and variable argument instructions of fn.
 * Prints the function on stderr when debug['A'] is set.
 */
void
arm64_abi(Fn *fn)
{
	Blk *b;
	Ins *i, *i0, *ip;
	Insl *il;
	int n;
	Params p;

	/* visit marks the blocks created by split() */
	for (b=fn->start; b; b=b->link)
		b->visit = 0;

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	p = selpar(fn, b->ins, i);
	/* rebuild the start block: the instructions selpar()
	 * left in the buffer, then what followed the parameters
	 */
	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
	i0 = alloc(n * sizeof(Ins));
	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
	ip = icpy(ip, i, &b->ins[b->nins] - i);
	b->nins = n;
	b->ins = i0;

	/* lower calls, returns, and vararg instructions */
	il = 0;
	b = fn->start;
	do {
		/* the start block is handled after all others
		 * because it receives the allocations gathered
		 * in il by every selcall()
		 */
		if (!(b = b->link))
			b = fn->start; /* do it last */
		/* skip the blocks made by split() */
		if (b->visit)
			continue;
		curi = &insb[NIns];
		selret(b, fn);
		/* walk backwards, matching the direction in which
		 * the buffer is filled
		 */
		for (i=&b->ins[b->nins]; i!=b->ins;)
			switch ((--i)->op) {
			default:
				emiti(*i);
				break;
			case Ocall:
				/* find the first argument of the call */
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &il);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, p, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, b, i);
				break;
			case Oarg:
			case Oargc:
				/* arguments are consumed with their call */
				die("unreachable");
			}
		if (b == fn->start)
			for (; il; il=il->link)
				emiti(il->i);
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
	} while (b != fn->start);

	if (debug['A']) {
		fprintf(stderr, "\n> After ABI lowering:\n");
		printfn(fn, stderr);
	}
}
710