1#include "all.h"
2
typedef struct AClass AClass;
typedef struct RAlloc RAlloc;

/* sysv classification of a single argument,
 * parameter, or return value */
struct AClass {
	Typ *type;   /* aggregate type for Oargc/Oparc values (set by typclass) */
	int inmem;   /* 0: passed in registers; 1: aggregate in memory; 2: scalar on the stack */
	int align;   /* log2 of the alignment */
	uint size;   /* size in bytes, rounded up to a multiple of 8 */
	int cls[2];  /* register class (Kl or Kd) of each eightbyte */
	Ref ref[2];  /* temporaries carrying each eightbyte */
};

/* deferred stack allocation (return pad for a call
 * returning an aggregate); the saved instructions are
 * replayed in the start block by amd64_sysv_abi */
struct RAlloc {
	Ins i;
	RAlloc *link;
};
19
20static void
21classify(AClass *a, Typ *t, uint s)
22{
23 Field *f;
24 int *cls;
25 uint n, s1;
26
27 for (n=0, s1=s; n<t->nunion; n++, s=s1)
28 for (f=t->fields[n]; f->type!=FEnd; f++) {
29 assert(s <= 16);
30 cls = &a->cls[s/8];
31 switch (f->type) {
32 case FEnd:
33 die("unreachable");
34 case FPad:
35 /* don't change anything */
36 s += f->len;
37 break;
38 case Fs:
39 case Fd:
40 if (*cls == Kx)
41 *cls = Kd;
42 s += f->len;
43 break;
44 case Fb:
45 case Fh:
46 case Fw:
47 case Fl:
48 *cls = Kl;
49 s += f->len;
50 break;
51 case FTyp:
52 classify(a, &typ[f->len], s);
53 s += typ[f->len].size;
54 break;
55 }
56 }
57}
58
59static void
60typclass(AClass *a, Typ *t)
61{
62 uint sz, al;
63
64 sz = t->size;
65 al = 1u << t->align;
66
67 /* the ABI requires sizes to be rounded
68 * up to the nearest multiple of 8, moreover
69 * it makes it easy load and store structures
70 * in registers
71 */
72 if (al < 8)
73 al = 8;
74 sz = (sz + al-1) & -al;
75
76 a->type = t;
77 a->size = sz;
78 a->align = t->align;
79
80 if (t->isdark || sz > 16 || sz == 0) {
81 /* large or unaligned structures are
82 * required to be passed in memory
83 */
84 a->inmem = 1;
85 return;
86 }
87
88 a->cls[0] = Kx;
89 a->cls[1] = Kx;
90 a->inmem = 0;
91 classify(a, t, 0);
92}
93
94static int
95retr(Ref reg[2], AClass *aret)
96{
97 static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
98 int n, k, ca, nr[2];
99
100 nr[0] = nr[1] = 0;
101 ca = 0;
102 for (n=0; (uint)n*8<aret->size; n++) {
103 k = KBASE(aret->cls[n]);
104 reg[n] = TMP(retreg[k][nr[k]++]);
105 ca += 1 << (2 * k);
106 }
107 return ca;
108}
109
/* lower the return of block b: move the returned value
 * into the sysv return registers (or copy the aggregate
 * through the caller-provided buffer), then encode the
 * registers used as an RCall in the jump argument;
 * note that emit() fills the instruction buffer
 * bottom-up, so the runtime order is the reverse of
 * the source order below */
static void
selret(Blk *b, Fn *fn)
{
	int j, k, ca;
	Ref r, r0, reg[2];
	AClass aret;

	j = b->jmp.type;

	if (!isret(j) || j == Jret0)
		return;

	r0 = b->jmp.arg;
	b->jmp.type = Jret0;

	if (j == Jretc) {
		/* aggregate return */
		typclass(&aret, &typ[fn->retty]);
		if (aret.inmem) {
			/* memory-returned: blit the value into the
			 * buffer whose address was saved in fn->retr,
			 * and return that address in rax */
			assert(rtype(fn->retr) == RTmp);
			emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
			blit0(fn->retr, r0, aret.type->size, fn);
			ca = 1;
		} else {
			/* register-returned: load up to two
			 * eightbytes into the return registers */
			ca = retr(reg, &aret);
			if (aret.size > 8) {
				r = newtmp("abi", Kl, fn);
				emit(Oload, Kl, reg[1], r, R);
				emit(Oadd, Kl, r, r0, getcon(8, fn));
			}
			emit(Oload, Kl, reg[0], r0, R);
		}
	} else {
		/* scalar return in rax or xmm0 */
		k = j - Jretw;
		if (KBASE(k) == 0) {
			emit(Ocopy, k, TMP(RAX), r0, R);
			ca = 1;
		} else {
			emit(Ocopy, k, TMP(XMM0), r0, R);
			ca = 1 << 2;
		}
	}

	/* stash the register-use bits for the register
	 * allocator (see the RCall layout diagram below) */
	b->jmp.arg = CALL(ca);
}
154
/* classify the argument (op == Oarg) or parameter
 * (op == Opar) instructions in [i0, i1) into ac[];
 * aret, when non-null, describes an aggregate return
 * and, if in memory, reserves one gp register for the
 * hidden return pointer; *env receives the Oarge/Opare
 * environment ref, if any; returns the register counts
 * packed in RCall layout (see the diagram further down
 * in this file) */
static int
argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
{
	int varc, envc, nint, ni, nsse, ns, n, *pn;
	AClass *a;
	Ins *i;

	if (aret && aret->inmem)
		nint = 5; /* hidden argument */
	else
		nint = 6;
	nsse = 8;
	varc = 0;
	envc = 0;
	for (i=i0, a=ac; i<i1; i++, a++)
		switch (i->op - op + Oarg) {
		case Oarg:
			/* scalar: consumes one gp or sse register,
			 * or one stack eightbyte (inmem == 2) */
			if (KBASE(i->cls) == 0)
				pn = &nint;
			else
				pn = &nsse;
			if (*pn > 0) {
				--*pn;
				a->inmem = 0;
			} else
				a->inmem = 2;
			a->align = 3;
			a->size = 8;
			a->cls[0] = i->cls;
			break;
		case Oargc:
			/* aggregate: passed in registers only if
			 * enough of BOTH kinds remain for all of
			 * its eightbytes, else in memory */
			n = i->arg[0].val;
			typclass(a, &typ[n]);
			if (a->inmem)
				continue;
			ni = ns = 0;
			for (n=0; (uint)n*8<a->size; n++)
				if (KBASE(a->cls[n]) == 0)
					ni++;
				else
					ns++;
			if (nint >= ni && nsse >= ns) {
				nint -= ni;
				nsse -= ns;
			} else
				a->inmem = 1;
			break;
		case Oarge:
			/* environment: passed in rax, outside the
			 * regular argument registers */
			envc = 1;
			if (op == Opar)
				*env = i->to;
			else
				*env = i->arg[0];
			break;
		case Oargv:
			varc = 1;
			break;
		default:
			die("unreachable");
		}

	if (varc && envc)
		err("sysv abi does not support variadic env calls");

	return ((varc|envc) << 12) | ((6-nint) << 4) | ((8-nsse) << 8);
}
221
/* caller-saved registers; the first six gp entries are
 * the integer argument registers in passing order (they
 * are indexed by rarg), followed by the remaining
 * scratch gp registers and the sse registers;
 * -1 terminated */
int amd64_sysv_rsave[] = {
	RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
};
/* callee-saved registers; -1 terminated */
int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};

MAKESURE(sysv_arrays_ok,
	sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
	sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
);
233
234/* layout of call's second argument (RCall)
235 *
236 * 29 12 8 4 3 0
237 * |0...00|x|xxxx|xxxx|xx|xx| range
238 * | | | | ` gp regs returned (0..2)
239 * | | | ` sse regs returned (0..2)
240 * | | ` gp regs passed (0..6)
241 * | ` sse regs passed (0..8)
242 * ` 1 if rax is used to pass data (0..1)
243 */
244
245bits
246amd64_sysv_retregs(Ref r, int p[2])
247{
248 bits b;
249 int ni, nf;
250
251 assert(rtype(r) == RCall);
252 b = 0;
253 ni = r.val & 3;
254 nf = (r.val >> 2) & 3;
255 if (ni >= 1)
256 b |= BIT(RAX);
257 if (ni >= 2)
258 b |= BIT(RDX);
259 if (nf >= 1)
260 b |= BIT(XMM0);
261 if (nf >= 2)
262 b |= BIT(XMM1);
263 if (p) {
264 p[0] = ni;
265 p[1] = nf;
266 }
267 return b;
268}
269
270bits
271amd64_sysv_argregs(Ref r, int p[2])
272{
273 bits b;
274 int j, ni, nf, ra;
275
276 assert(rtype(r) == RCall);
277 b = 0;
278 ni = (r.val >> 4) & 15;
279 nf = (r.val >> 8) & 15;
280 ra = (r.val >> 12) & 1;
281 for (j=0; j<ni; j++)
282 b |= BIT(amd64_sysv_rsave[j]);
283 for (j=0; j<nf; j++)
284 b |= BIT(XMM0+j);
285 if (p) {
286 p[0] = ni + ra;
287 p[1] = nf;
288 }
289 return b | (ra ? BIT(RAX) : 0);
290}
291
292static Ref
293rarg(int ty, int *ni, int *ns)
294{
295 if (KBASE(ty) == 0)
296 return TMP(amd64_sysv_rsave[(*ni)++]);
297 else
298 return TMP(XMM0 + (*ns)++);
299}
300
/* lower the call instruction i1 together with its
 * argument instructions [i0, i1); calls returning an
 * aggregate get a stack return pad whose allocation is
 * deferred (linked in *rap) so it can be replayed in
 * the start block; instructions are emitted bottom-up
 * (the buffer is filled from the end), so the runtime
 * order is the reverse of the source order below */
static void
selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
{
	Ins *i;
	AClass *ac, *a, aret;
	int ca, ni, ns, al;
	uint stk, off;
	Ref r, r1, r2, reg[2], env;
	RAlloc *ra;

	env = R;
	ac = alloc((i1-i0) * sizeof ac[0]);

	if (!req(i1->arg[1], R)) {
		/* the call's second argument holds the type
		 * of an aggregate return value */
		assert(rtype(i1->arg[1]) == RType);
		typclass(&aret, &typ[i1->arg[1].val]);
		ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
	} else
		ca = argsclass(i0, i1, ac, Oarg, 0, &env);

	/* compute the stack space needed by memory
	 * arguments, keeping 16-byte alignment */
	for (stk=0, a=&ac[i1-i0]; a>ac;)
		if ((--a)->inmem) {
			if (a->align > 4)
				err("sysv abi requires alignments of 16 or less");
			stk += a->size;
			if (a->align == 4)
				stk += stk & 15;
		}
	stk += stk & 15;
	if (stk) {
		/* emitted first, so it executes last: frees
		 * the argument area after the call */
		r = getcon(-(int64_t)stk, fn);
		emit(Osalloc, Kl, R, r, R);
	}

	if (!req(i1->arg[1], R)) {
		if (aret.inmem) {
			/* get the return location from eax
			 * it saves one callee-save reg */
			r1 = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, i1->to, TMP(RAX), R);
			ca += 1;
		} else {
			/* todo, may read out of bounds.
			 * gcc did this up until 5.2, but
			 * this should still be fixed.
			 */
			if (aret.size > 8) {
				r = newtmp("abi", Kl, fn);
				aret.ref[1] = newtmp("abi", aret.cls[1], fn);
				emit(Ostorel, 0, R, aret.ref[1], r);
				emit(Oadd, Kl, r, i1->to, getcon(8, fn));
			}
			/* store the return registers into the pad */
			aret.ref[0] = newtmp("abi", aret.cls[0], fn);
			emit(Ostorel, 0, R, aret.ref[0], i1->to);
			ca += retr(reg, &aret);
			if (aret.size > 8)
				emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
			emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
			r1 = i1->to;
		}
		/* allocate return pad */
		ra = alloc(sizeof *ra);
		/* specific to NAlign == 3 */
		al = aret.align >= 2 ? aret.align - 2 : 0;
		ra->i = (Ins){Oalloc+al, Kl, r1, {getcon(aret.size, fn)}};
		ra->link = (*rap);
		*rap = ra;
	} else {
		/* scalar result comes back in rax or xmm0 */
		ra = 0;
		if (KBASE(i1->cls) == 0) {
			emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
			ca += 1;
		} else {
			emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
			ca += 1 << 2;
		}
	}

	emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));

	if (!req(R, env))
		emit(Ocopy, Kl, TMP(RAX), env, R);
	else if ((ca >> 12) & 1) /* vararg call */
		emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);

	/* load register arguments */
	ni = ns = 0;
	if (ra && aret.inmem)
		emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */

	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op >= Oarge || a->inmem)
			continue;
		r1 = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oargc) {
			/* load the aggregate's eightbytes
			 * directly into the registers */
			if (a->size > 8) {
				r2 = rarg(a->cls[1], &ni, &ns);
				r = newtmp("abi", Kl, fn);
				emit(Oload, a->cls[1], r2, r, R);
				emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
			}
			emit(Oload, a->cls[0], r1, i->arg[1], R);
		} else
			emit(Ocopy, i->cls, r1, i->arg[0], R);
	}

	if (!stk)
		return;

	/* populate the stack-argument area; the Osalloc
	 * below is emitted last, hence executes first */
	r = newtmp("abi", Kl, fn);
	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
		if (i->op >= Oarge || !a->inmem)
			continue;
		if (i->op == Oargc) {
			if (a->align == 4)
				off += off & 15;
			blit(r, off, i->arg[1], 0, a->type->size, fn);
		} else {
			r1 = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, i->arg[0], r1);
			emit(Oadd, Kl, r1, r, getcon(off, fn));
		}
		off += a->size;
	}
	emit(Osalloc, Kl, r, getcon(stk, fn), R);
}
426
/* lower the parameter instructions [i0, i1) of the
 * function's start block; returns the argsclass RCall
 * bits with the incoming stack-argument size (in bytes)
 * packed in bits 12 and up (consumed by selvastart);
 * like the rest of this pass, emission is bottom-up */
static int
selpar(Fn *fn, Ins *i0, Ins *i1)
{
	AClass *ac, *a, aret;
	Ins *i;
	int ni, ns, s, al, fa;
	Ref r, env;

	env = R;
	ac = alloc((i1-i0) * sizeof ac[0]);
	curi = &insb[NIns];
	ni = ns = 0;

	if (fn->retty >= 0) {
		/* aggregate return: may reserve one gp
		 * register for the hidden pointer */
		typclass(&aret, &typ[fn->retty]);
		fa = argsclass(i0, i1, ac, Opar, &aret, &env);
	} else
		fa = argsclass(i0, i1, ac, Opar, 0, &env);
	fn->reg = amd64_sysv_argregs(CALL(fa), 0);

	/* spill register-passed aggregates into freshly
	 * allocated stack slots so they have an address */
	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op != Oparc || a->inmem)
			continue;
		if (a->size > 8) {
			r = newtmp("abi", Kl, fn);
			a->ref[1] = newtmp("abi", Kl, fn);
			emit(Ostorel, 0, R, a->ref[1], r);
			emit(Oadd, Kl, r, i->to, getcon(8, fn));
		}
		a->ref[0] = newtmp("abi", Kl, fn);
		emit(Ostorel, 0, R, a->ref[0], i->to);
		/* specific to NAlign == 3 */
		al = a->align >= 2 ? a->align - 2 : 0;
		emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
	}

	if (fn->retty >= 0 && aret.inmem) {
		/* save the hidden return pointer in a
		 * temporary for selret to use */
		r = newtmp("abi", Kl, fn);
		emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
		fn->retr = r;
	}

	/* assign registers and stack slots; s counts
	 * 4-byte slot units, starting at 4 (presumably
	 * the first incoming stack argument at rbp+16 —
	 * see SLOT usage; TODO confirm against the
	 * frame layout) */
	for (i=i0, a=ac, s=4; i<i1; i++, a++) {
		switch (a->inmem) {
		case 1:
			/* memory-passed aggregate: record its
			 * slot, no code needed */
			if (a->align > 4)
				err("sysv abi requires alignments of 16 or less");
			if (a->align == 4)
				s = (s+3) & -4;
			fn->tmp[i->to.val].slot = -s;
			s += a->size / 4;
			continue;
		case 2:
			/* stack-passed scalar: load it */
			emit(Oload, i->cls, i->to, SLOT(-s), R);
			s += 2;
			continue;
		}
		if (i->op == Opare)
			continue;
		r = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oparc) {
			/* copy each eightbyte register into the
			 * temporaries stored by the spill above */
			emit(Ocopy, a->cls[0], a->ref[0], r, R);
			if (a->size > 8) {
				r = rarg(a->cls[1], &ni, &ns);
				emit(Ocopy, a->cls[1], a->ref[1], r, R);
			}
		} else
			emit(Ocopy, i->cls, i->to, r, R);
	}

	if (!req(R, env))
		emit(Ocopy, Kl, env, TMP(RAX), R);

	return fa | (s*4)<<12;
}
502
503static Blk *
504split(Fn *fn, Blk *b)
505{
506 Blk *bn;
507
508 ++fn->nblk;
509 bn = blknew();
510 bn->nins = &insb[NIns] - curi;
511 idup(&bn->ins, curi, bn->nins);
512 curi = &insb[NIns];
513 bn->visit = ++b->visit;
514 (void)!snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
515 bn->loop = b->loop;
516 bn->link = b->link;
517 b->link = bn;
518 return bn;
519}
520
521static void
522chpred(Blk *b, Blk *bp, Blk *bp1)
523{
524 Phi *p;
525 uint a;
526
527 for (p=b->phi; p; p=p->link) {
528 for (a=0; p->blk[a]!=bp; a++)
529 assert(a+1<p->narg);
530 p->blk[a] = bp1;
531 }
532}
533
/* lower a vaarg instruction by splitting b into a
 * register path (breg), a stack path (bstk), and a
 * join block (b0) holding a phi of the two locations;
 * the generated code is sketched in the comment below;
 * emission inside each block is bottom-up */
static void
selvaarg(Fn *fn, Blk *b, Ins *i)
{
	Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
	Blk *b0, *bstk, *breg;
	int isint;

	c4 = getcon(4, fn);
	c8 = getcon(8, fn);
	c16 = getcon(16, fn);
	ap = i->arg[0];
	isint = KBASE(i->cls) == 0;

	/* @b [...]
	       r0 =l add ap, (0 or 4)
	       nr =l loadsw r0
	       r1 =w cultw nr, (48 or 176)
	       jnz r1, @breg, @bstk
	   @breg
	       r0 =l add ap, 16
	       r1 =l loadl r0
	       lreg =l add r1, nr
	       r0 =w add nr, (8 or 16)
	       r1 =l add ap, (0 or 4)
	       storew r0, r1
	   @bstk
	       r0 =l add ap, 8
	       lstk =l loadl r0
	       r1 =l add lstk, 8
	       storel r1, r0
	   @b0
	       %loc =l phi @breg %lreg, @bstk %lstk
	       i->to =(i->cls) load %loc
	*/

	/* the tail of b (everything after the vaarg,
	 * already emitted) becomes the join block b0 */
	loc = newtmp("abi", Kl, fn);
	emit(Oload, i->cls, i->to, loc, R);
	b0 = split(fn, b);
	b0->jmp = b->jmp;
	b0->s1 = b->s1;
	b0->s2 = b->s2;
	if (b->s1)
		chpred(b->s1, b, b0);
	if (b->s2 && b->s2 != b->s1)
		chpred(b->s2, b, b0);

	/* register path: index the save area and bump
	 * the gp/fp offset counter */
	lreg = newtmp("abi", Kl, fn);
	nr = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kw, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorew, Kw, R, r0, r1);
	emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
	emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Oadd, Kl, lreg, r1, nr);
	emit(Oload, Kl, r1, r0, R);
	emit(Oadd, Kl, r0, ap, c16);
	breg = split(fn, b);
	breg->jmp.type = Jjmp;
	breg->s1 = b0;

	/* stack path: read and advance the overflow
	 * area pointer */
	lstk = newtmp("abi", Kl, fn);
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kl, fn);
	emit(Ostorel, Kw, R, r1, r0);
	emit(Oadd, Kl, r1, lstk, c8);
	emit(Oload, Kl, lstk, r0, R);
	emit(Oadd, Kl, r0, ap, c8);
	bstk = split(fn, b);
	bstk->jmp.type = Jjmp;
	bstk->s1 = b0;

	/* join: phi of the two computed locations */
	b0->phi = alloc(sizeof *b0->phi);
	*b0->phi = (Phi){
		.cls = Kl, .to = loc,
		.narg = 2,
		.blk = vnew(2, sizeof b0->phi->blk[0], Pfn),
		.arg = vnew(2, sizeof b0->phi->arg[0], Pfn),
	};
	b0->phi->blk[0] = bstk;
	b0->phi->blk[1] = breg;
	b0->phi->arg[0] = lstk;
	b0->phi->arg[1] = lreg;
	/* b itself now ends with the offset test
	 * choosing between the two paths */
	r0 = newtmp("abi", Kl, fn);
	r1 = newtmp("abi", Kw, fn);
	b->jmp.type = Jjnz;
	b->jmp.arg = r1;
	b->s1 = breg;
	b->s2 = bstk;
	c = getcon(isint ? 48 : 176, fn);
	emit(Ocmpw+Ciult, Kw, r1, nr, c);
	emit(Oloadsw, Kl, nr, r0, R);
	emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
}
629
630static void
631selvastart(Fn *fn, int fa, Ref ap)
632{
633 Ref r0, r1;
634 int gp, fp, sp;
635
636 gp = ((fa >> 4) & 15) * 8;
637 fp = 48 + ((fa >> 8) & 15) * 16;
638 sp = fa >> 12;
639 r0 = newtmp("abi", Kl, fn);
640 r1 = newtmp("abi", Kl, fn);
641 emit(Ostorel, Kw, R, r1, r0);
642 emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
643 emit(Oadd, Kl, r0, ap, getcon(16, fn));
644 r0 = newtmp("abi", Kl, fn);
645 r1 = newtmp("abi", Kl, fn);
646 emit(Ostorel, Kw, R, r1, r0);
647 emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
648 emit(Oadd, Kl, r0, ap, getcon(8, fn));
649 r0 = newtmp("abi", Kl, fn);
650 emit(Ostorew, Kw, R, getcon(fp, fn), r0);
651 emit(Oadd, Kl, r0, ap, getcon(4, fn));
652 emit(Ostorew, Kw, R, getcon(gp, fn), ap);
653}
654
/* driver of the sysv ABI lowering pass: lowers
 * parameters in the start block, then calls, returns,
 * and vararg instructions in every block; the emission
 * buffer insb is filled bottom-up (curi starts at the
 * end and emit moves it down) */
void
amd64_sysv_abi(Fn *fn)
{
	Blk *b;
	Ins *i, *i0, *ip;
	RAlloc *ral;
	int n, fa;

	for (b=fn->start; b; b=b->link)
		b->visit = 0;

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	fa = selpar(fn, b->ins, i);
	/* splice the instructions emitted by selpar in
	 * front of the rest of the start block */
	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
	i0 = alloc(n * sizeof(Ins));
	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
	ip = icpy(ip, i, &b->ins[b->nins] - i);
	b->nins = n;
	b->ins = i0;

	/* lower calls, returns, and vararg instructions */
	ral = 0;
	b = fn->start;
	do {
		if (!(b = b->link))
			b = fn->start; /* do it last */
		if (b->visit)
			/* skip blocks created by split() in selvaarg */
			continue;
		curi = &insb[NIns];
		selret(b, fn);
		/* walk backward since emission is bottom-up */
		for (i=&b->ins[b->nins]; i!=b->ins;)
			switch ((--i)->op) {
			default:
				emiti(*i);
				break;
			case Ocall:
				/* find the first argument
				 * instruction of this call */
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &ral);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, fa, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, b, i);
				break;
			case Oarg:
			case Oargc:
				die("unreachable");
			}
		if (b == fn->start)
			/* return pads collected by selcall are
			 * allocated once, in the entry block */
			for (; ral; ral=ral->link)
				emiti(ral->i);
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
	} while (b != fn->start);

	if (debug['A']) {
		fprintf(stderr, "\n> After ABI lowering:\n");
		printfn(fn, stderr);
	}
}
722