1 | #include "all.h" |
2 | |
3 | /* the risc-v lp64d abi */ |
4 | |
/* forward declarations of the file-local helper structures
 * defined below
 */
typedef struct Class Class;
typedef struct Insl Insl;
typedef struct Params Params;
8 | |
/* flags or-ed together in Class.class */
enum {
	Cptr   = 1 << 0, /* replaced by a pointer */
	Cstk1  = 1 << 1, /* pass first XLEN on the stack */
	Cstk2  = 1 << 2, /* pass second XLEN on the stack */
	Cfpint = 1 << 3, /* float passed like integer */
	Cstk   = Cstk1 | Cstk2, /* any part on the stack */
};
16 | |
/* how one argument, parameter, or return value is passed;
 * an aggregate is split in at most two parts
 */
struct Class {
	char class; /* or-ed Cptr, Cstk1, Cstk2, Cfpint flags */
	Typ *type; /* aggregate type, set by typclass() */
	int reg[2]; /* register assigned to each part */
	int cls[2]; /* class (Kw, Kl, Ks, Kd) of each part */
	int off[2]; /* byte offset of each part in the aggregate */
	char ngp; /* only valid after typclass() */
	char nfp; /* ditto */
	char nreg; /* parts held in registers; typclass() sets it
	            * to ngp+nfp, argsclass() lowers it when parts
	            * go to the stack
	            */
};
27 | |
/* singly linked list of instructions; stkblob() pushes
 * alloc instructions on it and the list is flushed with
 * emiti() later on
 */
struct Insl {
	Ins i;
	Insl *link; /* next element, 0 terminates the list */
};
32 | |
/* summary of the parameter lowering, returned by selpar()
 * and consumed by selvastart()
 */
struct Params {
	int ngp; /* gp argument registers used by parameters */
	int nfp; /* fp argument registers used by parameters */
	int stk; /* stack offset for varargs */
};
38 | |
/* integer and floating-point argument registers, in the
 * order they are handed out; only 8 of the 10 entries are
 * initialized
 * NOTE(review): typclass() always reads up to two entries
 * from the cursor it is given, and argsclass() may have
 * advanced that cursor past the 8th register; presumably
 * the two spare zero entries keep that read in bounds
 */
static int gpreg[10] = {A0, A1, A2, A3, A4, A5, A6, A7};
static int fpreg[10] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};
41 | |
/* layout of call's second argument (RCall)
 *
 *  29   12    8    4  2  0
 *  |0.00|x|xxxx|xxxx|xx|xx|                range
 *        | |    |    |  ` gp regs returned (0..2)
 *        | |    |    ` fp regs returned    (0..2)
 *        | |    ` gp regs passed           (0..8)
 *        | ` fp regs passed                (0..8)
 *        ` env pointer passed in t5        (0..1)
 */
52 | |
53 | bits |
54 | rv64_retregs(Ref r, int p[2]) |
55 | { |
56 | bits b; |
57 | int ngp, nfp; |
58 | |
59 | assert(rtype(r) == RCall); |
60 | ngp = r.val & 3; |
61 | nfp = (r.val >> 2) & 3; |
62 | if (p) { |
63 | p[0] = ngp; |
64 | p[1] = nfp; |
65 | } |
66 | b = 0; |
67 | while (ngp--) |
68 | b |= BIT(A0+ngp); |
69 | while (nfp--) |
70 | b |= BIT(FA0+nfp); |
71 | return b; |
72 | } |
73 | |
74 | bits |
75 | rv64_argregs(Ref r, int p[2]) |
76 | { |
77 | bits b; |
78 | int ngp, nfp, t5; |
79 | |
80 | assert(rtype(r) == RCall); |
81 | ngp = (r.val >> 4) & 15; |
82 | nfp = (r.val >> 8) & 15; |
83 | t5 = (r.val >> 12) & 1; |
84 | if (p) { |
85 | p[0] = ngp + t5; |
86 | p[1] = nfp; |
87 | } |
88 | b = 0; |
89 | while (ngp--) |
90 | b |= BIT(A0+ngp); |
91 | while (nfp--) |
92 | b |= BIT(FA0+nfp); |
93 | return b | ((bits)t5 << T5); |
94 | } |
95 | |
96 | static int |
97 | fpstruct(Typ *t, int off, Class *c) |
98 | { |
99 | Field *f; |
100 | int n; |
101 | |
102 | if (t->isunion) |
103 | return -1; |
104 | |
105 | for (f=*t->fields; f->type != FEnd; f++) |
106 | if (f->type == FPad) |
107 | off += f->len; |
108 | else if (f->type == FTyp) { |
109 | if (fpstruct(&typ[f->len], off, c) == -1) |
110 | return -1; |
111 | } |
112 | else { |
113 | n = c->nfp + c->ngp; |
114 | if (n == 2) |
115 | return -1; |
116 | switch (f->type) { |
117 | default: die("unreachable" ); |
118 | case Fb: |
119 | case Fh: |
120 | case Fw: c->cls[n] = Kw; c->ngp++; break; |
121 | case Fl: c->cls[n] = Kl; c->ngp++; break; |
122 | case Fs: c->cls[n] = Ks; c->nfp++; break; |
123 | case Fd: c->cls[n] = Kd; c->nfp++; break; |
124 | } |
125 | c->off[n] = off; |
126 | off += f->len; |
127 | } |
128 | |
129 | return c->nfp; |
130 | } |
131 | |
132 | static void |
133 | typclass(Class *c, Typ *t, int fpabi, int *gp, int *fp) |
134 | { |
135 | uint n; |
136 | int i; |
137 | |
138 | c->type = t; |
139 | c->class = 0; |
140 | c->ngp = 0; |
141 | c->nfp = 0; |
142 | |
143 | if (t->align > 4) |
144 | err("alignments larger than 16 are not supported" ); |
145 | |
146 | if (t->isdark || t->size > 16 || t->size == 0) { |
147 | /* large structs are replaced by a |
148 | * pointer to some caller-allocated |
149 | * memory |
150 | */ |
151 | c->class |= Cptr; |
152 | *c->cls = Kl; |
153 | *c->off = 0; |
154 | c->ngp = 1; |
155 | } |
156 | else if (!fpabi || fpstruct(t, 0, c) <= 0) { |
157 | for (n=0; 8*n<t->size; n++) { |
158 | c->cls[n] = Kl; |
159 | c->off[n] = 8*n; |
160 | } |
161 | c->nfp = 0; |
162 | c->ngp = n; |
163 | } |
164 | |
165 | c->nreg = c->nfp + c->ngp; |
166 | for (i=0; i<c->nreg; i++) |
167 | if (KBASE(c->cls[i]) == 0) |
168 | c->reg[i] = *gp++; |
169 | else |
170 | c->reg[i] = *fp++; |
171 | } |
172 | |
173 | static void |
174 | sttmps(Ref tmp[], int ntmp, Class *c, Ref mem, Fn *fn) |
175 | { |
176 | static int st[] = { |
177 | [Kw] = Ostorew, [Kl] = Ostorel, |
178 | [Ks] = Ostores, [Kd] = Ostored |
179 | }; |
180 | int i; |
181 | Ref r; |
182 | |
183 | assert(ntmp > 0); |
184 | assert(ntmp <= 2); |
185 | for (i=0; i<ntmp; i++) { |
186 | tmp[i] = newtmp("abi" , c->cls[i], fn); |
187 | r = newtmp("abi" , Kl, fn); |
188 | emit(st[c->cls[i]], 0, R, tmp[i], r); |
189 | emit(Oadd, Kl, r, mem, getcon(c->off[i], fn)); |
190 | } |
191 | } |
192 | |
193 | static void |
194 | ldregs(Class *c, Ref mem, Fn *fn) |
195 | { |
196 | int i; |
197 | Ref r; |
198 | |
199 | for (i=0; i<c->nreg; i++) { |
200 | r = newtmp("abi" , Kl, fn); |
201 | emit(Oload, c->cls[i], TMP(c->reg[i]), r, R); |
202 | emit(Oadd, Kl, r, mem, getcon(c->off[i], fn)); |
203 | } |
204 | } |
205 | |
206 | static void |
207 | selret(Blk *b, Fn *fn) |
208 | { |
209 | int j, k, cty; |
210 | Ref r; |
211 | Class cr; |
212 | |
213 | j = b->jmp.type; |
214 | |
215 | if (!isret(j) || j == Jret0) |
216 | return; |
217 | |
218 | r = b->jmp.arg; |
219 | b->jmp.type = Jret0; |
220 | |
221 | if (j == Jretc) { |
222 | typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg); |
223 | if (cr.class & Cptr) { |
224 | assert(rtype(fn->retr) == RTmp); |
225 | blit0(fn->retr, r, cr.type->size, fn); |
226 | cty = 0; |
227 | } else { |
228 | ldregs(&cr, r, fn); |
229 | cty = (cr.nfp << 2) | cr.ngp; |
230 | } |
231 | } else { |
232 | k = j - Jretw; |
233 | if (KBASE(k) == 0) { |
234 | emit(Ocopy, k, TMP(A0), r, R); |
235 | cty = 1; |
236 | } else { |
237 | emit(Ocopy, k, TMP(FA0), r, R); |
238 | cty = 1 << 2; |
239 | } |
240 | } |
241 | |
242 | b->jmp.arg = CALL(cty); |
243 | } |
244 | |
245 | static int |
246 | argsclass(Ins *i0, Ins *i1, Class *carg, int retptr) |
247 | { |
248 | int ngp, nfp, *gp, *fp, vararg, envc; |
249 | Class *c; |
250 | Typ *t; |
251 | Ins *i; |
252 | |
253 | gp = gpreg; |
254 | fp = fpreg; |
255 | ngp = 8; |
256 | nfp = 8; |
257 | vararg = 0; |
258 | envc = 0; |
259 | if (retptr) { |
260 | gp++; |
261 | ngp--; |
262 | } |
263 | for (i=i0, c=carg; i<i1; i++, c++) { |
264 | switch (i->op) { |
265 | case Opar: |
266 | case Oarg: |
267 | *c->cls = i->cls; |
268 | if (!vararg && KBASE(i->cls) == 1 && nfp > 0) { |
269 | nfp--; |
270 | *c->reg = *fp++; |
271 | } else if (ngp > 0) { |
272 | if (KBASE(i->cls) == 1) |
273 | c->class |= Cfpint; |
274 | ngp--; |
275 | *c->reg = *gp++; |
276 | } else |
277 | c->class |= Cstk1; |
278 | break; |
279 | case Oargv: |
280 | vararg = 1; |
281 | break; |
282 | case Oparc: |
283 | case Oargc: |
284 | t = &typ[i->arg[0].val]; |
285 | typclass(c, t, 1, gp, fp); |
286 | if (c->nfp > 0) |
287 | if (c->nfp >= nfp || c->ngp >= ngp) |
288 | typclass(c, t, 0, gp, fp); |
289 | assert(c->nfp <= nfp); |
290 | if (c->ngp <= ngp) { |
291 | ngp -= c->ngp; |
292 | nfp -= c->nfp; |
293 | gp += c->ngp; |
294 | fp += c->nfp; |
295 | } else if (ngp > 0) { |
296 | assert(c->ngp == 2); |
297 | assert(c->class == 0); |
298 | c->class |= Cstk2; |
299 | c->nreg = 1; |
300 | ngp--; |
301 | gp++; |
302 | } else { |
303 | c->class |= Cstk1; |
304 | if (c->nreg > 1) |
305 | c->class |= Cstk2; |
306 | c->nreg = 0; |
307 | } |
308 | break; |
309 | case Opare: |
310 | case Oarge: |
311 | *c->reg = T5; |
312 | *c->cls = Kl; |
313 | envc = 1; |
314 | break; |
315 | } |
316 | } |
317 | return envc << 12 | (gp-gpreg) << 4 | (fp-fpreg) << 8; |
318 | } |
319 | |
320 | static void |
321 | stkblob(Ref r, Typ *t, Fn *fn, Insl **ilp) |
322 | { |
323 | Insl *il; |
324 | int al; |
325 | uint64_t sz; |
326 | |
327 | il = alloc(sizeof *il); |
328 | al = t->align - 2; /* specific to NAlign == 3 */ |
329 | if (al < 0) |
330 | al = 0; |
331 | sz = (t->size + 7) & ~7; |
332 | il->i = (Ins){Oalloc+al, Kl, r, {getcon(sz, fn)}}; |
333 | il->link = *ilp; |
334 | *ilp = il; |
335 | } |
336 | |
/* lower one call
 *
 * [i0, i1) are the argument instructions and i1 is the call
 * itself: i1->arg[0] is the callee, i1->arg[1] is either R
 * or the type of the returned aggregate, and i1->to receives
 * the result; alloc instructions for stack blobs are pushed
 * on *ilp for the caller to emit
 *
 * NOTE(review): the instruction buffer appears to be filled
 * from its end (see the uses of curi and insb), so the
 * emit() calls below are to be read bottom-up to get the
 * execution order
 */
static void
selcall(Fn *fn, Ins *i0, Ins *i1, Insl **ilp)
{
	Ins *i;
	Class *ca, *c, cr;
	int j, k, cty;
	uint64_t stk, off;
	Ref r, r1, tmp[2];

	ca = alloc((i1-i0) * sizeof ca[0]);
	cr.class = 0;

	/* classify the returned aggregate, if any */
	if (!req(i1->arg[1], R))
		typclass(&cr, &typ[i1->arg[1].val], 1, gpreg, fpreg);

	cty = argsclass(i0, i1, ca, cr.class & Cptr);
	stk = 0;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op == Oargv)
			continue;
		if (c->class & Cptr) {
			/* the aggregate is passed by reference:
			 * the argument becomes a plain pointer
			 * to a fresh stack blob (filled below)
			 */
			i->arg[0] = newtmp("abi", Kl, fn);
			stkblob(i->arg[0], c->type, fn, ilp);
			i->op = Oarg;
		}
		if (c->class & Cstk1)
			stk += 8;
		if (c->class & Cstk2)
			stk += 8;
	}
	/* stk is a multiple of 8, this rounds it up to a
	 * multiple of 16
	 */
	stk += stk & 15;
	if (stk)
		/* release the argument area after the call */
		emit(Osalloc, Kl, R, getcon(-stk, fn), R);

	if (!req(i1->arg[1], R)) {
		/* aggregate result: i1->to is the address of a
		 * stack blob holding it
		 */
		stkblob(i1->to, cr.type, fn, ilp);
		cty |= (cr.nfp << 2) | cr.ngp;
		if (cr.class & Cptr)
			/* spill & rega expect calls to be
			 * followed by copies from regs,
			 * so we emit a dummy
			 */
			emit(Ocopy, Kw, R, TMP(A0), R);
		else {
			/* copy the return registers to temporaries
			 * and store those in the blob
			 */
			sttmps(tmp, cr.nreg, &cr, i1->to, fn);
			for (j=0; j<cr.nreg; j++) {
				r = TMP(cr.reg[j]);
				emit(Ocopy, cr.cls[j], tmp[j], r, R);
			}
		}
	} else if (KBASE(i1->cls) == 0) {
		/* integer result in a0 */
		emit(Ocopy, i1->cls, i1->to, TMP(A0), R);
		cty |= 1;
	} else {
		/* floating-point result in fa0 */
		emit(Ocopy, i1->cls, i1->to, TMP(FA0), R);
		cty |= 1 << 2;
	}

	emit(Ocall, 0, R, i1->arg[0], CALL(cty));

	if (cr.class & Cptr)
		/* struct return argument */
		emit(Ocopy, Kl, TMP(A0), i1->to, R);

	/* move arguments into registers */
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op == Oargv || c->class & Cstk1)
			continue;
		if (i->op == Oargc) {
			ldregs(c, i->arg[1], fn);
		} else if (c->class & Cfpint) {
			/* float in a gp register: the register is
			 * copied from an integer temporary that
			 * the cast emitted in the next loop
			 * defines; *c->reg is repurposed to hold
			 * that temporary
			 */
			k = KWIDE(*c->cls) ? Kl : Kw;
			r = newtmp("abi", k, fn);
			emit(Ocopy, k, TMP(*c->reg), r, R);
			*c->reg = r.val;
		} else {
			emit(Ocopy, *c->cls, TMP(*c->reg), i->arg[0], R);
		}
	}

	for (i=i0, c=ca; i<i1; i++, c++) {
		if (c->class & Cfpint) {
			/* *c->reg is the temporary created above */
			k = KWIDE(*c->cls) ? Kl : Kw;
			emit(Ocast, k, TMP(*c->reg), i->arg[0], R);
		}
		if (c->class & Cptr)
			/* copy the aggregate into its blob */
			blit0(i->arg[0], i->arg[1], c->type->size, fn);
	}

	if (!stk)
		return;

	/* populate the stack */
	off = 0;
	/* r is the base of the argument area, defined by
	 * the salloc emitted at the end of this function
	 */
	r = newtmp("abi", Kl, fn);
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (i->op == Oargv || !(c->class & Cstk))
			continue;
		if (i->op == Oarg) {
			r1 = newtmp("abi", Kl, fn);
			emit(Ostorew+i->cls, Kw, R, i->arg[0], r1);
			if (i->cls == Kw) {
				/* TODO: we only need this sign
				 * extension for l temps passed
				 * as w arguments
				 * (see rv64/isel.c:fixarg)
				 */
				/* curi is the store just emitted; it
				 * is patched to store the extended
				 * value as a full word
				 */
				curi->op = Ostorel;
				curi->arg[0] = newtmp("abi", Kl, fn);
				emit(Oextsw, Kl, curi->arg[0], i->arg[0], R);
			}
			emit(Oadd, Kl, r1, r, getcon(off, fn));
			off += 8;
		}
		if (i->op == Oargc) {
			/* copy the stack-passed halves of the
			 * aggregate at i->arg[1]
			 */
			if (c->class & Cstk1) {
				blit(r, off, i->arg[1], 0, 8, fn);
				off += 8;
			}
			if (c->class & Cstk2) {
				blit(r, off, i->arg[1], 8, 8, fn);
				off += 8;
			}
		}
	}
	emit(Osalloc, Kl, r, getcon(stk, fn), R);
}
464 | |
/* lower the parameter instructions in [i0, i1)
 *
 * the instruction buffer is reset and receives the code
 * that moves incoming registers and stack slots into the
 * parameters' temporaries; returns the number of gp and fp
 * argument registers used and the next free stack slot
 */
static Params
selpar(Fn *fn, Ins *i0, Ins *i1)
{
	Class *ca, *c, cr;
	Insl *il;
	Ins *i;
	int j, k, s, cty, nt;
	Ref r, tmp[17], *t;

	ca = alloc((i1-i0) * sizeof ca[0]);
	cr.class = 0;
	curi = &insb[NIns];

	if (fn->retty >= 0) {
		typclass(&cr, &typ[fn->retty], 1, gpreg, fpreg);
		if (cr.class & Cptr) {
			/* the caller passes the address of the
			 * returned aggregate in a0; keep it for
			 * selret()
			 */
			fn->retr = newtmp("abi", Kl, fn);
			emit(Ocopy, Kl, fn->retr, TMP(A0), R);
		}
	}

	cty = argsclass(i0, i1, ca, cr.class & Cptr);
	fn->reg = rv64_argregs(CALL(cty), 0);

	il = 0;
	t = tmp;
	for (i=i0, c=ca; i<i1; i++, c++) {
		if (c->class & Cfpint) {
			/* float received in a gp register: the
			 * parameter is first received in a fresh
			 * temporary (handled as an integer below)
			 * that is cast to the original i->to
			 */
			r = i->to;
			k = *c->cls;
			*c->cls = KWIDE(k) ? Kl : Kw;
			i->to = newtmp("abi", k, fn);
			emit(Ocast, k, r, i->to, R);
		}
		/* aggregates received (at least partly) in
		 * registers are stored in a stack blob whose
		 * address becomes the parameter
		 */
		if (i->op == Oparc)
		if (!(c->class & Cptr))
		if (c->nreg != 0) {
			nt = c->nreg;
			if (c->class & Cstk2) {
				/* the second half comes from the
				 * stack, it gets a temporary too
				 */
				c->cls[1] = Kl;
				c->off[1] = 8;
				assert(nt == 1);
				nt = 2;
			}
			sttmps(t, nt, c, i->to, fn);
			stkblob(i->to, c->type, fn, &il);
			t += nt;
		}
	}
	/* emit the alloc instructions queued by stkblob() */
	for (; il; il=il->link)
		emiti(il->i);

	/* second pass: define the temporaries created above,
	 * walking tmp[] in the same order as the first pass
	 */
	t = tmp;
	/* s is the stack slot of the next stack-passed part */
	s = 2 + 8*fn->vararg;
	for (i=i0, c=ca; i<i1; i++, c++)
		if (i->op == Oparc && !(c->class & Cptr)) {
			if (c->nreg == 0) {
				/* fully on the stack: use the
				 * incoming slots in place
				 */
				fn->tmp[i->to.val].slot = -s;
				s += (c->class & Cstk2) ? 2 : 1;
				continue;
			}
			for (j=0; j<c->nreg; j++) {
				r = TMP(c->reg[j]);
				emit(Ocopy, c->cls[j], *t++, r, R);
			}
			if (c->class & Cstk2) {
				emit(Oload, Kl, *t, SLOT(-s), R);
				t++, s++;
			}
		} else if (c->class & Cstk1) {
			emit(Oload, *c->cls, i->to, SLOT(-s), R);
			s++;
		} else {
			emit(Ocopy, *c->cls, i->to, TMP(*c->reg), R);
		}

	return (Params){
		.stk = s,
		.ngp = (cty >> 4) & 15,
		.nfp = (cty >> 8) & 15,
	};
}
547 | |
548 | static void |
549 | selvaarg(Fn *fn, Ins *i) |
550 | { |
551 | Ref loc, newloc; |
552 | |
553 | loc = newtmp("abi" , Kl, fn); |
554 | newloc = newtmp("abi" , Kl, fn); |
555 | emit(Ostorel, Kw, R, newloc, i->arg[0]); |
556 | emit(Oadd, Kl, newloc, loc, getcon(8, fn)); |
557 | emit(Oload, i->cls, i->to, loc, R); |
558 | emit(Oload, Kl, loc, i->arg[0], R); |
559 | } |
560 | |
561 | static void |
562 | selvastart(Fn *fn, Params p, Ref ap) |
563 | { |
564 | Ref rsave; |
565 | int s; |
566 | |
567 | rsave = newtmp("abi" , Kl, fn); |
568 | emit(Ostorel, Kw, R, rsave, ap); |
569 | s = p.stk > 2 + 8 * fn->vararg ? p.stk : 2 + p.ngp; |
570 | emit(Oaddr, Kl, rsave, SLOT(-s), R); |
571 | } |
572 | |
/* entry point of the pass: rewrite parameters, calls,
 * returns, and vararg instructions of fn according to the
 * calling convention implemented in this file
 */
void
rv64_abi(Fn *fn)
{
	Blk *b;
	Ins *i, *i0, *ip;
	Insl *il;
	int n;
	Params p;

	for (b=fn->start; b; b=b->link)
		b->visit = 0;

	/* lower parameters */
	for (b=fn->start, i=b->ins; i<&b->ins[b->nins]; i++)
		if (!ispar(i->op))
			break;
	p = selpar(fn, b->ins, i);
	/* the start block becomes the code emitted by selpar()
	 * followed by its own non-parameter instructions
	 */
	n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
	i0 = alloc(n * sizeof(Ins));
	ip = icpy(ip = i0, curi, &insb[NIns] - curi);
	ip = icpy(ip, i, &b->ins[b->nins] - i);
	b->nins = n;
	b->ins = i0;

	/* lower calls, returns, and vararg instructions */
	il = 0;
	b = fn->start;
	do {
		if (!(b = b->link))
			b = fn->start; /* do it last */
		if (b->visit)
			continue;
		curi = &insb[NIns];
		selret(b, fn);
		/* walk the block from its last instruction to
		 * its first
		 */
		for (i=&b->ins[b->nins]; i!=b->ins;)
			switch ((--i)->op) {
			default:
				emiti(*i);
				break;
			case Ocall:
				/* find the first argument of the call */
				for (i0=i; i0>b->ins; i0--)
					if (!isarg((i0-1)->op))
						break;
				selcall(fn, i0, i, &il);
				i = i0;
				break;
			case Ovastart:
				selvastart(fn, p, i->arg[0]);
				break;
			case Ovaarg:
				selvaarg(fn, i);
				break;
			case Oarg:
			case Oargc:
				/* arguments are consumed with their call */
				die("unreachable");
			}
		/* the start block is processed last, so all the
		 * allocs queued by selcall() can be emitted in it
		 */
		if (b == fn->start)
			for (; il; il=il->link)
				emiti(il->i);
		b->nins = &insb[NIns] - curi;
		idup(&b->ins, curi, b->nins);
	} while (b != fn->start);

	if (debug['A']) {
		fprintf(stderr, "\n> After ABI lowering:\n");
		printfn(fn, stderr);
	}
}
641 | |