1 | #include "all.h" |
2 | |
typedef struct E E;

/* emission state for the function being compiled;
 * it is created in arm64_emitfn() and passed to
 * every helper of this file */
struct E {
	FILE *f;        /* stream the assembly is written to */
	Fn *fn;         /* function being emitted */
	uint64_t frame; /* frame size in bytes, computed by framelayout() */
	uint padding;   /* padding in bytes, computed by framelayout() */
};
11 | |
/* X-macro pairing each comparison code with the
 * condition suffix printed for it; it is expanded
 * once in omap[] to build the "cset" templates and
 * once in arm64_emitfn() to build the table used
 * for conditional branches */
#define CMP(X) \
	X(Cieq,       "eq") \
	X(Cine,       "ne") \
	X(Cisge,      "ge") \
	X(Cisgt,      "gt") \
	X(Cisle,      "le") \
	X(Cislt,      "lt") \
	X(Ciuge,      "cs") \
	X(Ciugt,      "hi") \
	X(Ciule,      "ls") \
	X(Ciult,      "cc") \
	X(NCmpI+Cfeq, "eq") \
	X(NCmpI+Cfge, "ge") \
	X(NCmpI+Cfgt, "gt") \
	X(NCmpI+Cfle, "ls") \
	X(NCmpI+Cflt, "mi") \
	X(NCmpI+Cfne, "ne") \
	X(NCmpI+Cfo,  "vc") \
	X(NCmpI+Cfuo, "vs")
31 | |
/* wildcard classes for the cls field of omap[]
 * entries; emitins() accepts Ka for any instruction
 * class and Ki when KBASE() of the instruction
 * class is 0 */
enum {
	Ki = -1, /* matches Kw and Kl */
	Ka = -2, /* matches all classes */
};
36 | |
/* Instruction templates, searched linearly by
 * emitins() on (op, cls) and expanded by emitf().
 * The table ends with an entry whose op is NOp.
 *
 * Escapes understood by emitf():
 *   %=        the destination of the instruction
 *   %0        the first argument (a register)
 *   %1        the second argument (a register or an
 *             immediate constant)
 *   %?        a scratch register: R18 for integer
 *             classes, s31/d31 otherwise
 *   %M0 %M1 %M=
 *             a memory operand, either [reg] or
 *             [x29, offset] for a stack slot
 * A W, L, S or D right after the % forces the class
 * Kw, Kl, Ks or Kd for that operand; otherwise the
 * class of the instruction is used.  The first space
 * of a template is printed as a tab.
 */
static struct {
	short op;
	short cls;
	char *asm;
} omap[] = {
	{ Oadd,    Ki, "add %=, %0, %1" },
	{ Oadd,    Ka, "fadd %=, %0, %1" },
	{ Osub,    Ki, "sub %=, %0, %1" },
	{ Osub,    Ka, "fsub %=, %0, %1" },
	{ Oneg,    Ki, "neg %=, %0" },
	{ Oneg,    Ka, "fneg %=, %0" },
	{ Oand,    Ki, "and %=, %0, %1" },
	{ Oor,     Ki, "orr %=, %0, %1" },
	{ Oxor,    Ki, "eor %=, %0, %1" },
	{ Osar,    Ki, "asr %=, %0, %1" },
	{ Oshr,    Ki, "lsr %=, %0, %1" },
	{ Oshl,    Ki, "lsl %=, %0, %1" },
	{ Omul,    Ki, "mul %=, %0, %1" },
	{ Omul,    Ka, "fmul %=, %0, %1" },
	{ Odiv,    Ki, "sdiv %=, %0, %1" },
	{ Odiv,    Ka, "fdiv %=, %0, %1" },
	{ Oudiv,   Ki, "udiv %=, %0, %1" },
	{ Orem,    Ki, "sdiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
	{ Ourem,   Ki, "udiv %?, %0, %1\n\tmsub\t%=, %?, %1, %0" },
	{ Ocopy,   Ki, "mov %=, %0" },
	{ Ocopy,   Ka, "fmov %=, %0" },
	{ Oswap,   Ki, "mov %?, %0\n\tmov\t%0, %1\n\tmov\t%1, %?" },
	{ Oswap,   Ka, "fmov %?, %0\n\tfmov\t%0, %1\n\tfmov\t%1, %?" },
	{ Ostoreb, Kw, "strb %W0, %M1" },
	{ Ostoreh, Kw, "strh %W0, %M1" },
	{ Ostorew, Kw, "str %W0, %M1" },
	{ Ostorel, Kw, "str %L0, %M1" },
	{ Ostores, Kw, "str %S0, %M1" },
	{ Ostored, Kw, "str %D0, %M1" },
	{ Oloadsb, Ki, "ldrsb %=, %M0" },
	{ Oloadub, Ki, "ldrb %W=, %M0" },
	{ Oloadsh, Ki, "ldrsh %=, %M0" },
	{ Oloaduh, Ki, "ldrh %W=, %M0" },
	{ Oloadsw, Kw, "ldr %=, %M0" },
	{ Oloadsw, Kl, "ldrsw %=, %M0" },
	{ Oloaduw, Ki, "ldr %W=, %M0" },
	{ Oload,   Ka, "ldr %=, %M0" },
	{ Oextsb,  Ki, "sxtb %=, %W0" },
	{ Oextub,  Ki, "uxtb %W=, %W0" },
	{ Oextsh,  Ki, "sxth %=, %W0" },
	{ Oextuh,  Ki, "uxth %W=, %W0" },
	{ Oextsw,  Ki, "sxtw %L=, %W0" },
	{ Oextuw,  Ki, "mov %W=, %W0" },
	{ Oexts,   Kd, "fcvt %=, %S0" },
	{ Otruncd, Ks, "fcvt %=, %D0" },
	{ Ocast,   Kw, "fmov %=, %S0" },
	{ Ocast,   Kl, "fmov %=, %D0" },
	{ Ocast,   Ks, "fmov %=, %W0" },
	{ Ocast,   Kd, "fmov %=, %L0" },
	{ Ostosi,  Ka, "fcvtzs %=, %S0" },
	{ Ostoui,  Ka, "fcvtzu %=, %S0" },
	{ Odtosi,  Ka, "fcvtzs %=, %D0" },
	{ Odtoui,  Ka, "fcvtzu %=, %D0" },
	{ Oswtof,  Ka, "scvtf %=, %W0" },
	{ Ouwtof,  Ka, "ucvtf %=, %W0" },
	{ Osltof,  Ka, "scvtf %=, %L0" },
	{ Oultof,  Ka, "ucvtf %=, %L0" },
	{ Ocall,   Kw, "blr %L0" },

	{ Oacmp,   Ki, "cmp %0, %1" },
	{ Oacmn,   Ki, "cmn %0, %1" },
	{ Oafcmp,  Ka, "fcmpe %0, %1" },

	/* one "cset" template per comparison of CMP() */
#define X(c, str) \
	{ Oflag+c, Ki, "cset %=, " str },
	CMP(X)
#undef X
	{ NOp, 0, 0 }
};
111 | |
112 | static char * |
113 | rname(int r, int k) |
114 | { |
115 | static char buf[4]; |
116 | |
117 | if (r == SP) { |
118 | assert(k == Kl); |
119 | sprintf(buf, "sp" ); |
120 | } |
121 | else if (R0 <= r && r <= LR) |
122 | switch (k) { |
123 | default: die("invalid class" ); |
124 | case Kw: sprintf(buf, "w%d" , r-R0); break; |
125 | case Kx: |
126 | case Kl: sprintf(buf, "x%d" , r-R0); break; |
127 | } |
128 | else if (V0 <= r && r <= V30) |
129 | switch (k) { |
130 | default: die("invalid class" ); |
131 | case Ks: sprintf(buf, "s%d" , r-V0); break; |
132 | case Kx: |
133 | case Kd: sprintf(buf, "d%d" , r-V0); break; |
134 | } |
135 | else |
136 | die("invalid register" ); |
137 | return buf; |
138 | } |
139 | |
140 | static uint64_t |
141 | slot(int s, E *e) |
142 | { |
143 | s = ((int32_t)s << 3) >> 3; |
144 | if (s == -1) |
145 | return 16 + e->frame; |
146 | if (s < 0) { |
147 | if (e->fn->vararg) |
148 | return 16 + e->frame + 192 - (s+2)*8; |
149 | else |
150 | return 16 + e->frame - (s+2)*8; |
151 | } else |
152 | return 16 + e->padding + 4 * s; |
153 | } |
154 | |
155 | static void |
156 | emitf(char *s, Ins *i, E *e) |
157 | { |
158 | Ref r; |
159 | int k, c; |
160 | Con *pc; |
161 | unsigned n, sp; |
162 | |
163 | fputc('\t', e->f); |
164 | |
165 | sp = 0; |
166 | for (;;) { |
167 | k = i->cls; |
168 | while ((c = *s++) != '%') |
169 | if (c == ' ' && !sp) { |
170 | fputc('\t', e->f); |
171 | sp = 1; |
172 | } else if ( !c) { |
173 | fputc('\n', e->f); |
174 | return; |
175 | } else |
176 | fputc(c, e->f); |
177 | Switch: |
178 | switch ((c = *s++)) { |
179 | default: |
180 | die("invalid escape" ); |
181 | case 'W': |
182 | k = Kw; |
183 | goto Switch; |
184 | case 'L': |
185 | k = Kl; |
186 | goto Switch; |
187 | case 'S': |
188 | k = Ks; |
189 | goto Switch; |
190 | case 'D': |
191 | k = Kd; |
192 | goto Switch; |
193 | case '?': |
194 | if (KBASE(k) == 0) |
195 | fputs(rname(R18, k), e->f); |
196 | else |
197 | fputs(k==Ks ? "s31" : "d31" , e->f); |
198 | break; |
199 | case '=': |
200 | case '0': |
201 | r = c == '=' ? i->to : i->arg[0]; |
202 | assert(isreg(r)); |
203 | fputs(rname(r.val, k), e->f); |
204 | break; |
205 | case '1': |
206 | r = i->arg[1]; |
207 | switch (rtype(r)) { |
208 | default: |
209 | die("invalid second argument" ); |
210 | case RTmp: |
211 | assert(isreg(r)); |
212 | fputs(rname(r.val, k), e->f); |
213 | break; |
214 | case RCon: |
215 | pc = &e->fn->con[r.val]; |
216 | n = pc->bits.i; |
217 | assert(pc->type == CBits); |
218 | if (n & 0xfff000) |
219 | fprintf(e->f, "#%u, lsl #12" , n>>12); |
220 | else |
221 | fprintf(e->f, "#%u" , n); |
222 | break; |
223 | } |
224 | break; |
225 | case 'M': |
226 | c = *s++; |
227 | assert(c == '0' || c == '1' || c == '='); |
228 | r = c == '=' ? i->to : i->arg[c - '0']; |
229 | switch (rtype(r)) { |
230 | default: |
231 | die("todo (arm emit): unhandled ref" ); |
232 | case RTmp: |
233 | assert(isreg(r)); |
234 | fprintf(e->f, "[%s]" , rname(r.val, Kl)); |
235 | break; |
236 | case RSlot: |
237 | fprintf(e->f, "[x29, %" PRIu64"]" , slot(r.val, e)); |
238 | break; |
239 | } |
240 | break; |
241 | } |
242 | } |
243 | } |
244 | |
245 | static void |
246 | loadcon(Con *c, int r, int k, FILE *f) |
247 | { |
248 | char *rn, *p, off[32]; |
249 | int64_t n; |
250 | int w, sh; |
251 | |
252 | w = KWIDE(k); |
253 | rn = rname(r, k); |
254 | n = c->bits.i; |
255 | if (c->type == CAddr) { |
256 | rn = rname(r, Kl); |
257 | if (n) |
258 | sprintf(off, "+%" PRIi64, n); |
259 | else |
260 | off[0] = 0; |
261 | p = c->local ? ".L" : "" ; |
262 | fprintf(f, "\tadrp\t%s, %s%s%s\n" , |
263 | rn, p, str(c->label), off); |
264 | fprintf(f, "\tadd\t%s, %s, #:lo12:%s%s%s\n" , |
265 | rn, rn, p, str(c->label), off); |
266 | return; |
267 | } |
268 | assert(c->type == CBits); |
269 | if (!w) |
270 | n = (int32_t)n; |
271 | if ((n | 0xffff) == -1 || arm64_logimm(n, k)) { |
272 | fprintf(f, "\tmov\t%s, #%" PRIi64"\n" , rn, n); |
273 | } else { |
274 | fprintf(f, "\tmov\t%s, #%d\n" , |
275 | rn, (int)(n & 0xffff)); |
276 | for (sh=16; n>>=16; sh+=16) { |
277 | if ((!w && sh == 32) || sh == 64) |
278 | break; |
279 | fprintf(f, "\tmovk\t%s, #0x%x, lsl #%d\n" , |
280 | rn, (unsigned)(n & 0xffff), sh); |
281 | } |
282 | } |
283 | } |
284 | |
285 | static void emitins(Ins *, E *); |
286 | |
287 | static void |
288 | fixarg(Ref *pr, int sz, E *e) |
289 | { |
290 | Ins *i; |
291 | Ref r; |
292 | uint64_t s; |
293 | |
294 | r = *pr; |
295 | if (rtype(r) == RSlot) { |
296 | s = slot(r.val, e); |
297 | if (s > sz * 4095u) { |
298 | i = &(Ins){Oaddr, Kl, TMP(IP0), {r}}; |
299 | emitins(i, e); |
300 | *pr = TMP(IP0); |
301 | } |
302 | } |
303 | } |
304 | |
305 | static void |
306 | emitins(Ins *i, E *e) |
307 | { |
308 | char *rn; |
309 | uint64_t s; |
310 | int o; |
311 | Ref r; |
312 | |
313 | switch (i->op) { |
314 | default: |
315 | if (isload(i->op)) |
316 | fixarg(&i->arg[0], loadsz(i), e); |
317 | if (isstore(i->op)) |
318 | fixarg(&i->arg[1], storesz(i), e); |
319 | Table: |
320 | /* most instructions are just pulled out of |
321 | * the table omap[], some special cases are |
322 | * detailed below */ |
323 | for (o=0;; o++) { |
324 | /* this linear search should really be a binary |
325 | * search */ |
326 | if (omap[o].op == NOp) |
327 | die("no match for %s(%c)" , |
328 | optab[i->op].name, "wlsd" [i->cls]); |
329 | if (omap[o].op == i->op) |
330 | if (omap[o].cls == i->cls || omap[o].cls == Ka |
331 | || (omap[o].cls == Ki && KBASE(i->cls) == 0)) |
332 | break; |
333 | } |
334 | emitf(omap[o].asm, i, e); |
335 | break; |
336 | case Onop: |
337 | break; |
338 | case Ocopy: |
339 | if (req(i->to, i->arg[0])) |
340 | break; |
341 | if (rtype(i->to) == RSlot) { |
342 | r = i->to; |
343 | if (!isreg(i->arg[0])) { |
344 | i->to = TMP(R18); |
345 | emitins(i, e); |
346 | i->arg[0] = i->to; |
347 | } |
348 | i->op = Ostorew + i->cls; |
349 | i->cls = Kw; |
350 | i->arg[1] = r; |
351 | emitins(i, e); |
352 | break; |
353 | } |
354 | assert(isreg(i->to)); |
355 | switch (rtype(i->arg[0])) { |
356 | case RCon: |
357 | loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f); |
358 | break; |
359 | case RSlot: |
360 | i->op = Oload; |
361 | emitins(i, e); |
362 | break; |
363 | default: |
364 | assert(i->to.val != R18); |
365 | goto Table; |
366 | } |
367 | break; |
368 | case Oaddr: |
369 | assert(rtype(i->arg[0]) == RSlot); |
370 | rn = rname(i->to.val, Kl); |
371 | s = slot(i->arg[0].val, e); |
372 | if (s <= 4095) |
373 | fprintf(e->f, "\tadd\t%s, x29, #%" PRIu64"\n" , rn, s); |
374 | else if (s <= 65535) |
375 | fprintf(e->f, |
376 | "\tmov\t%s, #%" PRIu64"\n" |
377 | "\tadd\t%s, x29, %s\n" , |
378 | rn, s, rn, rn |
379 | ); |
380 | else |
381 | fprintf(e->f, |
382 | "\tmov\t%s, #%" PRIu64"\n" |
383 | "\tmovk\t%s, #%" PRIu64", lsl #16\n" |
384 | "\tadd\t%s, x29, %s\n" , |
385 | rn, s & 0xFFFF, rn, s >> 16, rn, rn |
386 | ); |
387 | break; |
388 | case Osalloc: |
389 | emitf("sub sp, sp, %0" , i, e); |
390 | if (!req(i->to, R)) |
391 | emitf("mov %=, sp" , i, e); |
392 | break; |
393 | } |
394 | } |
395 | |
396 | static void |
397 | framelayout(E *e) |
398 | { |
399 | int *r; |
400 | uint o; |
401 | uint64_t f; |
402 | |
403 | for (o=0, r=arm64_rclob; *r>=0; r++) |
404 | o += 1 & (e->fn->reg >> *r); |
405 | f = e->fn->slot; |
406 | f = (f + 3) & -4; |
407 | o += o & 1; |
408 | e->padding = 4*(f-e->fn->slot); |
409 | e->frame = 4*f + 8*o; |
410 | } |
411 | |
412 | /* |
413 | |
414 | Stack-frame layout: |
415 | |
416 | +=============+ |
417 | | varargs | |
418 | | save area | |
419 | +-------------+ |
420 | | callee-save | ^ |
421 | | registers | | |
422 | +-------------+ | |
423 | | ... | | |
424 | | spill slots | | |
425 | | ... | | e->frame |
426 | +-------------+ | |
427 | | ... | | |
428 | | locals | | |
429 | | ... | | |
430 | +-------------+ | |
431 | | e->padding | v |
432 | +-------------+ |
433 | | saved x29 | |
434 | | saved x30 | |
435 | +=============+ <- x29 |
436 | |
437 | */ |
438 | |
439 | void |
440 | arm64_emitfn(Fn *fn, FILE *out) |
441 | { |
442 | static char *ctoa[] = { |
443 | #define X(c, s) [c] = s, |
444 | CMP(X) |
445 | #undef X |
446 | }; |
447 | static int id0; |
448 | int s, n, c, lbl, *r; |
449 | uint64_t o; |
450 | Blk *b, *t; |
451 | Ins *i; |
452 | E *e; |
453 | |
454 | gasemitlnk(fn->name, &fn->lnk, ".text" , out); |
455 | e = &(E){.f = out, .fn = fn}; |
456 | framelayout(e); |
457 | |
458 | if (e->fn->vararg) { |
459 | for (n=7; n>=0; n--) |
460 | fprintf(e->f, "\tstr\tq%d, [sp, -16]!\n" , n); |
461 | for (n=7; n>=0; n-=2) |
462 | fprintf(e->f, "\tstp\tx%d, x%d, [sp, -16]!\n" , n-1, n); |
463 | } |
464 | |
465 | if (e->frame + 16 <= 512) |
466 | fprintf(e->f, |
467 | "\tstp\tx29, x30, [sp, -%" PRIu64"]!\n" , |
468 | e->frame + 16 |
469 | ); |
470 | else if (e->frame <= 4095) |
471 | fprintf(e->f, |
472 | "\tsub\tsp, sp, #%" PRIu64"\n" |
473 | "\tstp\tx29, x30, [sp, -16]!\n" , |
474 | e->frame |
475 | ); |
476 | else if (e->frame <= 65535) |
477 | fprintf(e->f, |
478 | "\tmov\tx16, #%" PRIu64"\n" |
479 | "\tsub\tsp, sp, x16\n" |
480 | "\tstp\tx29, x30, [sp, -16]!\n" , |
481 | e->frame |
482 | ); |
483 | else |
484 | fprintf(e->f, |
485 | "\tmov\tx16, #%" PRIu64"\n" |
486 | "\tmovk\tx16, #%" PRIu64", lsl #16\n" |
487 | "\tsub\tsp, sp, x16\n" |
488 | "\tstp\tx29, x30, [sp, -16]!\n" , |
489 | e->frame & 0xFFFF, e->frame >> 16 |
490 | ); |
491 | fputs("\tmov\tx29, sp\n" , e->f); |
492 | s = (e->frame - e->padding) / 4; |
493 | for (r=arm64_rclob; *r>=0; r++) |
494 | if (e->fn->reg & BIT(*r)) { |
495 | s -= 2; |
496 | i = &(Ins){.arg = {TMP(*r), SLOT(s)}}; |
497 | i->op = *r >= V0 ? Ostored : Ostorel; |
498 | emitins(i, e); |
499 | } |
500 | |
501 | for (lbl=0, b=e->fn->start; b; b=b->link) { |
502 | if (lbl || b->npred > 1) |
503 | fprintf(e->f, ".L%d:\n" , id0+b->id); |
504 | for (i=b->ins; i!=&b->ins[b->nins]; i++) |
505 | emitins(i, e); |
506 | lbl = 1; |
507 | switch (b->jmp.type) { |
508 | case Jret0: |
509 | s = (e->frame - e->padding) / 4; |
510 | for (r=arm64_rclob; *r>=0; r++) |
511 | if (e->fn->reg & BIT(*r)) { |
512 | s -= 2; |
513 | i = &(Ins){Oload, 0, TMP(*r), {SLOT(s)}}; |
514 | i->cls = *r >= V0 ? Kd : Kl; |
515 | emitins(i, e); |
516 | } |
517 | if (e->fn->dynalloc) |
518 | fputs("\tmov sp, x29\n" , e->f); |
519 | o = e->frame + 16; |
520 | if (e->fn->vararg) |
521 | o += 192; |
522 | if (o <= 504) |
523 | fprintf(e->f, |
524 | "\tldp\tx29, x30, [sp], %" PRIu64"\n" , |
525 | o |
526 | ); |
527 | else if (o - 16 <= 4095) |
528 | fprintf(e->f, |
529 | "\tldp\tx29, x30, [sp], 16\n" |
530 | "\tadd\tsp, sp, #%" PRIu64"\n" , |
531 | o - 16 |
532 | ); |
533 | else if (o - 16 <= 65535) |
534 | fprintf(e->f, |
535 | "\tldp\tx29, x30, [sp], 16\n" |
536 | "\tmov\tx16, #%" PRIu64"\n" |
537 | "\tadd\tsp, sp, x16\n" , |
538 | o - 16 |
539 | ); |
540 | else |
541 | fprintf(e->f, |
542 | "\tldp\tx29, x30, [sp], 16\n" |
543 | "\tmov\tx16, #%" PRIu64"\n" |
544 | "\tmovk\tx16, #%" PRIu64", lsl #16\n" |
545 | "\tadd\tsp, sp, x16\n" , |
546 | (o - 16) & 0xFFFF, (o - 16) >> 16 |
547 | ); |
548 | fprintf(e->f, "\tret\n" ); |
549 | break; |
550 | case Jjmp: |
551 | Jmp: |
552 | if (b->s1 != b->link) |
553 | fprintf(e->f, "\tb\t.L%d\n" , id0+b->s1->id); |
554 | else |
555 | lbl = 0; |
556 | break; |
557 | default: |
558 | c = b->jmp.type - Jjf; |
559 | if (c < 0 || c > NCmp) |
560 | die("unhandled jump %d" , b->jmp.type); |
561 | if (b->link == b->s2) { |
562 | t = b->s1; |
563 | b->s1 = b->s2; |
564 | b->s2 = t; |
565 | } else |
566 | c = cmpneg(c); |
567 | fprintf(e->f, "\tb%s\t.L%d\n" , ctoa[c], id0+b->s2->id); |
568 | goto Jmp; |
569 | } |
570 | } |
571 | id0 += e->fn->nblk; |
572 | } |
573 | |