1 | #include "all.h" |
2 | #include <limits.h> |
3 | |
4 | /* For x86_64, do the following: |
5 | * |
6 | * - check that constants are used only in |
7 | * places allowed |
8 | * - ensure immediates always fit in 32b |
9 | * - expose machine register contraints |
10 | * on instructions like division. |
11 | * - implement fast locals (the streak of |
12 | * constant allocX in the first basic block) |
13 | * - recognize complex addressing modes |
14 | * |
15 | * Invariant: the use counts that are used |
16 | * in sel() must be sound. This |
17 | * is not so trivial, maybe the |
18 | * dce should be moved out... |
19 | */ |
20 | |
21 | typedef struct ANum ANum; |
22 | |
23 | struct ANum { |
24 | char n, l, r; |
25 | Ins *i; |
26 | }; |
27 | |
28 | static int amatch(Addr *, Ref, int, ANum *, Fn *); |
29 | |
30 | static int |
31 | noimm(Ref r, Fn *fn) |
32 | { |
33 | int64_t val; |
34 | |
35 | if (rtype(r) != RCon) |
36 | return 0; |
37 | switch (fn->con[r.val].type) { |
38 | case CAddr: |
39 | /* we only support the 'small' |
40 | * code model of the ABI, this |
41 | * means that we can always |
42 | * address data with 32bits |
43 | */ |
44 | return 0; |
45 | case CBits: |
46 | val = fn->con[r.val].bits.i; |
47 | return (val < INT32_MIN || val > INT32_MAX); |
48 | default: |
49 | die("invalid constant" ); |
50 | } |
51 | } |
52 | |
53 | static int |
54 | rslot(Ref r, Fn *fn) |
55 | { |
56 | if (rtype(r) != RTmp) |
57 | return -1; |
58 | return fn->tmp[r.val].slot; |
59 | } |
60 | |
61 | static void |
62 | fixarg(Ref *r, int k, Ins *i, Fn *fn) |
63 | { |
64 | char buf[32]; |
65 | Addr a, *m; |
66 | Ref r0, r1; |
67 | int s, n, op; |
68 | |
69 | r1 = r0 = *r; |
70 | s = rslot(r0, fn); |
71 | op = i ? i->op : Ocopy; |
72 | if (KBASE(k) == 1 && rtype(r0) == RCon) { |
73 | /* load floating points from memory |
74 | * slots, they can't be used as |
75 | * immediates |
76 | */ |
77 | r1 = MEM(fn->nmem); |
78 | vgrow(&fn->mem, ++fn->nmem); |
79 | memset(&a, 0, sizeof a); |
80 | a.offset.type = CAddr; |
81 | a.offset.local = 1; |
82 | n = gasstash(&fn->con[r0.val].bits, KWIDE(k) ? 8 : 4); |
83 | sprintf(buf, "fp%d" , n); |
84 | a.offset.label = intern(buf); |
85 | fn->mem[fn->nmem-1] = a; |
86 | } |
87 | else if (op != Ocopy && k == Kl && noimm(r0, fn)) { |
88 | /* load constants that do not fit in |
89 | * a 32bit signed integer into a |
90 | * long temporary |
91 | */ |
92 | r1 = newtmp("isel" , Kl, fn); |
93 | emit(Ocopy, Kl, r1, r0, R); |
94 | } |
95 | else if (s != -1) { |
96 | /* load fast locals' addresses into |
97 | * temporaries right before the |
98 | * instruction |
99 | */ |
100 | r1 = newtmp("isel" , Kl, fn); |
101 | emit(Oaddr, Kl, r1, SLOT(s), R); |
102 | } |
103 | else if (!(isstore(op) && r == &i->arg[1]) |
104 | && !isload(op) && op != Ocall && rtype(r0) == RCon |
105 | && fn->con[r0.val].type == CAddr) { |
106 | /* apple as does not support 32-bit |
107 | * absolute addressing, use a rip- |
108 | * relative leaq instead |
109 | */ |
110 | r1 = newtmp("isel" , Kl, fn); |
111 | emit(Oaddr, Kl, r1, r0, R); |
112 | } |
113 | else if (rtype(r0) == RMem) { |
114 | /* eliminate memory operands of |
115 | * the form $foo(%rip, ...) |
116 | */ |
117 | m = &fn->mem[r0.val]; |
118 | if (req(m->base, R)) |
119 | if (m->offset.type == CAddr) { |
120 | r0 = newtmp("isel" , Kl, fn); |
121 | emit(Oaddr, Kl, r0, newcon(&m->offset, fn), R); |
122 | m->offset.type = CUndef; |
123 | m->base = r0; |
124 | } |
125 | } |
126 | *r = r1; |
127 | } |
128 | |
129 | static void |
130 | seladdr(Ref *r, ANum *an, Fn *fn) |
131 | { |
132 | Addr a; |
133 | Ref r0; |
134 | |
135 | r0 = *r; |
136 | if (rtype(r0) == RTmp) { |
137 | memset(&a, 0, sizeof a); |
138 | if (!amatch(&a, r0, an[r0.val].n, an, fn)) |
139 | return; |
140 | if (!req(a.base, R)) |
141 | if (a.offset.type == CAddr) { |
142 | /* apple as does not support |
143 | * $foo(%r0, %r1, M); try to |
144 | * rewrite it or bail out if |
145 | * impossible |
146 | */ |
147 | if (!req(a.index, R) || rtype(a.base) != RTmp) |
148 | return; |
149 | else { |
150 | a.index = a.base; |
151 | a.scale = 1; |
152 | a.base = R; |
153 | } |
154 | } |
155 | chuse(r0, -1, fn); |
156 | vgrow(&fn->mem, ++fn->nmem); |
157 | fn->mem[fn->nmem-1] = a; |
158 | chuse(a.base, +1, fn); |
159 | chuse(a.index, +1, fn); |
160 | *r = MEM(fn->nmem-1); |
161 | } |
162 | } |
163 | |
164 | static int |
165 | cmpswap(Ref arg[2], int op) |
166 | { |
167 | switch (op) { |
168 | case NCmpI+Cflt: |
169 | case NCmpI+Cfle: |
170 | return 1; |
171 | case NCmpI+Cfgt: |
172 | case NCmpI+Cfge: |
173 | return 0; |
174 | } |
175 | return rtype(arg[0]) == RCon; |
176 | } |
177 | |
178 | static void |
179 | selcmp(Ref arg[2], int k, int swap, Fn *fn) |
180 | { |
181 | Ref r; |
182 | Ins *icmp; |
183 | |
184 | if (swap) { |
185 | r = arg[1]; |
186 | arg[1] = arg[0]; |
187 | arg[0] = r; |
188 | } |
189 | emit(Oxcmp, k, R, arg[1], arg[0]); |
190 | icmp = curi; |
191 | if (rtype(arg[0]) == RCon) { |
192 | assert(k != Kw); |
193 | icmp->arg[1] = newtmp("isel" , k, fn); |
194 | emit(Ocopy, k, icmp->arg[1], arg[0], R); |
195 | fixarg(&curi->arg[0], k, curi, fn); |
196 | } |
197 | fixarg(&icmp->arg[0], k, icmp, fn); |
198 | fixarg(&icmp->arg[1], k, icmp, fn); |
199 | } |
200 | |
201 | static void |
202 | sel(Ins i, ANum *an, Fn *fn) |
203 | { |
204 | Ref r0, r1, tmp[7]; |
205 | int x, j, k, kc, sh, swap; |
206 | Ins *i0, *i1; |
207 | |
208 | if (rtype(i.to) == RTmp) |
209 | if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1])) |
210 | if (fn->tmp[i.to.val].nuse == 0) { |
211 | chuse(i.arg[0], -1, fn); |
212 | chuse(i.arg[1], -1, fn); |
213 | return; |
214 | } |
215 | i0 = curi; |
216 | k = i.cls; |
217 | switch (i.op) { |
218 | case Odiv: |
219 | case Orem: |
220 | case Oudiv: |
221 | case Ourem: |
222 | if (KBASE(k) == 1) |
223 | goto Emit; |
224 | if (i.op == Odiv || i.op == Oudiv) |
225 | r0 = TMP(RAX), r1 = TMP(RDX); |
226 | else |
227 | r0 = TMP(RDX), r1 = TMP(RAX); |
228 | emit(Ocopy, k, i.to, r0, R); |
229 | emit(Ocopy, k, R, r1, R); |
230 | if (rtype(i.arg[1]) == RCon) { |
231 | /* immediates not allowed for |
232 | * divisions in x86 |
233 | */ |
234 | r0 = newtmp("isel" , k, fn); |
235 | } else |
236 | r0 = i.arg[1]; |
237 | if (fn->tmp[r0.val].slot != -1) |
238 | err("unlikely argument %%%s in %s" , |
239 | fn->tmp[r0.val].name, optab[i.op].name); |
240 | if (i.op == Odiv || i.op == Orem) { |
241 | emit(Oxidiv, k, R, r0, R); |
242 | emit(Osign, k, TMP(RDX), TMP(RAX), R); |
243 | } else { |
244 | emit(Oxdiv, k, R, r0, R); |
245 | emit(Ocopy, k, TMP(RDX), CON_Z, R); |
246 | } |
247 | emit(Ocopy, k, TMP(RAX), i.arg[0], R); |
248 | fixarg(&curi->arg[0], k, curi, fn); |
249 | if (rtype(i.arg[1]) == RCon) |
250 | emit(Ocopy, k, r0, i.arg[1], R); |
251 | break; |
252 | case Osar: |
253 | case Oshr: |
254 | case Oshl: |
255 | r0 = i.arg[1]; |
256 | if (rtype(r0) == RCon) |
257 | goto Emit; |
258 | if (fn->tmp[r0.val].slot != -1) |
259 | err("unlikely argument %%%s in %s" , |
260 | fn->tmp[r0.val].name, optab[i.op].name); |
261 | i.arg[1] = TMP(RCX); |
262 | emit(Ocopy, Kw, R, TMP(RCX), R); |
263 | emiti(i); |
264 | i1 = curi; |
265 | emit(Ocopy, Kw, TMP(RCX), r0, R); |
266 | fixarg(&i1->arg[0], argcls(&i, 0), i1, fn); |
267 | break; |
268 | case Ouwtof: |
269 | r0 = newtmp("utof" , Kl, fn); |
270 | emit(Osltof, k, i.to, r0, R); |
271 | emit(Oextuw, Kl, r0, i.arg[0], R); |
272 | fixarg(&curi->arg[0], k, curi, fn); |
273 | break; |
274 | case Oultof: |
275 | /* %mask =l and %arg.0, 1 |
276 | %isbig =l shr %arg.0, 63 |
277 | %divided =l shr %arg.0, %isbig |
278 | %or =l or %mask, %divided |
279 | %float =d sltof %or |
280 | %cast =l cast %float |
281 | %addend =l shl %isbig, 52 |
282 | %sum =l add %cast, %addend |
283 | %result =d cast %sum |
284 | */ |
285 | r0 = newtmp("utof" , k, fn); |
286 | if (k == Ks) |
287 | kc = Kw, sh = 23; |
288 | else |
289 | kc = Kl, sh = 52; |
290 | for (j=0; j<4; j++) |
291 | tmp[j] = newtmp("utof" , Kl, fn); |
292 | for (; j<7; j++) |
293 | tmp[j] = newtmp("utof" , kc, fn); |
294 | emit(Ocast, k, i.to, tmp[6], R); |
295 | emit(Oadd, kc, tmp[6], tmp[4], tmp[5]); |
296 | emit(Oshl, kc, tmp[5], tmp[1], getcon(sh, fn)); |
297 | emit(Ocast, kc, tmp[4], r0, R); |
298 | emit(Osltof, k, r0, tmp[3], R); |
299 | emit(Oor, Kl, tmp[3], tmp[0], tmp[2]); |
300 | emit(Oshr, Kl, tmp[2], i.arg[0], tmp[1]); |
301 | sel(*curi++, 0, fn); |
302 | emit(Oshr, Kl, tmp[1], i.arg[0], getcon(63, fn)); |
303 | fixarg(&curi->arg[0], Kl, curi, fn); |
304 | emit(Oand, Kl, tmp[0], i.arg[0], getcon(1, fn)); |
305 | fixarg(&curi->arg[0], Kl, curi, fn); |
306 | break; |
307 | case Ostoui: |
308 | i.op = Ostosi; |
309 | kc = Ks; |
310 | tmp[4] = getcon(0xdf000000, fn); |
311 | goto Oftoui; |
312 | case Odtoui: |
313 | i.op = Odtosi; |
314 | kc = Kd; |
315 | tmp[4] = getcon(0xc3e0000000000000, fn); |
316 | Oftoui: |
317 | if (k == Kw) |
318 | goto Emit; |
319 | r0 = newtmp("ftou" , kc, fn); |
320 | for (j=0; j<4; j++) |
321 | tmp[j] = newtmp("ftou" , Kl, fn); |
322 | emit(Oor, Kl, i.to, tmp[0], tmp[3]); |
323 | emit(Oand, Kl, tmp[3], tmp[2], tmp[1]); |
324 | emit(i.op, Kl, tmp[2], r0, R); |
325 | emit(Oadd, kc, r0, tmp[4], i.arg[0]); |
326 | i1 = curi; /* fixarg() can change curi */ |
327 | fixarg(&i1->arg[0], kc, i1, fn); |
328 | fixarg(&i1->arg[1], kc, i1, fn); |
329 | emit(Osar, Kl, tmp[1], tmp[0], getcon(63, fn)); |
330 | emit(i.op, Kl, tmp[0], i.arg[0], R); |
331 | fixarg(&curi->arg[0], Kl, curi, fn); |
332 | break; |
333 | case Onop: |
334 | break; |
335 | case Ostored: |
336 | case Ostores: |
337 | case Ostorel: |
338 | case Ostorew: |
339 | case Ostoreh: |
340 | case Ostoreb: |
341 | if (rtype(i.arg[0]) == RCon) { |
342 | if (i.op == Ostored) |
343 | i.op = Ostorel; |
344 | if (i.op == Ostores) |
345 | i.op = Ostorew; |
346 | } |
347 | seladdr(&i.arg[1], an, fn); |
348 | goto Emit; |
349 | case_Oload: |
350 | seladdr(&i.arg[0], an, fn); |
351 | goto Emit; |
352 | case Ocall: |
353 | case Osalloc: |
354 | case Ocopy: |
355 | case Oadd: |
356 | case Osub: |
357 | case Oneg: |
358 | case Omul: |
359 | case Oand: |
360 | case Oor: |
361 | case Oxor: |
362 | case Oxtest: |
363 | case Ostosi: |
364 | case Odtosi: |
365 | case Oswtof: |
366 | case Osltof: |
367 | case Oexts: |
368 | case Otruncd: |
369 | case Ocast: |
370 | case_OExt: |
371 | Emit: |
372 | emiti(i); |
373 | i1 = curi; /* fixarg() can change curi */ |
374 | fixarg(&i1->arg[0], argcls(&i, 0), i1, fn); |
375 | fixarg(&i1->arg[1], argcls(&i, 1), i1, fn); |
376 | break; |
377 | case Oalloc4: |
378 | case Oalloc8: |
379 | case Oalloc16: |
380 | salloc(i.to, i.arg[0], fn); |
381 | break; |
382 | default: |
383 | if (isext(i.op)) |
384 | goto case_OExt; |
385 | if (isload(i.op)) |
386 | goto case_Oload; |
387 | if (iscmp(i.op, &kc, &x)) { |
388 | switch (x) { |
389 | case NCmpI+Cfeq: |
390 | /* zf is set when operands are |
391 | * unordered, so we may have to |
392 | * check pf |
393 | */ |
394 | r0 = newtmp("isel" , Kw, fn); |
395 | r1 = newtmp("isel" , Kw, fn); |
396 | emit(Oand, Kw, i.to, r0, r1); |
397 | emit(Oflagfo, k, r1, R, R); |
398 | i.to = r0; |
399 | break; |
400 | case NCmpI+Cfne: |
401 | r0 = newtmp("isel" , Kw, fn); |
402 | r1 = newtmp("isel" , Kw, fn); |
403 | emit(Oor, Kw, i.to, r0, r1); |
404 | emit(Oflagfuo, k, r1, R, R); |
405 | i.to = r0; |
406 | break; |
407 | } |
408 | swap = cmpswap(i.arg, x); |
409 | if (swap) |
410 | x = cmpop(x); |
411 | emit(Oflag+x, k, i.to, R, R); |
412 | selcmp(i.arg, kc, swap, fn); |
413 | break; |
414 | } |
415 | die("unknown instruction %s" , optab[i.op].name); |
416 | } |
417 | |
418 | while (i0>curi && --i0) { |
419 | assert(rslot(i0->arg[0], fn) == -1); |
420 | assert(rslot(i0->arg[1], fn) == -1); |
421 | } |
422 | } |
423 | |
424 | static Ins * |
425 | flagi(Ins *i0, Ins *i) |
426 | { |
427 | while (i>i0) { |
428 | i--; |
429 | if (amd64_op[i->op].zflag) |
430 | return i; |
431 | if (amd64_op[i->op].lflag) |
432 | continue; |
433 | return 0; |
434 | } |
435 | return 0; |
436 | } |
437 | |
438 | static void |
439 | seljmp(Blk *b, Fn *fn) |
440 | { |
441 | Ref r; |
442 | int c, k, swap; |
443 | Ins *fi; |
444 | Tmp *t; |
445 | |
446 | if (b->jmp.type == Jret0 || b->jmp.type == Jjmp) |
447 | return; |
448 | assert(b->jmp.type == Jjnz); |
449 | r = b->jmp.arg; |
450 | t = &fn->tmp[r.val]; |
451 | b->jmp.arg = R; |
452 | assert(rtype(r) == RTmp); |
453 | if (b->s1 == b->s2) { |
454 | chuse(r, -1, fn); |
455 | b->jmp.type = Jjmp; |
456 | b->s2 = 0; |
457 | return; |
458 | } |
459 | fi = flagi(b->ins, &b->ins[b->nins]); |
460 | if (!fi || !req(fi->to, r)) { |
461 | selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */ |
462 | b->jmp.type = Jjf + Cine; |
463 | } |
464 | else if (iscmp(fi->op, &k, &c) |
465 | && c != NCmpI+Cfeq /* see sel() */ |
466 | && c != NCmpI+Cfne) { |
467 | swap = cmpswap(fi->arg, c); |
468 | if (swap) |
469 | c = cmpop(c); |
470 | if (t->nuse == 1) { |
471 | selcmp(fi->arg, k, swap, fn); |
472 | *fi = (Ins){.op = Onop}; |
473 | } |
474 | b->jmp.type = Jjf + c; |
475 | } |
476 | else if (fi->op == Oand && t->nuse == 1 |
477 | && (rtype(fi->arg[0]) == RTmp || |
478 | rtype(fi->arg[1]) == RTmp)) { |
479 | fi->op = Oxtest; |
480 | fi->to = R; |
481 | b->jmp.type = Jjf + Cine; |
482 | if (rtype(fi->arg[1]) == RCon) { |
483 | r = fi->arg[1]; |
484 | fi->arg[1] = fi->arg[0]; |
485 | fi->arg[0] = r; |
486 | } |
487 | } |
488 | else { |
489 | /* since flags are not tracked in liveness, |
490 | * the result of the flag-setting instruction |
491 | * has to be marked as live |
492 | */ |
493 | if (t->nuse == 1) |
494 | emit(Ocopy, Kw, R, r, R); |
495 | b->jmp.type = Jjf + Cine; |
496 | } |
497 | } |
498 | |
499 | static int |
500 | aref(Ref r, ANum *ai) |
501 | { |
502 | switch (rtype(r)) { |
503 | case RCon: |
504 | return 2; |
505 | case RTmp: |
506 | return ai[r.val].n; |
507 | default: |
508 | die("constant or temporary expected" ); |
509 | } |
510 | } |
511 | |
512 | static int |
513 | ascale(Ref r, Con *con) |
514 | { |
515 | int64_t n; |
516 | |
517 | if (rtype(r) != RCon) |
518 | return 0; |
519 | if (con[r.val].type != CBits) |
520 | return 0; |
521 | n = con[r.val].bits.i; |
522 | return n == 1 || n == 2 || n == 4 || n == 8; |
523 | } |
524 | |
525 | static void |
526 | anumber(ANum *ai, Blk *b, Con *con) |
527 | { |
528 | /* This should be made obsolete by a proper |
529 | * reassoc pass. |
530 | * |
531 | * Rules: |
532 | * |
533 | * RTmp(_) -> 0 tmp |
534 | * ( RTmp(_) -> 1 slot ) |
535 | * RCon(_) -> 2 con |
536 | * 0 * 2 -> 3 s * i (when constant is 1,2,4,8) |
537 | */ |
538 | static char add[10][10] = { |
539 | [2] [4] = 4, [4] [2] = 4, |
540 | [2] [6] = 6, [6] [2] = 6, |
541 | [2] [7] = 7, [7] [2] = 7, |
542 | [0] [2] = 4, [2] [0] = 4, /* 4: o + b */ |
543 | [0] [0] = 5, /* 5: b + s * i */ |
544 | [0] [3] = 5, [3] [0] = 5, |
545 | [2] [3] = 6, [3] [2] = 6, /* 6: o + s * i */ |
546 | [2] [5] = 7, [5] [2] = 7, /* 7: o + b + s * i */ |
547 | [0] [6] = 7, [6] [0] = 7, |
548 | [4] [3] = 7, [3] [4] = 7, |
549 | }; |
550 | int a, a1, a2, n1, n2, t1, t2; |
551 | Ins *i; |
552 | |
553 | for (i=b->ins; i<&b->ins[b->nins]; i++) { |
554 | if (rtype(i->to) == RTmp) |
555 | ai[i->to.val].i = i; |
556 | if (i->op != Oadd && i->op != Omul) |
557 | continue; |
558 | a1 = aref(i->arg[0], ai); |
559 | a2 = aref(i->arg[1], ai); |
560 | t1 = a1 != 1 && a1 != 2; |
561 | t2 = a2 != 1 && a2 != 2; |
562 | if (i->op == Oadd) { |
563 | a = add[n1 = a1][n2 = a2]; |
564 | if (t1 && a < add[0][a2]) |
565 | a = add[n1 = 0][n2 = a2]; |
566 | if (t2 && a < add[a1][0]) |
567 | a = add[n1 = a1][n2 = 0]; |
568 | if (t1 && t2 && a < add[0][0]) |
569 | a = add[n1 = 0][n2 = 0]; |
570 | } else { |
571 | n1 = n2 = a = 0; |
572 | if (ascale(i->arg[0], con) && t2) |
573 | a = 3, n1 = 2, n2 = 0; |
574 | if (t1 && ascale(i->arg[1], con)) |
575 | a = 3, n1 = 0, n2 = 2; |
576 | } |
577 | ai[i->to.val].n = a; |
578 | ai[i->to.val].l = n1; |
579 | ai[i->to.val].r = n2; |
580 | } |
581 | } |
582 | |
583 | static int |
584 | amatch(Addr *a, Ref r, int n, ANum *ai, Fn *fn) |
585 | { |
586 | Ins *i; |
587 | int nl, nr, t, s; |
588 | Ref al, ar; |
589 | |
590 | if (rtype(r) == RCon) { |
591 | if (!addcon(&a->offset, &fn->con[r.val])) |
592 | err("unlikely sum of $%s and $%s" , |
593 | str(a->offset.label), str(fn->con[r.val].label)); |
594 | return 1; |
595 | } |
596 | assert(rtype(r) == RTmp); |
597 | i = ai[r.val].i; |
598 | nl = ai[r.val].l; |
599 | nr = ai[r.val].r; |
600 | if (i) { |
601 | if (nl > nr) { |
602 | al = i->arg[1]; |
603 | ar = i->arg[0]; |
604 | t = nl, nl = nr, nr = t; |
605 | } else { |
606 | al = i->arg[0]; |
607 | ar = i->arg[1]; |
608 | } |
609 | } |
610 | switch (n) { |
611 | case 3: /* s * i */ |
612 | a->index = al; |
613 | a->scale = fn->con[ar.val].bits.i; |
614 | return 0; |
615 | case 5: /* b + s * i */ |
616 | switch (nr) { |
617 | case 0: |
618 | if (fn->tmp[ar.val].slot != -1) { |
619 | al = i->arg[1]; |
620 | ar = i->arg[0]; |
621 | } |
622 | a->index = ar; |
623 | a->scale = 1; |
624 | break; |
625 | case 3: |
626 | amatch(a, ar, nr, ai, fn); |
627 | break; |
628 | } |
629 | r = al; |
630 | /* fall through */ |
631 | case 0: |
632 | s = fn->tmp[r.val].slot; |
633 | if (s != -1) |
634 | r = SLOT(s); |
635 | a->base = r; |
636 | return n || s != -1; |
637 | case 2: /* constants */ |
638 | case 4: /* o + b */ |
639 | case 6: /* o + s * i */ |
640 | case 7: /* o + b + s * i */ |
641 | amatch(a, ar, nr, ai, fn); |
642 | amatch(a, al, nl, ai, fn); |
643 | return 1; |
644 | default: |
645 | die("unreachable" ); |
646 | } |
647 | } |
648 | |
649 | /* instruction selection |
650 | * requires use counts (as given by parsing) |
651 | */ |
652 | void |
653 | amd64_isel(Fn *fn) |
654 | { |
655 | Blk *b, **sb; |
656 | Ins *i; |
657 | Phi *p; |
658 | uint a; |
659 | int n, al; |
660 | int64_t sz; |
661 | ANum *ainfo; |
662 | |
663 | /* assign slots to fast allocs */ |
664 | b = fn->start; |
665 | /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ |
666 | for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) |
667 | for (i=b->ins; i<&b->ins[b->nins]; i++) |
668 | if (i->op == al) { |
669 | if (rtype(i->arg[0]) != RCon) |
670 | break; |
671 | sz = fn->con[i->arg[0].val].bits.i; |
672 | if (sz < 0 || sz >= INT_MAX-15) |
673 | err("invalid alloc size %" PRId64, sz); |
674 | sz = (sz + n-1) & -n; |
675 | sz /= 4; |
676 | if (sz > INT_MAX - fn->slot) |
677 | die("alloc too large" ); |
678 | fn->tmp[i->to.val].slot = fn->slot; |
679 | fn->slot += sz; |
680 | *i = (Ins){.op = Onop}; |
681 | } |
682 | |
683 | /* process basic blocks */ |
684 | n = fn->ntmp; |
685 | ainfo = emalloc(n * sizeof ainfo[0]); |
686 | for (b=fn->start; b; b=b->link) { |
687 | curi = &insb[NIns]; |
688 | for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) |
689 | for (p=(*sb)->phi; p; p=p->link) { |
690 | for (a=0; p->blk[a] != b; a++) |
691 | assert(a+1 < p->narg); |
692 | fixarg(&p->arg[a], p->cls, 0, fn); |
693 | } |
694 | memset(ainfo, 0, n * sizeof ainfo[0]); |
695 | anumber(ainfo, b, fn->con); |
696 | seljmp(b, fn); |
697 | for (i=&b->ins[b->nins]; i!=b->ins;) |
698 | sel(*--i, ainfo, fn); |
699 | b->nins = &insb[NIns] - curi; |
700 | idup(&b->ins, curi, b->nins); |
701 | } |
702 | free(ainfo); |
703 | |
704 | if (debug['I']) { |
705 | fprintf(stderr, "\n> After instruction selection:\n" ); |
706 | printfn(fn, stderr); |
707 | } |
708 | } |
709 | |