1 | #include "all.h" |
2 | |
3 | static int |
4 | memarg(Ref *r, int op, Ins *i) |
5 | { |
6 | if (isload(op) || op == Ocall) |
7 | return r == &i->arg[0]; |
8 | if (isstore(op)) |
9 | return r == &i->arg[1]; |
10 | return 0; |
11 | } |
12 | |
13 | static int |
14 | immarg(Ref *r, int op, Ins *i) |
15 | { |
16 | return rv64_op[op].imm && r == &i->arg[1]; |
17 | } |
18 | |
19 | static void |
20 | fixarg(Ref *r, int k, Ins *i, Fn *fn) |
21 | { |
22 | char buf[32]; |
23 | Ref r0, r1; |
24 | int s, n, op; |
25 | Con *c; |
26 | |
27 | r0 = r1 = *r; |
28 | op = i ? i->op : Ocopy; |
29 | switch (rtype(r0)) { |
30 | case RCon: |
31 | c = &fn->con[r0.val]; |
32 | if (c->type == CAddr && memarg(r, op, i)) |
33 | break; |
34 | if (c->type == CBits && immarg(r, op, i)) |
35 | if (-2048 <= c->bits.i && c->bits.i < 2048) |
36 | break; |
37 | r1 = newtmp("isel" , k, fn); |
38 | if (KBASE(k) == 1) { |
39 | /* load floating points from memory |
40 | * slots, they can't be used as |
41 | * immediates |
42 | */ |
43 | assert(c->type == CBits); |
44 | n = gasstash(&c->bits, KWIDE(k) ? 8 : 4); |
45 | vgrow(&fn->con, ++fn->ncon); |
46 | c = &fn->con[fn->ncon-1]; |
47 | sprintf(buf, "fp%d" , n); |
48 | *c = (Con){.type = CAddr, .local = 1}; |
49 | c->label = intern(buf); |
50 | emit(Oload, k, r1, CON(c-fn->con), R); |
51 | break; |
52 | } |
53 | emit(Ocopy, k, r1, r0, R); |
54 | break; |
55 | case RTmp: |
56 | if (isreg(r0)) |
57 | break; |
58 | s = fn->tmp[r0.val].slot; |
59 | if (s != -1) { |
60 | /* aggregate passed by value on |
61 | * stack, or fast local address, |
62 | * replace with slot if we can |
63 | */ |
64 | if (memarg(r, op, i)) { |
65 | r1 = SLOT(s); |
66 | break; |
67 | } |
68 | r1 = newtmp("isel" , k, fn); |
69 | emit(Oaddr, k, r1, SLOT(s), R); |
70 | break; |
71 | } |
72 | if (k == Kw && fn->tmp[r0.val].cls == Kl) { |
73 | /* TODO: this sign extension isn't needed |
74 | * for 32-bit arithmetic instructions |
75 | */ |
76 | r1 = newtmp("isel" , k, fn); |
77 | emit(Oextsw, Kl, r1, r0, R); |
78 | } else { |
79 | assert(k == fn->tmp[r0.val].cls); |
80 | } |
81 | break; |
82 | } |
83 | *r = r1; |
84 | } |
85 | |
86 | static void |
87 | negate(Ref *pr, Fn *fn) |
88 | { |
89 | Ref r; |
90 | |
91 | r = newtmp("isel" , Kw, fn); |
92 | emit(Oxor, Kw, *pr, r, getcon(1, fn)); |
93 | *pr = r; |
94 | } |
95 | |
96 | static void |
97 | selcmp(Ins i, int k, int op, Fn *fn) |
98 | { |
99 | Ins *icmp; |
100 | Ref r, r0, r1; |
101 | int sign, swap, neg; |
102 | |
103 | switch (op) { |
104 | case Cieq: |
105 | r = newtmp("isel" , k, fn); |
106 | emit(Oreqz, i.cls, i.to, r, R); |
107 | emit(Oxor, k, r, i.arg[0], i.arg[1]); |
108 | icmp = curi; |
109 | fixarg(&icmp->arg[0], k, icmp, fn); |
110 | fixarg(&icmp->arg[1], k, icmp, fn); |
111 | return; |
112 | case Cine: |
113 | r = newtmp("isel" , k, fn); |
114 | emit(Ornez, i.cls, i.to, r, R); |
115 | emit(Oxor, k, r, i.arg[0], i.arg[1]); |
116 | icmp = curi; |
117 | fixarg(&icmp->arg[0], k, icmp, fn); |
118 | fixarg(&icmp->arg[1], k, icmp, fn); |
119 | return; |
120 | case Cisge: sign = 1, swap = 0, neg = 1; break; |
121 | case Cisgt: sign = 1, swap = 1, neg = 0; break; |
122 | case Cisle: sign = 1, swap = 1, neg = 1; break; |
123 | case Cislt: sign = 1, swap = 0, neg = 0; break; |
124 | case Ciuge: sign = 0, swap = 0, neg = 1; break; |
125 | case Ciugt: sign = 0, swap = 1, neg = 0; break; |
126 | case Ciule: sign = 0, swap = 1, neg = 1; break; |
127 | case Ciult: sign = 0, swap = 0, neg = 0; break; |
128 | case NCmpI+Cfeq: |
129 | case NCmpI+Cfge: |
130 | case NCmpI+Cfgt: |
131 | case NCmpI+Cfle: |
132 | case NCmpI+Cflt: |
133 | swap = 0, neg = 0; |
134 | break; |
135 | case NCmpI+Cfuo: |
136 | negate(&i.to, fn); |
137 | /* fall through */ |
138 | case NCmpI+Cfo: |
139 | r0 = newtmp("isel" , i.cls, fn); |
140 | r1 = newtmp("isel" , i.cls, fn); |
141 | emit(Oand, i.cls, i.to, r0, r1); |
142 | op = KWIDE(k) ? Oceqd : Oceqs; |
143 | emit(op, i.cls, r0, i.arg[0], i.arg[0]); |
144 | icmp = curi; |
145 | fixarg(&icmp->arg[0], k, icmp, fn); |
146 | fixarg(&icmp->arg[1], k, icmp, fn); |
147 | emit(op, i.cls, r1, i.arg[1], i.arg[1]); |
148 | icmp = curi; |
149 | fixarg(&icmp->arg[0], k, icmp, fn); |
150 | fixarg(&icmp->arg[1], k, icmp, fn); |
151 | return; |
152 | case NCmpI+Cfne: |
153 | swap = 0, neg = 1; |
154 | i.op = KWIDE(k) ? Oceqd : Oceqs; |
155 | break; |
156 | default: |
157 | assert(0 && "unknown comparison" ); |
158 | } |
159 | if (op < NCmpI) |
160 | i.op = sign ? Ocsltl : Ocultl; |
161 | if (swap) { |
162 | r = i.arg[0]; |
163 | i.arg[0] = i.arg[1]; |
164 | i.arg[1] = r; |
165 | } |
166 | if (neg) |
167 | negate(&i.to, fn); |
168 | emiti(i); |
169 | icmp = curi; |
170 | fixarg(&icmp->arg[0], k, icmp, fn); |
171 | fixarg(&icmp->arg[1], k, icmp, fn); |
172 | } |
173 | |
174 | static void |
175 | sel(Ins i, Fn *fn) |
176 | { |
177 | Ins *i0; |
178 | int ck, cc; |
179 | |
180 | if (INRANGE(i.op, Oalloc, Oalloc1)) { |
181 | i0 = curi - 1; |
182 | salloc(i.to, i.arg[0], fn); |
183 | fixarg(&i0->arg[0], Kl, i0, fn); |
184 | return; |
185 | } |
186 | if (iscmp(i.op, &ck, &cc)) { |
187 | selcmp(i, ck, cc, fn); |
188 | return; |
189 | } |
190 | if (i.op != Onop) { |
191 | emiti(i); |
192 | i0 = curi; /* fixarg() can change curi */ |
193 | fixarg(&i0->arg[0], argcls(&i, 0), i0, fn); |
194 | fixarg(&i0->arg[1], argcls(&i, 1), i0, fn); |
195 | } |
196 | } |
197 | |
198 | static void |
199 | seljmp(Blk *b, Fn *fn) |
200 | { |
201 | /* TODO: replace cmp+jnz with beq/bne/blt[u]/bge[u] */ |
202 | if (b->jmp.type == Jjnz) |
203 | fixarg(&b->jmp.arg, Kw, 0, fn); |
204 | } |
205 | |
206 | void |
207 | rv64_isel(Fn *fn) |
208 | { |
209 | Blk *b, **sb; |
210 | Ins *i; |
211 | Phi *p; |
212 | uint n; |
213 | int al; |
214 | int64_t sz; |
215 | |
216 | /* assign slots to fast allocs */ |
217 | b = fn->start; |
218 | /* specific to NAlign == 3 */ /* or change n=4 and sz /= 4 below */ |
219 | for (al=Oalloc, n=4; al<=Oalloc1; al++, n*=2) |
220 | for (i=b->ins; i<&b->ins[b->nins]; i++) |
221 | if (i->op == al) { |
222 | if (rtype(i->arg[0]) != RCon) |
223 | break; |
224 | sz = fn->con[i->arg[0].val].bits.i; |
225 | if (sz < 0 || sz >= INT_MAX-15) |
226 | err("invalid alloc size %" PRId64, sz); |
227 | sz = (sz + n-1) & -n; |
228 | sz /= 4; |
229 | if (sz > INT_MAX - fn->slot) |
230 | die("alloc too large" ); |
231 | fn->tmp[i->to.val].slot = fn->slot; |
232 | fn->slot += sz; |
233 | *i = (Ins){.op = Onop}; |
234 | } |
235 | |
236 | for (b=fn->start; b; b=b->link) { |
237 | curi = &insb[NIns]; |
238 | for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++) |
239 | for (p=(*sb)->phi; p; p=p->link) { |
240 | for (n=0; p->blk[n] != b; n++) |
241 | assert(n+1 < p->narg); |
242 | fixarg(&p->arg[n], p->cls, 0, fn); |
243 | } |
244 | seljmp(b, fn); |
245 | for (i=&b->ins[b->nins]; i!=b->ins;) |
246 | sel(*--i, fn); |
247 | b->nins = &insb[NIns] - curi; |
248 | idup(&b->ins, curi, b->nins); |
249 | } |
250 | |
251 | if (debug['I']) { |
252 | fprintf(stderr, "\n> After instruction selection:\n" ); |
253 | printfn(fn, stderr); |
254 | } |
255 | } |
256 | |