1 | /* |
2 | ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. |
3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
4 | */ |
5 | |
6 | #define lj_opt_split_c |
7 | #define LUA_CORE |
8 | |
9 | #include "lj_obj.h" |
10 | |
11 | #if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) |
12 | |
13 | #include "lj_err.h" |
14 | #include "lj_buf.h" |
15 | #include "lj_ir.h" |
16 | #include "lj_jit.h" |
17 | #include "lj_ircall.h" |
18 | #include "lj_iropt.h" |
19 | #include "lj_dispatch.h" |
20 | #include "lj_vm.h" |
21 | |
22 | /* SPLIT pass: |
23 | ** |
24 | ** This pass splits up 64 bit IR instructions into multiple 32 bit IR |
25 | ** instructions. It's only active for soft-float targets or for 32 bit CPUs |
26 | ** which lack native 64 bit integer operations (the FFI is currently the |
27 | ** only emitter for 64 bit integer instructions). |
28 | ** |
29 | ** Splitting the IR in a separate pass keeps each 32 bit IR assembler |
30 | ** backend simple. Only a small amount of extra functionality needs to be |
31 | ** implemented. This is much easier than adding support for allocating |
32 | ** register pairs to each backend (believe me, I tried). A few simple, but |
33 | ** important optimizations can be performed by the SPLIT pass, which would |
34 | ** be tedious to do in the backend. |
35 | ** |
36 | ** The basic idea is to replace each 64 bit IR instruction with its 32 bit |
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
40 | ** inferred from the previous instruction. |
41 | ** |
42 | ** The operands of HIOP hold the hiword input references. The output of HIOP |
43 | ** is the hiword output reference, which is also used to hold the hiword |
44 | ** register or spill slot information. The register allocator treats this |
45 | ** instruction independently of any other instruction, which improves code |
46 | ** quality compared to using fixed register pairs. |
47 | ** |
48 | ** It's easier to split up some instructions into two regular 32 bit |
49 | ** instructions. E.g. XLOAD is split up into two XLOADs with two different |
50 | ** addresses. Obviously 64 bit constants need to be split up into two 32 bit |
51 | ** constants, too. Some hiword instructions can be entirely omitted, e.g. |
52 | ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls |
53 | ** are split up into two 32 bit arguments each. |
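**
** E.g. the 64 bit constant 0x100000002 is split into the loword constant
** 2 and the hiword constant 1 (see the constant loop in split_ir).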
54 | ** |
55 | ** On soft-float targets, floating-point instructions are directly converted |
56 | ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX). |
57 | ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump). |
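**
** E.g. a soft-float number ADD roughly becomes (the CARG chain feeding the
** four 32 bit halves to the call is abbreviated):
**
**   0100 int CALLN  ...   softfp_add
**   0101 sfp HIOP   0100  0100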
58 | ** |
59 | ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with |
60 | ** two int64_t fields: |
61 | ** |
62 | ** 0100 p32 ADD base +8 |
63 | ** 0101 i64 XLOAD 0100 |
64 | ** 0102 i64 ADD 0101 +1 |
65 | ** 0103 p32 ADD base +16 |
66 | ** 0104 i64 XSTORE 0103 0102 |
67 | ** |
68 | ** mov rax, [esi+0x8] |
69 | ** add rax, +0x01 |
70 | ** mov [esi+0x10], rax |
71 | ** |
72 | ** Here's the transformed IR and the x86 machine code after the SPLIT pass: |
73 | ** |
74 | ** 0100 p32 ADD base +8 |
75 | ** 0101 int XLOAD 0100 |
76 | ** 0102 p32 ADD base +12 |
77 | ** 0103 int XLOAD 0102 |
78 | ** 0104 int ADD 0101 +1 |
79 | ** 0105 int HIOP 0103 +0 |
80 | ** 0106 p32 ADD base +16 |
81 | ** 0107 int XSTORE 0106 0104 |
82 | ** 0108 int HIOP 0106 0105 |
83 | ** |
84 | ** mov eax, [esi+0x8] |
85 | ** mov ecx, [esi+0xc] |
86 | ** add eax, +0x01 |
87 | ** adc ecx, +0x00 |
88 | ** mov [esi+0x10], eax |
89 | ** mov [esi+0x14], ecx |
90 | ** |
91 | ** You may notice the reassociated hiword address computation, which is |
92 | ** later fused into the mov operands by the assembler. |
93 | */ |
94 | |
95 | /* Some local macros to save typing. Undef'd at the end. */ |
96 | #define IR(ref) (&J->cur.ir[(ref)]) |
97 | |
98 | /* Directly emit the transformed IR without updating chains etc. */ |
99 | static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) |
100 | { |
101 | IRRef nref = lj_ir_nextins(J); |
102 | IRIns *ir = IR(nref); |
103 | ir->ot = ot; |
104 | ir->op1 = op1; |
105 | ir->op2 = op2; |
106 | return nref; |
107 | } |
108 | |
109 | #if LJ_SOFTFP |
110 | /* Emit a (checked) number to integer conversion. */ |
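/* The check is a round-trip guard, i.e. in plain C terms:
**   int32_t i = softfp_d2i(d);
**   guard(softfp_i2d(i) == d);
** with the comparison done pairwise on the lo and hi words (the EQ plus
** the guarded HIOP emitted below).
*/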
111 | static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check) |
112 | { |
113 | IRRef tmp, res; |
114 | #if LJ_LE |
115 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi); |
116 | #else |
117 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo); |
118 | #endif |
119 | res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i); |
120 | if (check) { |
121 | tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d); |
122 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
123 | split_emit(J, IRTGI(IR_EQ), tmp, lo); |
124 | split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi); |
125 | } |
126 | return res; |
127 | } |
128 | |
129 | /* Emit a CALLN with one split 64 bit argument. */ |
130 | static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
131 | IRIns *ir, IRCallID id) |
132 | { |
133 | IRRef tmp, op1 = ir->op1; |
134 | J->cur.nins--; |
135 | #if LJ_LE |
136 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
137 | #else |
138 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
139 | #endif |
140 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
141 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
142 | } |
143 | #endif |
144 | |
145 | /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ |
146 | static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
147 | IRIns *ir, IRCallID id) |
148 | { |
149 | IRRef tmp, op1 = ir->op1, op2 = ir->op2; |
150 | J->cur.nins--; |
151 | #if LJ_LE |
152 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
153 | #else |
154 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
155 | #endif |
156 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); |
157 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
158 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
159 | } |
160 | |
161 | /* Emit a CALLN with two split 64 bit arguments. */ |
162 | static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
163 | IRIns *ir, IRCallID id) |
164 | { |
165 | IRRef tmp, op1 = ir->op1, op2 = ir->op2; |
166 | J->cur.nins--; |
167 | #if LJ_LE |
168 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
169 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); |
170 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); |
171 | #else |
172 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
173 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); |
174 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); |
175 | #endif |
176 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
177 | return split_emit(J, |
178 | IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), |
179 | tmp, tmp); |
180 | } |
181 | |
182 | /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ |
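/* E.g. for a loword address computed as 'ADD p +8' this yields 'ADD p +12'
** directly instead of 'ADD (ADD p +8) +4' (the reassociation visible in the
** header example).
*/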
183 | static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) |
184 | { |
185 | IRRef nref = oir[ref].prev; |
186 | IRIns *ir = IR(nref); |
187 | int32_t ofs = 4; |
188 | if (ir->o == IR_KPTR) |
189 | return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs); |
190 | if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) { |
191 | /* Reassociate address. */ |
192 | ofs += IR(ir->op2)->i; |
193 | nref = ir->op1; |
194 | if (ofs == 0) return nref; |
195 | } |
196 | return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs)); |
197 | } |
198 | |
199 | #if LJ_HASFFI |
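/* Split a 64 bit shift/rotate into 32 bit instructions.
** Constant shift counts use the usual double-word identities, e.g. for
** 0 < k < 32:
**   x << k:  lo' = lo << k;             hi' = (hi << k) | (lo >> (32-k))
**   x >> k:  lo' = (lo >> k) | (hi << (32-k));  hi' = hi >> k
** Variable shift counts fall back to a lj_carith_*64 call.
*/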
200 | static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, |
201 | IRIns *oir, IRIns *nir, IRIns *ir) |
202 | { |
203 | IROp op = ir->o; |
204 | IRRef kref = nir->op2; |
205 | if (irref_isk(kref)) { /* Optimize constant shifts. */ |
206 | int32_t k = (IR(kref)->i & 63); |
207 | IRRef lo = nir->op1, hi = hisubst[ir->op1]; |
208 | if (op == IR_BROL || op == IR_BROR) { |
209 | if (op == IR_BROR) k = (-k & 63); |
210 | if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; } |
211 | if (k == 0) { |
212 | passthrough: |
213 | J->cur.nins--; |
214 | ir->prev = lo; |
215 | return hi; |
216 | } else { |
217 | TRef k1, k2; |
218 | IRRef t1, t2, t3, t4; |
219 | J->cur.nins--; |
220 | k1 = lj_ir_kint(J, k); |
221 | k2 = lj_ir_kint(J, (-k & 31)); |
222 | t1 = split_emit(J, IRTI(IR_BSHL), lo, k1); |
223 | t2 = split_emit(J, IRTI(IR_BSHL), hi, k1); |
224 | t3 = split_emit(J, IRTI(IR_BSHR), lo, k2); |
225 | t4 = split_emit(J, IRTI(IR_BSHR), hi, k2); |
226 | ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4); |
227 | return split_emit(J, IRTI(IR_BOR), t2, t3); |
228 | } |
229 | } else if (k == 0) { |
230 | goto passthrough; |
231 | } else if (k < 32) { |
232 | if (op == IR_BSHL) { |
233 | IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref); |
234 | IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31))); |
235 | return split_emit(J, IRTI(IR_BOR), t1, t2); |
236 | } else { |
237 | IRRef t1 = ir->prev, t2; |
        lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
239 | nir->o = IR_BSHR; |
240 | t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); |
241 | ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); |
242 | return split_emit(J, IRTI(op), hi, kref); |
243 | } |
244 | } else { |
245 | if (op == IR_BSHL) { |
246 | if (k == 32) |
247 | J->cur.nins--; |
248 | else |
249 | lo = ir->prev; |
250 | ir->prev = lj_ir_kint(J, 0); |
251 | return lo; |
252 | } else { |
        lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
254 | if (k == 32) { |
255 | J->cur.nins--; |
256 | ir->prev = hi; |
257 | } else { |
258 | nir->op1 = hi; |
259 | } |
260 | if (op == IR_BSHR) |
261 | return lj_ir_kint(J, 0); |
262 | else |
263 | return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31)); |
264 | } |
265 | } |
266 | } |
267 | return split_call_li(J, hisubst, oir, ir, |
268 | op - IR_BSHL + IRCALL_lj_carith_shl64); |
269 | } |
270 | |
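/* Split a 64 bit bitwise AND/OR/XOR into two independent 32 bit ops.
** Halves with a constant 0 or -1 operand fold: BAND with 0 or BOR with -1
** yield the constant, BAND with -1, BOR with 0 or BXOR with 0 pass the
** other operand through, and BXOR with -1 becomes a BNOT.
*/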
271 | static IRRef split_bitop(jit_State *J, IRRef1 *hisubst, |
272 | IRIns *nir, IRIns *ir) |
273 | { |
274 | IROp op = ir->o; |
275 | IRRef hi, kref = nir->op2; |
276 | if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */ |
277 | int32_t k = IR(kref)->i; |
278 | if (k == 0 || k == -1) { |
279 | if (op == IR_BAND) k = ~k; |
280 | if (k == 0) { |
281 | J->cur.nins--; |
282 | ir->prev = nir->op1; |
283 | } else if (op == IR_BXOR) { |
284 | nir->o = IR_BNOT; |
285 | nir->op2 = 0; |
286 | } else { |
287 | J->cur.nins--; |
288 | ir->prev = kref; |
289 | } |
290 | } |
291 | } |
292 | hi = hisubst[ir->op1]; |
293 | kref = hisubst[ir->op2]; |
294 | if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */ |
295 | int32_t k = IR(kref)->i; |
296 | if (k == 0 || k == -1) { |
297 | if (op == IR_BAND) k = ~k; |
298 | if (k == 0) { |
299 | return hi; |
300 | } else if (op == IR_BXOR) { |
301 | return split_emit(J, IRTI(IR_BNOT), hi, 0); |
302 | } else { |
303 | return kref; |
304 | } |
305 | } |
306 | } |
307 | return split_emit(J, IRTI(op), hi, kref); |
308 | } |
309 | #endif |
310 | |
311 | /* Substitute references of a snapshot. */ |
312 | static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) |
313 | { |
314 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; |
315 | MSize n, nent = snap->nent; |
316 | for (n = 0; n < nent; n++) { |
317 | SnapEntry sn = map[n]; |
318 | IRIns *ir = &oir[snap_ref(sn)]; |
319 | if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) |
320 | map[n] = ((sn & 0xffff0000) | ir->prev); |
321 | } |
322 | } |
323 | |
324 | /* Transform the old IR to the new IR. */ |
325 | static void split_ir(jit_State *J) |
326 | { |
327 | IRRef nins = J->cur.nins, nk = J->cur.nk; |
328 | MSize irlen = nins - nk; |
329 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); |
330 | IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need); |
331 | IRRef1 *hisubst; |
332 | IRRef ref, snref; |
333 | SnapShot *snap; |
334 | |
335 | /* Copy old IR to buffer. */ |
336 | memcpy(oir, IR(nk), irlen*sizeof(IRIns)); |
337 | /* Bias hiword substitution table and old IR. Loword kept in field prev. */ |
338 | hisubst = (IRRef1 *)&oir[irlen] - nk; |
339 | oir -= nk; |
340 | |
341 | /* Remove all IR instructions, but retain IR constants. */ |
342 | J->cur.nins = REF_FIRST; |
343 | J->loopref = 0; |
344 | |
345 | /* Process constants and fixed references. */ |
346 | for (ref = nk; ref <= REF_BASE; ref++) { |
347 | IRIns *ir = &oir[ref]; |
348 | if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { |
349 | /* Split up 64 bit constant. */ |
350 | TValue tv = *ir_k64(ir); |
351 | ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); |
352 | hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); |
353 | } else { |
354 | ir->prev = ref; /* Identity substitution for loword. */ |
355 | hisubst[ref] = 0; |
356 | } |
357 | if (irt_is64(ir->t) && ir->o != IR_KNULL) |
358 | ref++; |
359 | } |
360 | |
361 | /* Process old IR instructions. */ |
362 | snap = J->cur.snap; |
363 | snref = snap->ref; |
364 | for (ref = REF_FIRST; ref < nins; ref++) { |
365 | IRIns *ir = &oir[ref]; |
366 | IRRef nref = lj_ir_nextins(J); |
367 | IRIns *nir = IR(nref); |
368 | IRRef hi = 0; |
369 | |
370 | if (ref >= snref) { |
371 | snap->ref = nref; |
372 | split_subst_snap(J, snap++, oir); |
373 | snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0; |
374 | } |
375 | |
376 | /* Copy-substitute old instruction to new instruction. */ |
377 | nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; |
378 | nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; |
379 | ir->prev = nref; /* Loword substitution. */ |
380 | nir->o = ir->o; |
381 | nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); |
382 | hisubst[ref] = 0; |
383 | |
384 | /* Split 64 bit instructions. */ |
385 | #if LJ_SOFTFP |
386 | if (irt_isnum(ir->t)) { |
387 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ |
388 | /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ |
389 | switch (ir->o) { |
390 | case IR_ADD: |
391 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); |
392 | break; |
393 | case IR_SUB: |
394 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); |
395 | break; |
396 | case IR_MUL: |
397 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); |
398 | break; |
399 | case IR_DIV: |
400 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); |
401 | break; |
402 | case IR_POW: |
403 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); |
404 | break; |
405 | case IR_FPMATH: |
406 | hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); |
407 | break; |
408 | case IR_LDEXP: |
409 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); |
410 | break; |
411 | case IR_NEG: case IR_ABS: |
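        /* Only the hiword changes: BXOR 0x80000000 flips the sign bit for
        ** NEG, BAND 0x7fffffff clears it for ABS. */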
412 | nir->o = IR_CONV; /* Pass through loword. */ |
413 | nir->op2 = (IRT_INT << 5) | IRT_INT; |
414 | hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), |
415 | hisubst[ir->op1], |
416 | lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); |
417 | break; |
418 | case IR_SLOAD: |
419 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ |
420 | nir->op2 &= ~IRSLOAD_CONVERT; |
421 | ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, |
422 | IRCALL_softfp_i2d); |
423 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
424 | break; |
425 | } |
426 | /* fallthrough */ |
427 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: |
428 | case IR_STRTO: |
429 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
430 | break; |
431 | case IR_FLOAD: |
        lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
433 | hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); |
434 | nir->op2 += LJ_BE*4; |
435 | break; |
436 | case IR_XLOAD: { |
437 | IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ |
438 | J->cur.nins--; |
439 | hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ |
440 | #if LJ_BE |
441 | hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2); |
442 | inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD); |
443 | #endif |
444 | nref = lj_ir_nextins(J); |
445 | nir = IR(nref); |
446 | *nir = inslo; /* Re-emit lo XLOAD. */ |
447 | #if LJ_LE |
448 | hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); |
449 | ir->prev = nref; |
450 | #else |
451 | ir->prev = hi; hi = nref; |
452 | #endif |
453 | break; |
454 | } |
455 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: |
456 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); |
457 | break; |
458 | case IR_CONV: { /* Conversion to number. Others handled below. */ |
459 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
460 | UNUSED(st); |
461 | #if LJ_32 && LJ_HASFFI |
462 | if (st == IRT_I64 || st == IRT_U64) { |
463 | hi = split_call_l(J, hisubst, oir, ir, |
464 | st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); |
465 | break; |
466 | } |
467 | #endif |
468 | lj_assertJ(st == IRT_INT || |
469 | (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)), |
470 | "bad source type for CONV" ); |
471 | nir->o = IR_CALLN; |
472 | #if LJ_32 && LJ_HASFFI |
473 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : |
474 | st == IRT_FLOAT ? IRCALL_softfp_f2d : |
475 | IRCALL_softfp_ui2d; |
476 | #else |
477 | nir->op2 = IRCALL_softfp_i2d; |
478 | #endif |
479 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
480 | break; |
481 | } |
482 | case IR_CALLN: |
483 | case IR_CALLL: |
484 | case IR_CALLS: |
485 | case IR_CALLXS: |
486 | goto split_call; |
487 | case IR_PHI: |
488 | if (nir->op1 == nir->op2) |
489 | J->cur.nins--; /* Drop useless PHIs. */ |
490 | if (hisubst[ir->op1] != hisubst[ir->op2]) |
491 | split_emit(J, IRT(IR_PHI, IRT_SOFTFP), |
492 | hisubst[ir->op1], hisubst[ir->op2]); |
493 | break; |
494 | case IR_HIOP: |
495 | J->cur.nins--; /* Drop joining HIOP. */ |
496 | ir->prev = nir->op1; |
497 | hi = nir->op2; |
498 | break; |
499 | default: |
500 | lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX, |
501 | "bad IR op %d" , ir->o); |
502 | hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), |
503 | hisubst[ir->op1], hisubst[ir->op2]); |
504 | break; |
505 | } |
506 | } else |
507 | #endif |
508 | #if LJ_32 && LJ_HASFFI |
509 | if (irt_isint64(ir->t)) { |
510 | IRRef hiref = hisubst[ir->op1]; |
511 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ |
512 | switch (ir->o) { |
513 | case IR_ADD: |
514 | case IR_SUB: |
515 | /* Use plain op for hiword if loword cannot produce a carry/borrow. */ |
516 | if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { |
517 | ir->prev = nir->op1; /* Pass through loword. */ |
518 | nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; |
519 | hi = nref; |
520 | break; |
521 | } |
522 | /* fallthrough */ |
523 | case IR_NEG: |
524 | hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); |
525 | break; |
526 | case IR_MUL: |
527 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); |
528 | break; |
529 | case IR_DIV: |
530 | hi = split_call_ll(J, hisubst, oir, ir, |
531 | irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : |
532 | IRCALL_lj_carith_divu64); |
533 | break; |
534 | case IR_MOD: |
535 | hi = split_call_ll(J, hisubst, oir, ir, |
536 | irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : |
537 | IRCALL_lj_carith_modu64); |
538 | break; |
539 | case IR_POW: |
540 | hi = split_call_ll(J, hisubst, oir, ir, |
541 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : |
542 | IRCALL_lj_carith_powu64); |
543 | break; |
544 | case IR_BNOT: |
545 | hi = split_emit(J, IRTI(IR_BNOT), hiref, 0); |
546 | break; |
547 | case IR_BSWAP: |
548 | ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0); |
549 | hi = nref; |
550 | break; |
551 | case IR_BAND: case IR_BOR: case IR_BXOR: |
552 | hi = split_bitop(J, hisubst, nir, ir); |
553 | break; |
554 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: |
555 | hi = split_bitshift(J, hisubst, oir, nir, ir); |
556 | break; |
557 | case IR_FLOAD: |
        lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
559 | hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); |
560 | #if LJ_BE |
561 | ir->prev = hi; hi = nref; |
562 | #endif |
563 | break; |
564 | case IR_XLOAD: |
565 | hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); |
566 | #if LJ_BE |
567 | ir->prev = hi; hi = nref; |
568 | #endif |
569 | break; |
570 | case IR_XSTORE: |
571 | split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); |
572 | break; |
573 | case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ |
574 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
575 | #if LJ_SOFTFP |
576 | if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ |
577 | hi = split_call_l(J, hisubst, oir, ir, |
578 | irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); |
579 | } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ |
580 | nir->o = IR_CALLN; |
581 | nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; |
582 | hi = split_emit(J, IRTI(IR_HIOP), nref, nref); |
583 | } |
584 | #else |
585 | if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ |
586 | hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); |
587 | } |
588 | #endif |
589 | else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ |
590 | /* Drop cast, since assembler doesn't care. But fwd both parts. */ |
591 | hi = hiref; |
592 | goto fwdlo; |
593 | } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ |
594 | IRRef k31 = lj_ir_kint(J, 31); |
595 | nir = IR(nref); /* May have been reallocated. */ |
596 | ir->prev = nir->op1; /* Pass through loword. */ |
597 | nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ |
598 | nir->op2 = k31; |
599 | hi = nref; |
600 | } else { /* Zero-extend to 64 bit. */ |
601 | hi = lj_ir_kint(J, 0); |
602 | goto fwdlo; |
603 | } |
604 | break; |
605 | } |
606 | case IR_CALLXS: |
607 | goto split_call; |
608 | case IR_PHI: { |
609 | IRRef hiref2; |
610 | if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || |
611 | nir->op1 == nir->op2) |
612 | J->cur.nins--; /* Drop useless PHIs. */ |
613 | hiref2 = hisubst[ir->op2]; |
614 | if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) |
615 | split_emit(J, IRTI(IR_PHI), hiref, hiref2); |
616 | break; |
617 | } |
618 | case IR_HIOP: |
619 | J->cur.nins--; /* Drop joining HIOP. */ |
620 | ir->prev = nir->op1; |
621 | hi = nir->op2; |
622 | break; |
623 | default: |
        lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */
625 | split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); |
626 | break; |
627 | } |
628 | } else |
629 | #endif |
630 | #if LJ_SOFTFP |
631 | if (ir->o == IR_SLOAD) { |
632 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ |
633 | nir->op2 &= ~IRSLOAD_CONVERT; |
634 | if (!(nir->op2 & IRSLOAD_TYPECHECK)) |
635 | nir->t.irt = IRT_INT; /* Drop guard. */ |
636 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
637 | ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); |
638 | } |
639 | } else if (ir->o == IR_TOBIT) { |
640 | IRRef tmp, op1 = ir->op1; |
641 | J->cur.nins--; |
642 | #if LJ_LE |
643 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
644 | #else |
645 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
646 | #endif |
647 | ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); |
648 | } else if (ir->o == IR_TOSTR) { |
649 | if (hisubst[ir->op1]) { |
650 | if (irref_isk(ir->op1)) |
651 | nir->op1 = ir->op1; |
652 | else |
653 | split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); |
654 | } |
655 | } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { |
656 | if (irref_isk(ir->op2) && hisubst[ir->op2]) |
657 | nir->op2 = ir->op2; |
658 | } else |
659 | #endif |
660 | if (ir->o == IR_CONV) { /* See above, too. */ |
661 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
662 | #if LJ_32 && LJ_HASFFI |
663 | if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ |
664 | #if LJ_SOFTFP |
665 | if (irt_isfloat(ir->t)) { |
666 | split_call_l(J, hisubst, oir, ir, |
667 | st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); |
668 | J->cur.nins--; /* Drop unused HIOP. */ |
669 | } |
670 | #else |
671 | if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ |
672 | ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), |
673 | hisubst[ir->op1], nref); |
674 | } |
675 | #endif |
676 | else { /* Truncate to lower 32 bits. */ |
677 | fwdlo: |
678 | ir->prev = nir->op1; /* Forward loword. */ |
679 | /* Replace with NOP to avoid messing up the snapshot logic. */ |
680 | nir->ot = IRT(IR_NOP, IRT_NIL); |
681 | nir->op1 = nir->op2 = 0; |
682 | } |
683 | } |
684 | #endif |
685 | #if LJ_SOFTFP && LJ_32 && LJ_HASFFI |
686 | else if (irt_isfloat(ir->t)) { |
687 | if (st == IRT_NUM) { |
688 | split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); |
689 | J->cur.nins--; /* Drop unused HIOP. */ |
690 | } else { |
691 | nir->o = IR_CALLN; |
692 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; |
693 | } |
694 | } else if (st == IRT_FLOAT) { |
695 | nir->o = IR_CALLN; |
696 | nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; |
697 | } else |
698 | #endif |
699 | #if LJ_SOFTFP |
700 | if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { |
701 | if (irt_isguard(ir->t)) { |
          lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
703 | J->cur.nins--; |
704 | ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); |
705 | } else { |
706 | split_call_l(J, hisubst, oir, ir, |
707 | #if LJ_32 && LJ_HASFFI |
708 | st == IRT_NUM ? |
709 | (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : |
710 | (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) |
711 | #else |
712 | IRCALL_softfp_d2i |
713 | #endif |
714 | ); |
715 | J->cur.nins--; /* Drop unused HIOP. */ |
716 | } |
717 | } |
718 | #endif |
719 | } else if (ir->o == IR_CALLXS) { |
720 | IRRef hiref; |
721 | split_call: |
722 | hiref = hisubst[ir->op1]; |
723 | if (hiref) { |
724 | IROpT ot = nir->ot; |
725 | IRRef op2 = nir->op2; |
726 | nir->ot = IRT(IR_CARG, IRT_NIL); |
727 | #if LJ_LE |
728 | nir->op2 = hiref; |
729 | #else |
730 | nir->op2 = nir->op1; nir->op1 = hiref; |
731 | #endif |
732 | ir->prev = nref = split_emit(J, ot, nref, op2); |
733 | } |
734 | if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) |
735 | hi = split_emit(J, |
736 | IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), |
737 | nref, nref); |
738 | } else if (ir->o == IR_CARG) { |
739 | IRRef hiref = hisubst[ir->op1]; |
740 | if (hiref) { |
741 | IRRef op2 = nir->op2; |
742 | #if LJ_LE |
743 | nir->op2 = hiref; |
744 | #else |
745 | nir->op2 = nir->op1; nir->op1 = hiref; |
746 | #endif |
747 | ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); |
748 | nir = IR(nref); |
749 | } |
750 | hiref = hisubst[ir->op2]; |
751 | if (hiref) { |
752 | #if !LJ_TARGET_X86 |
753 | int carg = 0; |
754 | IRIns *cir; |
755 | for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) |
756 | carg++; |
757 | if ((carg & 1) == 0) { /* Align 64 bit arguments. */ |
758 | IRRef op2 = nir->op2; |
759 | nir->op2 = REF_NIL; |
760 | nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); |
761 | nir = IR(nref); |
762 | } |
763 | #endif |
764 | #if LJ_BE |
765 | { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } |
766 | #endif |
767 | ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); |
768 | } |
769 | } else if (ir->o == IR_CNEWI) { |
770 | if (hisubst[ir->op2]) |
771 | split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); |
772 | } else if (ir->o == IR_LOOP) { |
773 | J->loopref = nref; /* Needed by assembler. */ |
774 | } |
775 | hisubst[ref] = hi; /* Store hiword substitution. */ |
776 | } |
777 | if (snref == nins) { /* Substitution for last snapshot. */ |
778 | snap->ref = J->cur.nins; |
779 | split_subst_snap(J, snap, oir); |
780 | } |
781 | |
782 | /* Add PHI marks. */ |
783 | for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { |
784 | IRIns *ir = IR(ref); |
785 | if (ir->o != IR_PHI) break; |
786 | if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); |
787 | if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); |
788 | } |
789 | } |
790 | |
791 | /* Protected callback for split pass. */ |
792 | static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) |
793 | { |
794 | jit_State *J = (jit_State *)ud; |
795 | split_ir(J); |
796 | UNUSED(L); UNUSED(dummy); |
797 | return NULL; |
798 | } |
799 | |
800 | #if defined(LUA_USE_ASSERT) || LJ_SOFTFP |
801 | /* Slow, but sure way to check whether a SPLIT pass is needed. */ |
802 | static int split_needsplit(jit_State *J) |
803 | { |
804 | IRIns *ir, *irend; |
805 | IRRef ref; |
806 | for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) |
807 | if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t)) |
808 | return 1; |
809 | if (LJ_SOFTFP) { |
810 | for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev) |
811 | if ((IR(ref)->op2 & IRSLOAD_CONVERT)) |
812 | return 1; |
813 | if (J->chain[IR_TOBIT]) |
814 | return 1; |
815 | } |
816 | for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) { |
817 | IRType st = (IR(ref)->op2 & IRCONV_SRCMASK); |
818 | if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) || |
819 | st == IRT_I64 || st == IRT_U64) |
820 | return 1; |
821 | } |
822 | return 0; /* Nope. */ |
823 | } |
824 | #endif |
825 | |
826 | /* SPLIT pass. */ |
827 | void lj_opt_split(jit_State *J) |
828 | { |
829 | #if LJ_SOFTFP |
830 | if (!J->needsplit) |
831 | J->needsplit = split_needsplit(J); |
832 | #else |
  lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
834 | #endif |
835 | if (J->needsplit) { |
836 | int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); |
837 | if (errcode) { |
838 | /* Completely reset the trace to avoid inconsistent dump on abort. */ |
839 | J->cur.nins = J->cur.nk = REF_BASE; |
840 | J->cur.nsnap = 0; |
841 | lj_err_throw(J->L, errcode); /* Propagate errors. */ |
842 | } |
843 | } |
844 | } |
845 | |
846 | #undef IR |
847 | |
848 | #endif |
849 | |