1 | /* |
2 | ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. |
3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h |
4 | */ |
5 | |
6 | #define lj_opt_split_c |
7 | #define LUA_CORE |
8 | |
9 | #include "lj_obj.h" |
10 | |
11 | #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) |
12 | |
13 | #include "lj_err.h" |
14 | #include "lj_str.h" |
15 | #include "lj_ir.h" |
16 | #include "lj_jit.h" |
17 | #include "lj_ircall.h" |
18 | #include "lj_iropt.h" |
19 | #include "lj_vm.h" |
20 | |
21 | /* SPLIT pass: |
22 | ** |
23 | ** This pass splits up 64 bit IR instructions into multiple 32 bit IR |
24 | ** instructions. It's only active for soft-float targets or for 32 bit CPUs |
25 | ** which lack native 64 bit integer operations (the FFI is currently the |
26 | ** only emitter for 64 bit integer instructions). |
27 | ** |
28 | ** Splitting the IR in a separate pass keeps each 32 bit IR assembler |
29 | ** backend simple. Only a small amount of extra functionality needs to be |
30 | ** implemented. This is much easier than adding support for allocating |
31 | ** register pairs to each backend (believe me, I tried). A few simple, but |
32 | ** important optimizations can be performed by the SPLIT pass, which would |
33 | ** be tedious to do in the backend. |
34 | ** |
35 | ** The basic idea is to replace each 64 bit IR instruction with its 32 bit |
36 | ** equivalent plus an extra HIOP instruction. The split IR is not passed |
37 | ** through FOLD or any other optimizations, so each HIOP is guaranteed to |
38 | ** immediately follow its counterpart. The actual functionality of HIOP is |
39 | ** inferred from the previous instruction. |
40 | ** |
41 | ** The operands of HIOP hold the hiword input references. The output of HIOP |
42 | ** is the hiword output reference, which is also used to hold the hiword |
43 | ** register or spill slot information. The register allocator treats this |
44 | ** instruction independently of any other instruction, which improves code |
45 | ** quality compared to using fixed register pairs. |
46 | ** |
47 | ** It's easier to split up some instructions into two regular 32 bit |
48 | ** instructions. E.g. XLOAD is split up into two XLOADs with two different |
49 | ** addresses. Obviously 64 bit constants need to be split up into two 32 bit |
50 | ** constants, too. Some hiword instructions can be entirely omitted, e.g. |
51 | ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls |
52 | ** are split up into two 32 bit arguments each. |
53 | ** |
54 | ** On soft-float targets, floating-point instructions are directly converted |
55 | ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX). |
56 | ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump). |
57 | ** |
58 | ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with |
59 | ** two int64_t fields: |
60 | ** |
61 | ** 0100 p32 ADD base +8 |
62 | ** 0101 i64 XLOAD 0100 |
63 | ** 0102 i64 ADD 0101 +1 |
64 | ** 0103 p32 ADD base +16 |
65 | ** 0104 i64 XSTORE 0103 0102 |
66 | ** |
67 | ** mov rax, [esi+0x8] |
68 | ** add rax, +0x01 |
69 | ** mov [esi+0x10], rax |
70 | ** |
71 | ** Here's the transformed IR and the x86 machine code after the SPLIT pass: |
72 | ** |
73 | ** 0100 p32 ADD base +8 |
74 | ** 0101 int XLOAD 0100 |
75 | ** 0102 p32 ADD base +12 |
76 | ** 0103 int XLOAD 0102 |
77 | ** 0104 int ADD 0101 +1 |
78 | ** 0105 int HIOP 0103 +0 |
79 | ** 0106 p32 ADD base +16 |
80 | ** 0107 int XSTORE 0106 0104 |
81 | ** 0108 int HIOP 0106 0105 |
82 | ** |
83 | ** mov eax, [esi+0x8] |
84 | ** mov ecx, [esi+0xc] |
85 | ** add eax, +0x01 |
86 | ** adc ecx, +0x00 |
87 | ** mov [esi+0x10], eax |
88 | ** mov [esi+0x14], ecx |
89 | ** |
90 | ** You may notice the reassociated hiword address computation, which is |
91 | ** later fused into the mov operands by the assembler. |
92 | */ |
93 | |
/* Local shorthand: address of the instruction at 'ref' in the current trace.
** Relies on a variable named J being in scope. Undef'd at the end.
*/
#define IR(ref)			(J->cur.ir + (ref))
96 | |
97 | /* Directly emit the transformed IR without updating chains etc. */ |
98 | static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) |
99 | { |
100 | IRRef nref = lj_ir_nextins(J); |
101 | IRIns *ir = IR(nref); |
102 | ir->ot = ot; |
103 | ir->op1 = op1; |
104 | ir->op2 = op2; |
105 | return nref; |
106 | } |
107 | |
108 | #if LJ_SOFTFP |
109 | /* Emit a (checked) number to integer conversion. */ |
110 | static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check) |
111 | { |
112 | IRRef tmp, res; |
113 | #if LJ_LE |
114 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi); |
115 | #else |
116 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo); |
117 | #endif |
118 | res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i); |
119 | if (check) { |
120 | tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d); |
121 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
122 | split_emit(J, IRTGI(IR_EQ), tmp, lo); |
123 | split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi); |
124 | } |
125 | return res; |
126 | } |
127 | |
128 | /* Emit a CALLN with one split 64 bit argument. */ |
129 | static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
130 | IRIns *ir, IRCallID id) |
131 | { |
132 | IRRef tmp, op1 = ir->op1; |
133 | J->cur.nins--; |
134 | #if LJ_LE |
135 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
136 | #else |
137 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
138 | #endif |
139 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
140 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
141 | } |
142 | |
143 | /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ |
144 | static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
145 | IRIns *ir, IRCallID id) |
146 | { |
147 | IRRef tmp, op1 = ir->op1, op2 = ir->op2; |
148 | J->cur.nins--; |
149 | #if LJ_LE |
150 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
151 | #else |
152 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
153 | #endif |
154 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); |
155 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
156 | return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
157 | } |
158 | #endif |
159 | |
160 | /* Emit a CALLN with two split 64 bit arguments. */ |
161 | static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, |
162 | IRIns *ir, IRCallID id) |
163 | { |
164 | IRRef tmp, op1 = ir->op1, op2 = ir->op2; |
165 | J->cur.nins--; |
166 | #if LJ_LE |
167 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
168 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); |
169 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); |
170 | #else |
171 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
172 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]); |
173 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); |
174 | #endif |
175 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); |
176 | return split_emit(J, |
177 | IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), |
178 | tmp, tmp); |
179 | } |
180 | |
181 | /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ |
182 | static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) |
183 | { |
184 | IRRef nref = oir[ref].prev; |
185 | IRIns *ir = IR(nref); |
186 | int32_t ofs = 4; |
187 | if (ir->o == IR_KPTR) |
188 | return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs); |
189 | if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) { |
190 | /* Reassociate address. */ |
191 | ofs += IR(ir->op2)->i; |
192 | nref = ir->op1; |
193 | if (ofs == 0) return nref; |
194 | } |
195 | return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); |
196 | } |
197 | |
198 | /* Substitute references of a snapshot. */ |
199 | static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) |
200 | { |
201 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; |
202 | MSize n, nent = snap->nent; |
203 | for (n = 0; n < nent; n++) { |
204 | SnapEntry sn = map[n]; |
205 | IRIns *ir = &oir[snap_ref(sn)]; |
206 | if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) |
207 | map[n] = ((sn & 0xffff0000) | ir->prev); |
208 | } |
209 | } |
210 | |
211 | /* Transform the old IR to the new IR. */ |
212 | static void split_ir(jit_State *J) |
213 | { |
214 | IRRef nins = J->cur.nins, nk = J->cur.nk; |
215 | MSize irlen = nins - nk; |
216 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); |
217 | IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); |
218 | IRRef1 *hisubst; |
219 | IRRef ref, snref; |
220 | SnapShot *snap; |
221 | |
222 | /* Copy old IR to buffer. */ |
223 | memcpy(oir, IR(nk), irlen*sizeof(IRIns)); |
224 | /* Bias hiword substitution table and old IR. Loword kept in field prev. */ |
225 | hisubst = (IRRef1 *)&oir[irlen] - nk; |
226 | oir -= nk; |
227 | |
228 | /* Remove all IR instructions, but retain IR constants. */ |
229 | J->cur.nins = REF_FIRST; |
230 | J->loopref = 0; |
231 | |
232 | /* Process constants and fixed references. */ |
233 | for (ref = nk; ref <= REF_BASE; ref++) { |
234 | IRIns *ir = &oir[ref]; |
235 | if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { |
236 | /* Split up 64 bit constant. */ |
237 | TValue tv = *ir_k64(ir); |
238 | ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); |
239 | hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); |
240 | } else { |
241 | ir->prev = ref; /* Identity substitution for loword. */ |
242 | hisubst[ref] = 0; |
243 | } |
244 | } |
245 | |
246 | /* Process old IR instructions. */ |
247 | snap = J->cur.snap; |
248 | snref = snap->ref; |
249 | for (ref = REF_FIRST; ref < nins; ref++) { |
250 | IRIns *ir = &oir[ref]; |
251 | IRRef nref = lj_ir_nextins(J); |
252 | IRIns *nir = IR(nref); |
253 | IRRef hi = 0; |
254 | |
255 | if (ref >= snref) { |
256 | snap->ref = nref; |
257 | split_subst_snap(J, snap++, oir); |
258 | snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0; |
259 | } |
260 | |
261 | /* Copy-substitute old instruction to new instruction. */ |
262 | nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; |
263 | nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; |
264 | ir->prev = nref; /* Loword substitution. */ |
265 | nir->o = ir->o; |
266 | nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); |
267 | hisubst[ref] = 0; |
268 | |
269 | /* Split 64 bit instructions. */ |
270 | #if LJ_SOFTFP |
271 | if (irt_isnum(ir->t)) { |
272 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ |
273 | /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ |
274 | switch (ir->o) { |
275 | case IR_ADD: |
276 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); |
277 | break; |
278 | case IR_SUB: |
279 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); |
280 | break; |
281 | case IR_MUL: |
282 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); |
283 | break; |
284 | case IR_DIV: |
285 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); |
286 | break; |
287 | case IR_POW: |
288 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); |
289 | break; |
290 | case IR_FPMATH: |
291 | /* Try to rejoin pow from EXP2, MUL and LOG2. */ |
292 | if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { |
293 | IRIns *irp = IR(nir->op1); |
294 | if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { |
295 | IRIns *irm4 = IR(irp->op1); |
296 | IRIns *irm3 = IR(irm4->op1); |
297 | IRIns *irm12 = IR(irm3->op1); |
298 | IRIns *irl1 = IR(irm12->op1); |
299 | if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && |
300 | irl1->op2 == IRCALL_lj_vm_log2) { |
301 | IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ |
302 | IRRef arg3 = irm3->op2, arg4 = irm4->op2; |
303 | J->cur.nins--; |
304 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); |
305 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); |
306 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); |
307 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); |
308 | break; |
309 | } |
310 | } |
311 | } |
312 | hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); |
313 | break; |
314 | case IR_ATAN2: |
315 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); |
316 | break; |
317 | case IR_LDEXP: |
318 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); |
319 | break; |
320 | case IR_NEG: case IR_ABS: |
321 | nir->o = IR_CONV; /* Pass through loword. */ |
322 | nir->op2 = (IRT_INT << 5) | IRT_INT; |
323 | hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), |
324 | hisubst[ir->op1], hisubst[ir->op2]); |
325 | break; |
326 | case IR_SLOAD: |
327 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ |
328 | nir->op2 &= ~IRSLOAD_CONVERT; |
329 | ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, |
330 | IRCALL_softfp_i2d); |
331 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
332 | break; |
333 | } |
334 | /* fallthrough */ |
335 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: |
336 | case IR_STRTO: |
337 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
338 | break; |
339 | case IR_XLOAD: { |
340 | IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ |
341 | J->cur.nins--; |
342 | hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ |
343 | nref = lj_ir_nextins(J); |
344 | nir = IR(nref); |
345 | *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ |
346 | hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); |
347 | #if LJ_LE |
348 | ir->prev = nref; |
349 | #else |
350 | ir->prev = hi; hi = nref; |
351 | #endif |
352 | break; |
353 | } |
354 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE: |
355 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); |
356 | break; |
357 | case IR_CONV: { /* Conversion to number. Others handled below. */ |
358 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
359 | UNUSED(st); |
360 | #if LJ_32 && LJ_HASFFI |
361 | if (st == IRT_I64 || st == IRT_U64) { |
362 | hi = split_call_l(J, hisubst, oir, ir, |
363 | st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); |
364 | break; |
365 | } |
366 | #endif |
367 | lua_assert(st == IRT_INT || |
368 | (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); |
369 | nir->o = IR_CALLN; |
370 | #if LJ_32 && LJ_HASFFI |
371 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : |
372 | st == IRT_FLOAT ? IRCALL_softfp_f2d : |
373 | IRCALL_softfp_ui2d; |
374 | #else |
375 | nir->op2 = IRCALL_softfp_i2d; |
376 | #endif |
377 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
378 | break; |
379 | } |
380 | case IR_CALLN: |
381 | case IR_CALLL: |
382 | case IR_CALLS: |
383 | case IR_CALLXS: |
384 | goto split_call; |
385 | case IR_PHI: |
386 | if (nir->op1 == nir->op2) |
387 | J->cur.nins--; /* Drop useless PHIs. */ |
388 | if (hisubst[ir->op1] != hisubst[ir->op2]) |
389 | split_emit(J, IRT(IR_PHI, IRT_SOFTFP), |
390 | hisubst[ir->op1], hisubst[ir->op2]); |
391 | break; |
392 | case IR_HIOP: |
393 | J->cur.nins--; /* Drop joining HIOP. */ |
394 | ir->prev = nir->op1; |
395 | hi = nir->op2; |
396 | break; |
397 | default: |
398 | lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); |
399 | hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), |
400 | hisubst[ir->op1], hisubst[ir->op2]); |
401 | break; |
402 | } |
403 | } else |
404 | #endif |
405 | #if LJ_32 && LJ_HASFFI |
406 | if (irt_isint64(ir->t)) { |
407 | IRRef hiref = hisubst[ir->op1]; |
408 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ |
409 | switch (ir->o) { |
410 | case IR_ADD: |
411 | case IR_SUB: |
412 | /* Use plain op for hiword if loword cannot produce a carry/borrow. */ |
413 | if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { |
414 | ir->prev = nir->op1; /* Pass through loword. */ |
415 | nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; |
416 | hi = nref; |
417 | break; |
418 | } |
419 | /* fallthrough */ |
420 | case IR_NEG: |
421 | hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); |
422 | break; |
423 | case IR_MUL: |
424 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); |
425 | break; |
426 | case IR_DIV: |
427 | hi = split_call_ll(J, hisubst, oir, ir, |
428 | irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : |
429 | IRCALL_lj_carith_divu64); |
430 | break; |
431 | case IR_MOD: |
432 | hi = split_call_ll(J, hisubst, oir, ir, |
433 | irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : |
434 | IRCALL_lj_carith_modu64); |
435 | break; |
436 | case IR_POW: |
437 | hi = split_call_ll(J, hisubst, oir, ir, |
438 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : |
439 | IRCALL_lj_carith_powu64); |
440 | break; |
441 | case IR_FLOAD: |
442 | lua_assert(ir->op2 == IRFL_CDATA_INT64); |
443 | hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); |
444 | #if LJ_BE |
445 | ir->prev = hi; hi = nref; |
446 | #endif |
447 | break; |
448 | case IR_XLOAD: |
449 | hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); |
450 | #if LJ_BE |
451 | ir->prev = hi; hi = nref; |
452 | #endif |
453 | break; |
454 | case IR_XSTORE: |
455 | split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]); |
456 | break; |
457 | case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ |
458 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
459 | #if LJ_SOFTFP |
460 | if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ |
461 | hi = split_call_l(J, hisubst, oir, ir, |
462 | irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); |
463 | } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ |
464 | nir->o = IR_CALLN; |
465 | nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; |
466 | hi = split_emit(J, IRTI(IR_HIOP), nref, nref); |
467 | } |
468 | #else |
469 | if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ |
470 | hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); |
471 | } |
472 | #endif |
473 | else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ |
474 | /* Drop cast, since assembler doesn't care. */ |
475 | goto fwdlo; |
476 | } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ |
477 | IRRef k31 = lj_ir_kint(J, 31); |
478 | nir = IR(nref); /* May have been reallocated. */ |
479 | ir->prev = nir->op1; /* Pass through loword. */ |
480 | nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ |
481 | nir->op2 = k31; |
482 | hi = nref; |
483 | } else { /* Zero-extend to 64 bit. */ |
484 | hi = lj_ir_kint(J, 0); |
485 | goto fwdlo; |
486 | } |
487 | break; |
488 | } |
489 | case IR_CALLXS: |
490 | goto split_call; |
491 | case IR_PHI: { |
492 | IRRef hiref2; |
493 | if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || |
494 | nir->op1 == nir->op2) |
495 | J->cur.nins--; /* Drop useless PHIs. */ |
496 | hiref2 = hisubst[ir->op2]; |
497 | if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) |
498 | split_emit(J, IRTI(IR_PHI), hiref, hiref2); |
499 | break; |
500 | } |
501 | case IR_HIOP: |
502 | J->cur.nins--; /* Drop joining HIOP. */ |
503 | ir->prev = nir->op1; |
504 | hi = nir->op2; |
505 | break; |
506 | default: |
507 | lua_assert(ir->o <= IR_NE); /* Comparisons. */ |
508 | split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); |
509 | break; |
510 | } |
511 | } else |
512 | #endif |
513 | #if LJ_SOFTFP |
514 | if (ir->o == IR_SLOAD) { |
515 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ |
516 | nir->op2 &= ~IRSLOAD_CONVERT; |
517 | if (!(nir->op2 & IRSLOAD_TYPECHECK)) |
518 | nir->t.irt = IRT_INT; /* Drop guard. */ |
519 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); |
520 | ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); |
521 | } |
522 | } else if (ir->o == IR_TOBIT) { |
523 | IRRef tmp, op1 = ir->op1; |
524 | J->cur.nins--; |
525 | #if LJ_LE |
526 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); |
527 | #else |
528 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); |
529 | #endif |
530 | ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); |
531 | } else if (ir->o == IR_TOSTR) { |
532 | if (hisubst[ir->op1]) { |
533 | if (irref_isk(ir->op1)) |
534 | nir->op1 = ir->op1; |
535 | else |
536 | split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); |
537 | } |
538 | } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { |
539 | if (irref_isk(ir->op2) && hisubst[ir->op2]) |
540 | nir->op2 = ir->op2; |
541 | } else |
542 | #endif |
543 | if (ir->o == IR_CONV) { /* See above, too. */ |
544 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
545 | #if LJ_32 && LJ_HASFFI |
546 | if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ |
547 | #if LJ_SOFTFP |
548 | if (irt_isfloat(ir->t)) { |
549 | split_call_l(J, hisubst, oir, ir, |
550 | st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); |
551 | J->cur.nins--; /* Drop unused HIOP. */ |
552 | } |
553 | #else |
554 | if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ |
555 | ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), |
556 | hisubst[ir->op1], nref); |
557 | } |
558 | #endif |
559 | else { /* Truncate to lower 32 bits. */ |
560 | fwdlo: |
561 | ir->prev = nir->op1; /* Forward loword. */ |
562 | /* Replace with NOP to avoid messing up the snapshot logic. */ |
563 | nir->ot = IRT(IR_NOP, IRT_NIL); |
564 | nir->op1 = nir->op2 = 0; |
565 | } |
566 | } |
567 | #endif |
568 | #if LJ_SOFTFP && LJ_32 && LJ_HASFFI |
569 | else if (irt_isfloat(ir->t)) { |
570 | if (st == IRT_NUM) { |
571 | split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); |
572 | J->cur.nins--; /* Drop unused HIOP. */ |
573 | } else { |
574 | nir->o = IR_CALLN; |
575 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; |
576 | } |
577 | } else if (st == IRT_FLOAT) { |
578 | nir->o = IR_CALLN; |
579 | nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; |
580 | } else |
581 | #endif |
582 | #if LJ_SOFTFP |
583 | if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { |
584 | if (irt_isguard(ir->t)) { |
585 | lua_assert(st == IRT_NUM && irt_isint(ir->t)); |
586 | J->cur.nins--; |
587 | ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); |
588 | } else { |
589 | split_call_l(J, hisubst, oir, ir, |
590 | #if LJ_32 && LJ_HASFFI |
591 | st == IRT_NUM ? |
592 | (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : |
593 | (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) |
594 | #else |
595 | IRCALL_softfp_d2i |
596 | #endif |
597 | ); |
598 | J->cur.nins--; /* Drop unused HIOP. */ |
599 | } |
600 | } |
601 | #endif |
602 | } else if (ir->o == IR_CALLXS) { |
603 | IRRef hiref; |
604 | split_call: |
605 | hiref = hisubst[ir->op1]; |
606 | if (hiref) { |
607 | IROpT ot = nir->ot; |
608 | IRRef op2 = nir->op2; |
609 | nir->ot = IRT(IR_CARG, IRT_NIL); |
610 | #if LJ_LE |
611 | nir->op2 = hiref; |
612 | #else |
613 | nir->op2 = nir->op1; nir->op1 = hiref; |
614 | #endif |
615 | ir->prev = nref = split_emit(J, ot, nref, op2); |
616 | } |
617 | if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) |
618 | hi = split_emit(J, |
619 | IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), |
620 | nref, nref); |
621 | } else if (ir->o == IR_CARG) { |
622 | IRRef hiref = hisubst[ir->op1]; |
623 | if (hiref) { |
624 | IRRef op2 = nir->op2; |
625 | #if LJ_LE |
626 | nir->op2 = hiref; |
627 | #else |
628 | nir->op2 = nir->op1; nir->op1 = hiref; |
629 | #endif |
630 | ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); |
631 | nir = IR(nref); |
632 | } |
633 | hiref = hisubst[ir->op2]; |
634 | if (hiref) { |
635 | #if !LJ_TARGET_X86 |
636 | int carg = 0; |
637 | IRIns *cir; |
638 | for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) |
639 | carg++; |
640 | if ((carg & 1) == 0) { /* Align 64 bit arguments. */ |
641 | IRRef op2 = nir->op2; |
642 | nir->op2 = REF_NIL; |
643 | nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); |
644 | nir = IR(nref); |
645 | } |
646 | #endif |
647 | #if LJ_BE |
648 | { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } |
649 | #endif |
650 | ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); |
651 | } |
652 | } else if (ir->o == IR_CNEWI) { |
653 | if (hisubst[ir->op2]) |
654 | split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); |
655 | } else if (ir->o == IR_LOOP) { |
656 | J->loopref = nref; /* Needed by assembler. */ |
657 | } |
658 | hisubst[ref] = hi; /* Store hiword substitution. */ |
659 | } |
660 | if (snref == nins) { /* Substitution for last snapshot. */ |
661 | snap->ref = J->cur.nins; |
662 | split_subst_snap(J, snap, oir); |
663 | } |
664 | |
665 | /* Add PHI marks. */ |
666 | for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { |
667 | IRIns *ir = IR(ref); |
668 | if (ir->o != IR_PHI) break; |
669 | if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); |
670 | if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); |
671 | } |
672 | } |
673 | |
674 | /* Protected callback for split pass. */ |
675 | static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) |
676 | { |
677 | jit_State *J = (jit_State *)ud; |
678 | split_ir(J); |
679 | UNUSED(L); UNUSED(dummy); |
680 | return NULL; |
681 | } |
682 | |
683 | #if defined(LUA_USE_ASSERT) || LJ_SOFTFP |
684 | /* Slow, but sure way to check whether a SPLIT pass is needed. */ |
685 | static int split_needsplit(jit_State *J) |
686 | { |
687 | IRIns *ir, *irend; |
688 | IRRef ref; |
689 | for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) |
690 | if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t)) |
691 | return 1; |
692 | if (LJ_SOFTFP) { |
693 | for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev) |
694 | if ((IR(ref)->op2 & IRSLOAD_CONVERT)) |
695 | return 1; |
696 | if (J->chain[IR_TOBIT]) |
697 | return 1; |
698 | } |
699 | for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) { |
700 | IRType st = (IR(ref)->op2 & IRCONV_SRCMASK); |
701 | if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) || |
702 | st == IRT_I64 || st == IRT_U64) |
703 | return 1; |
704 | } |
705 | return 0; /* Nope. */ |
706 | } |
707 | #endif |
708 | |
709 | /* SPLIT pass. */ |
710 | void lj_opt_split(jit_State *J) |
711 | { |
712 | #if LJ_SOFTFP |
713 | if (!J->needsplit) |
714 | J->needsplit = split_needsplit(J); |
715 | #else |
716 | lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ |
717 | #endif |
718 | if (J->needsplit) { |
719 | int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); |
720 | if (errcode) { |
721 | /* Completely reset the trace to avoid inconsistent dump on abort. */ |
722 | J->cur.nins = J->cur.nk = REF_BASE; |
723 | J->cur.nsnap = 0; |
724 | lj_err_throw(J->L, errcode); /* Propagate errors. */ |
725 | } |
726 | } |
727 | } |
728 | |
729 | #undef IR |
730 | |
731 | #endif |
732 | |