/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_str.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
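**
** For illustration (schematic, not an actual -jdump), a number ADD on a
** soft-float target turns into a call to the soft-float runtime plus a
** SOFTFP-typed HIOP that carries the hiword of the result:
**
**   0100    num ADD    0098  0099
**
** becomes (the CARG argument chain is omitted here):
**
**   0100    int CALLN  ....  softfp_add
**   0101    sfp HIOP   0100  0100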
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
**   0100    p32 ADD    base  +8
**   0101    i64 XLOAD  0100
**   0102    i64 ADD    0101  +1
**   0103    p32 ADD    base  +16
**   0104    i64 XSTORE 0103  0102
**
**   mov rax, [esi+0x8]
**   add rax, +0x01
**   mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
**   0100    p32 ADD    base  +8
**   0101    int XLOAD  0100
**   0102    p32 ADD    base  +12
**   0103    int XLOAD  0102
**   0104    int ADD    0101  +1
**   0105    int HIOP   0103  +0
**   0106    p32 ADD    base  +16
**   0107    int XSTORE 0106  0104
**   0108    int HIOP   0106  0105
**
**   mov eax, [esi+0x8]
**   mov ecx, [esi+0xc]
**   add eax, +0x01
**   adc ecx, +0x00
**   mov [esi+0x10], eax
**   mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
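/* Sketch of the emitted IR for the checked case (illustrative only):
** the number is converted with softfp_d2i, converted back with
** softfp_i2d, and a guarded EQ plus a guarded HIOP verify that the
** round trip reproduces the original loword and hiword. */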
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                          IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with two split 64 bit arguments. */
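/* The arguments are passed as a left-leaning CARG chain, schematically
** (LE): CARG(CARG(CARG(a.lo, a.hi), b.lo), b.hi) --> CALLN id.
** The BE variant swaps the words of each pair. */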
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
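/* E.g. if the loword address is 'ADD base +8', this returns a reference
** to 'ADD base +12' (reassociated) rather than stacking another ADD +4
** on top, provided the ADD has a constant op2 and is not a PHI. */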
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
}

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
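    /* Constant SNAP_SOFTFPNUM entries keep their original KNUM ref:
    ** IR constants are retained by the pass, so the ref stays valid. */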
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;
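  /* Afterwards oir[ref] and hisubst[ref] can be indexed directly with
  ** IR references in [nk, nins), constants included. */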

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
        break;
      case IR_FPMATH:
        /* Try to rejoin pow from EXP2, MUL and LOG2. */
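        /* I.e. turn exp2(y*log2(x)) back into a single pow(x, y) call,
        ** recycling the CARG refs that already hold the split halves
        ** of x from the LOG2 call. */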
        if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
          IRIns *irp = IR(nir->op1);
          if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
            IRIns *irm4 = IR(irp->op1);
            IRIns *irm3 = IR(irm4->op1);
            IRIns *irm12 = IR(irm3->op1);
            IRIns *irl1 = IR(irm12->op1);
            if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
                irl1->op2 == IRCALL_lj_vm_log2) {
              IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
              IRRef arg3 = irm3->op2, arg4 = irm4->op2;
              J->cur.nins--;
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
              ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
              hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
              break;
            }
          }
        }
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_ATAN2:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
      case IR_NEG: case IR_ABS:
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
        hi = split_emit(J,
          IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
          hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_XLOAD: {
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD immediately before hi XLOAD. */
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
#if LJ_LE
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
      }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                            st == IRT_I64 ? IRCALL_fp64_l2d :
                                            IRCALL_fp64_ul2d);
          break;
        }
#endif
        lua_assert(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
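        /* E.g. adding a multiple of 2^32: the loword addend is +0 and
        ** cannot carry, so the hiword may use a plain ADD/SUB instead
        ** of the carry-propagating HIOP. */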
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_FLOAD:
        lua_assert(ir->op2 == IRFL_CDATA_INT64);
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1),
                        ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                            irt_isi64(ir->t) ? IRCALL_fp64_d2l :
                                               IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. */
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
      }
      case IR_CALLXS:
        goto split_call;
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
      }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lua_assert(st == IRT_NUM && irt_isint(ir->t));
          J->cur.nins--;
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
                       st == IRT_NUM ?
                       (irt_isint(ir->t) ? IRCALL_softfp_d2i :
                                           IRCALL_softfp_d2ui) :
                       (irt_isint(ir->t) ? IRCALL_softfp_f2i :
                                           IRCALL_softfp_f2ui)
#else
                       IRCALL_softfp_d2i
#endif
                       );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
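          /* Some ABIs (e.g. ARM EABI) want 64 bit arguments in aligned
          ** register pairs or stack slots; pad with a nil CARG. */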
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
        return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
        st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif