1/*
2** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_opt_split_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
12
13#include "lj_err.h"
14#include "lj_buf.h"
15#include "lj_ir.h"
16#include "lj_jit.h"
17#include "lj_ircall.h"
18#include "lj_iropt.h"
19#include "lj_dispatch.h"
20#include "lj_vm.h"
21
22/* SPLIT pass:
23**
24** This pass splits up 64 bit IR instructions into multiple 32 bit IR
25** instructions. It's only active for soft-float targets or for 32 bit CPUs
26** which lack native 64 bit integer operations (the FFI is currently the
27** only emitter for 64 bit integer instructions).
28**
29** Splitting the IR in a separate pass keeps each 32 bit IR assembler
30** backend simple. Only a small amount of extra functionality needs to be
31** implemented. This is much easier than adding support for allocating
32** register pairs to each backend (believe me, I tried). A few simple, but
33** important optimizations can be performed by the SPLIT pass, which would
34** be tedious to do in the backend.
35**
36** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
40** inferred from the previous instruction.
41**
42** The operands of HIOP hold the hiword input references. The output of HIOP
43** is the hiword output reference, which is also used to hold the hiword
44** register or spill slot information. The register allocator treats this
45** instruction independently of any other instruction, which improves code
46** quality compared to using fixed register pairs.
47**
48** It's easier to split up some instructions into two regular 32 bit
49** instructions. E.g. XLOAD is split up into two XLOADs with two different
50** addresses. Obviously 64 bit constants need to be split up into two 32 bit
51** constants, too. Some hiword instructions can be entirely omitted, e.g.
52** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
53** are split up into two 32 bit arguments each.
54**
55** On soft-float targets, floating-point instructions are directly converted
56** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
57** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
58**
59** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
60** two int64_t fields:
61**
62** 0100 p32 ADD base +8
63** 0101 i64 XLOAD 0100
64** 0102 i64 ADD 0101 +1
65** 0103 p32 ADD base +16
66** 0104 i64 XSTORE 0103 0102
67**
68** mov rax, [esi+0x8]
69** add rax, +0x01
70** mov [esi+0x10], rax
71**
72** Here's the transformed IR and the x86 machine code after the SPLIT pass:
73**
74** 0100 p32 ADD base +8
75** 0101 int XLOAD 0100
76** 0102 p32 ADD base +12
77** 0103 int XLOAD 0102
78** 0104 int ADD 0101 +1
79** 0105 int HIOP 0103 +0
80** 0106 p32 ADD base +16
81** 0107 int XSTORE 0106 0104
82** 0108 int HIOP 0106 0105
83**
84** mov eax, [esi+0x8]
85** mov ecx, [esi+0xc]
86** add eax, +0x01
87** adc ecx, +0x00
88** mov [esi+0x10], eax
89** mov [esi+0x14], ecx
90**
91** You may notice the reassociated hiword address computation, which is
92** later fused into the mov operands by the assembler.
93*/
94
95/* Some local macros to save typing. Undef'd at the end. */
96#define IR(ref) (&J->cur.ir[(ref)])
97
98/* Directly emit the transformed IR without updating chains etc. */
99static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
100{
101 IRRef nref = lj_ir_nextins(J);
102 IRIns *ir = IR(nref);
103 ir->ot = ot;
104 ir->op1 = op1;
105 ir->op2 = op2;
106 return nref;
107}
108
109#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion.
** lo/hi are the two 32 bit halves of the number. Returns the loword ref of
** the integer result. If check is set, guards that the conversion is exact.
*/
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
  /* CARG argument order for the 64 bit value depends on endianess. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    /* Round-trip back to number and guard that both 32 bit words match.
    ** The i2d result occupies two adjacent refs: CALLN at tmp, HIOP at tmp+1.
    */
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
128
/* Emit a CALLN with one split 64 bit argument.
** Replaces the copy-substituted instruction (undone via nins--) with a
** CARG/CALLN pair; stores the loword in ir->prev and returns the HIOP ref.
*/
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;  /* Undo the emit of the copy-substituted instruction. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
143#endif
144
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument.
** Same replacement scheme as split_call_l, with the loword of op2 chained
** as an extra CARG after the split 64 bit op1.
*/
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the emit of the copy-substituted instruction. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
160
/* Emit a CALLN with two split 64 bit arguments.
** Chains lo/hi words of both operands in endian-dependent CARG order.
** The HIOP result type is soft-float for number results, integer otherwise.
*/
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the emit of the copy-substituted instruction. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
181
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword).
** I.e. the address of the original 64 bit access plus 4 bytes.
*/
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)  /* Fold offset into a constant pointer directly. */
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address: fold the existing constant offset into ofs.
    ** Not done for PHI refs, to avoid disturbing loop-carried values.
    */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;  /* Offsets cancelled out: reuse base ref. */
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}
198
199#if LJ_HASFFI
/* Split a 64 bit shift/rotate. Constant shift counts are strength-reduced
** to 32 bit shifts and ORs; variable counts fall back to a runtime call.
** Returns the hiword ref; the loword is left in ir->prev.
*/
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
			    IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);  /* Shift counts are modulo 64. */
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);  /* Rotate right = left by 64-k. */
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }  /* Swap words. */
      if (k == 0) {
      passthrough:  /* Result is just the (possibly swapped) input words. */
	J->cur.nins--;  /* Undo the emit of the copy-substituted ins. */
	ir->prev = lo;
	return hi;
      } else {
	/* Rotate by 0 < k < 32: OR together crosswise-shifted word pairs. */
	TRef k1, k2;
	IRRef t1, t2, t3, t4;
	J->cur.nins--;  /* Undo the emit of the copy-substituted ins. */
	k1 = lj_ir_kint(J, k);
	k2 = lj_ir_kint(J, (-k & 31));
	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
	return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {  /* Shift by zero: pass both words through. */
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
	/* hi' = (hi << k) | (lo >> (32-k)); lo' is the substituted BSHL. */
	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
	return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
	/* lo' = (lo >> k) | (hi << (32-k)); hi' = hi >> k (logical or
	** arithmetic, matching the original op).
	*/
	IRRef t1 = ir->prev, t2;
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	nir->o = IR_BSHR;  /* Loword shift is always logical. */
	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
	return split_emit(J, IRTI(op), hi, kref);
      }
    } else {  /* Shift by 32..63: one word is fully shifted out. */
      if (op == IR_BSHL) {
	if (k == 32)
	  J->cur.nins--;  /* Hiword is the unshifted loword. */
	else
	  lo = ir->prev;  /* Hiword is the substituted lo << (k-32). */
	ir->prev = lj_ir_kint(J, 0);  /* Loword becomes zero. */
	return lo;
      } else {
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	if (k == 32) {
	  J->cur.nins--;  /* Loword is the unshifted hiword. */
	  ir->prev = hi;
	} else {
	  nir->op1 = hi;  /* Loword is the substituted hi >> (k-32). */
	}
	if (op == IR_BSHR)
	  return lj_ir_kint(J, 0);  /* Logical: hiword becomes zero. */
	else
	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  /* Variable shift count: call into the runtime shift helpers. */
  return split_call_li(J, hisubst, oir, ir,
		       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
270
/* Split a 64 bit bitwise AND/OR/XOR into two 32 bit ops.
** Simplifies against all-zero/all-one constant words; returns the hiword
** ref, with the loword result left in ir->prev (or the substituted nir).
*/
static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
			 IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;  /* Normalize: k == 0 now means identity. */
      if (k == 0) {
	J->cur.nins--;
	ir->prev = nir->op1;  /* Identity: pass the operand through. */
      } else if (op == IR_BXOR) {
	nir->o = IR_BNOT;  /* XOR with all-ones is a NOT. */
	nir->op2 = 0;
      } else {
	J->cur.nins--;
	ir->prev = kref;  /* Absorbing: result is the constant itself. */
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;  /* Normalize as above. */
      if (k == 0) {
	return hi;  /* Identity: hiword unchanged. */
      } else if (op == IR_BXOR) {
	return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
	return kref;  /* Absorbing: hiword is the constant. */
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
309#endif
310
311/* Substitute references of a snapshot. */
312static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
313{
314 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
315 MSize n, nent = snap->nent;
316 for (n = 0; n < nent; n++) {
317 SnapEntry sn = map[n];
318 IRIns *ir = &oir[snap_ref(sn)];
319 if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
320 map[n] = ((sn & 0xffff0000) | ir->prev);
321 }
322}
323
/* Transform the old IR to the new IR.
** Copies the current IR into a scratch buffer, resets the trace IR and
** re-emits every instruction with 64 bit ops split into 32 bit pairs.
** Loword substitutions live in oir[ref].prev, hiwords in hisubst[ref].
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  /* One scratch buffer holds the old IR copy plus the hiword subst table. */
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;  /* 64 bit constants occupy two slots; skip the second one. */
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      /* Reached a snapshot: retarget it and substitute its references. */
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
	break;
      case IR_FPMATH:
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	/* Negate/clear only the sign bit in the hiword; loword unchanged. */
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1],
			lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
	/* Constant-fold the hiword from the GG_State field directly. */
	hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
	nir->op2 += LJ_BE*4;
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
	inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
      }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lj_assertJ(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
		   "bad source type for CONV");
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
		   "bad IR op %d", ir->o);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_BNOT:
	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
	break;
      case IR_BSWAP:
	/* Byte-swap each word, then swap the two words. */
	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
	hi = nref;
	break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
	hi = split_bitop(J, hisubst, nir, ir);
	break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
	hi = split_bitshift(J, hisubst, oir, nir, ir);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
      }
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
      }
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;  /* Undo the emit of the copy-substituted ins. */
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;  /* Keep the original 64 bit constant ref. */
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;  /* Keep the original 64 bit constant key ref. */
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      /* Rewrite the call: turn the copy-substituted ins into a CARG for the
      ** hiword of op1 and re-emit the call itself at the end of the chain.
      */
      hiref = hisubst[ir->op1];
      if (hiref) {
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;  /* Insert a dummy padding argument. */
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. Scan the trailing PHI run emitted at the end. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
790
791/* Protected callback for split pass. */
792static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
793{
794 jit_State *J = (jit_State *)ud;
795 split_ir(J);
796 UNUSED(L); UNUSED(dummy);
797 return NULL;
798}
799
800#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  /* Any 64 bit (or, on soft-float, FP) typed result needs splitting. */
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    /* Converting SLOADs and TOBIT become soft-float calls, so need SPLIT. */
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
	return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  /* CONVs from 64 bit ints (or from FP on soft-float) also need SPLIT. */
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
	st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
824#endif
825
/* SPLIT pass. Entry point: runs split_ir protected if splitting is needed. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  /* Soft-float targets may only discover the need for splitting here. */
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
#endif
  if (J->needsplit) {
    /* Run the pass protected: IR emission inside split_ir may raise errors. */
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}
845
846#undef IR
847
848#endif
849