/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_dispatch.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
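**
** As an illustrative sketch (not verbatim -jdump output): a soft-float
** number ADD thus becomes a CALLN to the softfp_add helper, which yields
** the loword, immediately followed by an "sfp" HIOP carrying the hiword:
**
** 0100    int CALLN  0099  softfp_add
** 0101    sfp HIOP   0100  0100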
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
** 0100    p32 ADD    base  +8
** 0101    i64 XLOAD  0100
** 0102    i64 ADD    0101  +1
** 0103    p32 ADD    base  +16
** 0104    i64 XSTORE 0103  0102
**
**         mov rax, [esi+0x8]
**         add rax, +0x01
**         mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
** 0100    p32 ADD    base  +8
** 0101    int XLOAD  0100
** 0102    p32 ADD    base  +12
** 0103    int XLOAD  0102
** 0104    int ADD    0101  +1
** 0105    int HIOP   0103  +0
** 0106    p32 ADD    base  +16
** 0107    int XSTORE 0106  0104
** 0108    int HIOP   0106  0105
**
**         mov eax, [esi+0x8]
**         mov ecx, [esi+0xc]
**         add eax, +0x01
**         adc ecx, +0x00
**         mov [esi+0x10], eax
**         mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)  (&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}
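
/* Illustrative usage (a sketch mirroring calls below): the hiword of a
** 64 bit BNOT is emitted as a single extra 32 bit instruction, e.g.
**   hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
** Since chains aren't updated, the emitted IR must not go through FOLD. */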

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
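
/* For the checked case this emits (sketch): the d2i call, an i2d call on
** the result plus its HIOP, and guarded EQ/HIOP compares of the
** round-tripped words against the original lo/hi words, so a non-integral
** number exits the trace. */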

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                          IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}

/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
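
/* Note on the CARG chains built above (LE order shown): the arguments nest
** left as (((lo1 hi1) lo2) hi2), i.e. the backend sees lo1, hi1, lo2, hi2
** in call order; BE swaps each lo/hi pair. */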

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}
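
/* Reassociation example (cf. the header comment): if the loword address is
** '0100 p32 ADD base +8', the hiword address is emitted as 'ADD base +12'
** rather than 'ADD 0100 +4', which the assembler can later fuse into an
** addressing mode. */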

#if LJ_HASFFI
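/* Constant 64 bit shifts decompose into 32 bit word operations. A sketch of
** the identities used below (unsigned 32 bit words, 0 < k < 32):
**   shl64: hi' = (hi << k) | (lo >> (32-k)),  lo' = lo << k
**   shr64: lo' = (lo >> k) | (hi << (32-k)),  hi' = hi >> k
** For k >= 32 one word is shifted by k-32 and the other becomes zero (or
** the sign fill for sar64); rotates swap the words first when k >= 32.
** Non-constant shift counts fall back to a carith helper call. */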
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
                            IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
        J->cur.nins--;
        ir->prev = lo;
        return hi;
      } else {
        TRef k1, k2;
        IRRef t1, t2, t3, t4;
        J->cur.nins--;
        k1 = lj_ir_kint(J, k);
        k2 = lj_ir_kint(J, (-k & 31));
        t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
        t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
        t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
        t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
        return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
        IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
        IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
        return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
        IRRef t1 = ir->prev, t2;
        lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
        nir->o = IR_BSHR;
        t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
        return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
        if (k == 32)
          J->cur.nins--;
        else
          lo = ir->prev;
        ir->prev = lj_ir_kint(J, 0);
        return lo;
      } else {
        lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
        if (k == 32) {
          J->cur.nins--;
          ir->prev = hi;
        } else {
          nir->op1 = hi;
        }
        if (op == IR_BSHR)
          return lj_ir_kint(J, 0);
        else
          return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
                       op - IR_BSHL + IRCALL_lj_carith_shl64);
}

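/* With a constant lo or hi word, 64 bit BAND/BOR/BXOR simplify per word, as
** sketched by the identities: x & 0 = 0, x & ~0 = x, x | 0 = x,
** x | ~0 = ~0, x ^ 0 = x, x ^ ~0 = ~x. */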
static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
                         IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        J->cur.nins--;
        ir->prev = nir->op1;
      } else if (op == IR_BXOR) {
        nir->o = IR_BNOT;
        nir->op2 = 0;
      } else {
        J->cur.nins--;
        ir->prev = kref;
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        return hi;
      } else if (op == IR_BXOR) {
        return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
        return kref;
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
#endif

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}
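
/* Note (inferred from the SNAP_SOFTFPNUM invariant in split_ir below):
** split number constants don't satisfy 'hi ref = lo ref + 1', so their
** snapshot entries keep the original 64 bit constant reference instead of
** the loword substitution. */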

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;
  }
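
  /* Example (sketch): a KINT64 constant 0x8000000000000001 splits into
  ** lo = KINT 0x00000001 and hi = KINT 0x80000000 via lj_ir_kint(); 64 bit
  ** constants occupy two IR slots, hence the extra ref++ above. */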

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
        break;
      case IR_FPMATH:
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
      case IR_NEG: case IR_ABS:
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
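        /* Added note (sketch): NEG flips the hiword sign bit via BXOR with
        ** 0x80000000, ABS clears it via BAND with 0x7fffffff; the constant
        ** below evaluates to 0x7fffffff + 1 exactly when ir->o == IR_NEG. */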
        hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
                        hisubst[ir->op1],
                        lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_FLOAD:
        lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
        hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
        nir->op2 += LJ_BE*4;
        break;
      case IR_XLOAD: {
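        /* Added note: the just-emitted lo XLOAD is undone and re-emitted
        ** after the hiword address ADD, so the lo/hi loads stay adjacent
        ** (hi ref = lo ref + 1, as the SNAP_SOFTFPNUM note above requires). */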
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
        inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
      }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
          break;
        }
#endif
        lj_assertJ(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
                   "bad source type for CONV");
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
                   "bad IR op %d", ir->o);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_BNOT:
        hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
        break;
      case IR_BSWAP:
        ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
        hi = nref;
        break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
        hi = split_bitop(J, hisubst, nir, ir);
        break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
        hi = split_bitshift(J, hisubst, oir, nir, ir);
        break;
      case IR_FLOAD:
        lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. But fwd both parts. */
          hi = hiref;
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
      }
      case IR_CALLXS:
        goto split_call;
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
      }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
          J->cur.nins--;
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
            st == IRT_NUM ?
              (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
              (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
            IRCALL_softfp_d2i
#endif
          );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
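        /* Added note (assumption about the target ABIs): non-x86 calling
        ** conventions pass 64 bit arguments in aligned register pairs or
        ** stack slots, so a dummy REF_NIL argument is inserted whenever the
        ** 64 bit argument would otherwise start at an odd position. */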
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
        return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
        st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif