/*
** Snapshot handling.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif
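
/*
** Snapshots map IR references to the stack slots and frames they
** correspond to. Every guard exit of a trace is associated with a
** snapshot, which is used both to restore the interpreter state
** (stack slots, frame links, PC) when the trace exits and to set up
** the initial state of a side trace (see lj_snap_replay() below).
*/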

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
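  /* Double the buffer if that covers the request; otherwise take the
  ** requested size directly, but never allocate fewer than 64 entries
  ** (which is also the size of the initial allocation).
  */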
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
#if LJ_FR2
    if (s == 1) {  /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
      if ((tr & TREF_FRAME))
        map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
      continue;
    }
    if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
      cTValue *base = J->L->base - J->baseslot;
      tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
      ref = tref_ref(tr);
    }
#endif
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /*
        ** No need to snapshot unmodified non-inherited slots.
        ** But always snapshot the function below a frame in LJ_FR2 mode.
        */
        if (!(ir->op2 & IRSLOAD_INHERIT) &&
            (!LJ_FR2 || s == 0 || s+1 == nslots ||
             !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP32 && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}

/* Add frame links at the end of the snapshot. */
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
  GCfunc *fn = frame_func(frame);
  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
  uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
  lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
  memcpy(map, &pcbase, sizeof(uint64_t));
#else
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
#endif
  lj_assertJ(!J->pt ||
             (J->pc >= proto_bc(J->pt) &&
              J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
      frame = frame_prevl(frame);
    } else if (frame_iscont(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
      frame = frame_prevd(frame);
    } else {
      lj_assertJ(!frame_isc(frame), "broken frame chain");
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
      frame = frame_prevd(frame);
      continue;
    }
    if (frame + funcproto(frame_func(frame))->framesize > ftop)
      ftop = frame + funcproto(frame_func(frame))->framesize;
  }
  *topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
  lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
  return 2;
#else
  lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
  return f;
#endif
}
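
/*
** A snapshot's region of the map thus holds snap->nent slot entries
** in ascending slot order, followed by the frame links: a single
** 64 bit PC+baseslot word (two map entries) for LJ_FR2, or the
** current PC plus one or two entries per outer frame otherwise.
*/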

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->nent = (uint8_t)nent;
  nent += snapshot_framelinks(J, p + nent, &snap->topslot);
  snap->mapofs = (uint32_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->mcofs = 0;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no IR ins. in between, or if requested and no guard in between. */
  if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
      (J->mergesnap && !irt_isguard(J->guardemit))) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
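      /* Emitting a NOP gives the next snapshot a different ref, so
      ** snap #0, whose PC marks the start of the trace, is kept.
      */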
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS  (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
                         const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)  udf[(s)] &= ~1
#define DEF_SLOT(s)  udf[(s)] *= 3
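
/*
** udf[s] tracks whether slot s is read before it's redefined: all
** slots start at 1, USE_SLOT clears bit 0 and DEF_SLOT multiplies by
** 3. A slot that was already used (0) thus stays 0, while a slot that
** is redefined first stays nonzero even if used later (1 -> 3 -> 2).
** So udf[s] != 0 at the end means the slot's value at the snapshot is
** never read and the slot is dead.
*/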

  /* Scan through following bytecode and check for uses/defs. */
  lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
             "snapshot PC out of range");
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
        goto handle_jump;
      } else if (bc_isret(op)) {
        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
                    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
        if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
        s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        if (op == BC_CALLT || op == BC_CALLMT) {
          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
          return 0;
        }
      } else if (op == BC_KNIL) {
        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
               "use/def analysis PC out of range");
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}
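
/*
** Illustrative example: if the bytecode following a snapshot is
**   KSHORT 2 10  (redefine slot 2)
**   RET1   2  2  (return slot 2)
** then every slot's value at the snapshot is dead: slot 2 is
** overwritten before it's read, and the return defines all others.
*/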

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg baseslot = J->baseslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint32_t)(snap->mapofs + m);  /* Free up space in map. */
}

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}
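
/* A filter hit may be a false positive, so callers follow it up with
** the exact linear check in snap_renameref() below; a miss
** definitively rules out a rename and skips the scan.
*/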

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  UNUSED(J);
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
        lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
        if (snap_slot(map[n]) == ir->op1) {
          ref = snap_ref(map[n++]);
          break;
        }
      }
    } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: case IR_KINT64:
    return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
                      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
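  /* Three cases: replay a constant; return 0 for a parent value that
  ** has no register or spill slot (the caller must rematerialize it);
  ** or emit a PVAL, reusing an earlier one for the same ref if found.
  */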
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = &T->ir[irs->op1];
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = &T->ir[irk->op1];
    return (&T->ir[irk->op1] == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
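  /* The sink pass (lj_opt_sink.c) stores the distance from the
  ** allocation to the store in irs->s, saturated to 255. If it fits,
  ** a simple address comparison suffices; otherwise walk the store's
  ** key chain back to the allocation (slow path above).
  */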
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(T, ira, irs);
}

/* Replay snapshot state to setup side trace.
**
** Pass 1 emits SLOADs (or replays constants) for all snapshot slots.
** If any slot refers to a parent value without a register or spill
** slot (a sunk allocation or an eliminated conversion), pass 2 emits
** the PVALs such values depend on, and pass 3 re-emits the sunk
** allocations together with their sunk stores.
*/
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
      if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
        tr = 0;
      else
        tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lj_assertJ(s != 0, "unused slot 0 in snapshot");
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
        pass23 = 1;
        lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
                   ir->o == IR_CNEW || ir->o == IR_CNEWI,
                   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
        if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
        if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
            snap_pref(J, T, map, nent, seen, (ir+1)->op2);
        } else {
          IRIns *irs;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
                snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
              else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
                       irs+1 < irlast && (irs+1)->o == IR_HIOP)
                snap_pref(J, T, map, nent, seen, (irs+1)->op2);
            }
        }
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
        lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
                   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
        J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        TRef op1, op2;
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
          J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
          continue;
        }
        op1 = ir->op1;
        if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
        op2 = ir->op2;
        if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
            lj_needsplit(J);  /* Emit joining HIOP. */
            op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
                             snap_pref(J, T, map, nent, seen, (ir+1)->op2));
          }
          J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
        } else {
          IRIns *irs;
          TRef tr = emitir(ir->ot, op1, op2);
          J->slot[snap_slot(sn)] = tr;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              IRIns *irr = &T->ir[irs->op1];
              TRef val, key = irr->op2, tmp = tr;
              if (irr->o != IR_FREF) {
                IRIns *irk = &T->ir[key];
                if (irr->o == IR_HREFK)
                  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
                                    irk->op2);
                else
                  key = snap_replay_const(J, irk);
                if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                  IRIns *irf = &T->ir[irr->op1];
                  tmp = emitir(irf->ot, tmp, irf->op2);
                }
              }
              tmp = emitir(irr->ot, tmp, key);
              val = snap_pref(J, T, map, nent, seen, irs->op2);
              if (val == 0) {
                IRIns *irc = &T->ir[irs->op2];
                lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
                           "sunk store for parent IR %04d with bad op %d",
                           refp - REF_BIAS, irc->o);
                val = snap_pref(J, T, map, nent, seen, irc->op1);
                val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
              } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
                         irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                IRType t = IRT_I64;
                if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
                  t = IRT_NUM;
                lj_needsplit(J);
                if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                  uint64_t k = (uint32_t)T->ir[irs->op2].i +
                               ((uint64_t)T->ir[(irs+1)->op2].i << 32);
                  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
                } else {
                  val = emitir_raw(IRT(IR_HIOP, t), val,
                                   snap_pref(J, T, map, nent, seen, (irs+1)->op2));
                }
                tmp = emitir(IRT(irs->o, t), tmp, val);
                continue;
              }
              tmp = emitir(irs->ot, tmp, val);
            } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
              emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
            }
        }
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    if (ir->o == IR_KPTR) {
      o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
    } else {
      lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
                 "restore of const from IR %04d with bad op %d",
                 ref - REF_BIAS, ir->o);
      lj_ir_kvalue(J->L, o, ir);
    }
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP32
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
#endif
    } else {
      lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
                 "restore from IR %04d has no reg", ref - REF_BIAS);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#elif LJ_64  /* && LJ_SOFTFP */
    } else if (irt_isnum(t)) {
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}

#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  UNUSED(J);
  if (irref_isk(ref)) {
    if (ir_isk64(ir)) {
      src = (int32_t *)&ir[1];
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        tmp = (uint64_t)(uint32_t)*src;
        src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
                   "restore from IR %04d has no reg", ref - REF_BIAS);
        snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
        src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        if (LJ_BE && sz == 4) src++;
#endif
      } else
#endif
      if (LJ_64 && LJ_BE && sz == 4) src++;
    }
  }
  lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
             "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif

/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o)
{
  lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
             ir->o == IR_CNEW || ir->o == IR_CNEWI,
             "sunk allocation with bad op %d", ir->o);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz;
    CTInfo info = lj_ctype_info(cts, id, &sz);
    GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
        snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
                         LJ_LE ? p+4 : p, 4);
        if (LJ_BE) p += 4;
        sz = 4;
      }
      snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
        if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
          IRIns *iro = &T->ir[T->ir[irs->op1].op2];
          uint8_t *p = (uint8_t *)cd;
          CTSize szs;
          lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
          lj_assertJ(T->ir[irs->op1].o == IR_ADD,
                     "sunk store with bad add op %d", T->ir[irs->op1].o);
          lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
                     "sunk store with bad const offset op %d", iro->o);
          if (irt_is64(irs->t)) szs = 8;
          else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
          else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
          else szs = 4;
          if (LJ_64 && iro->o == IR_KINT64)
            p += (int64_t)ir_k64(iro)->u64;
          else
            p += iro->i;
          lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
                     p + szs <= (uint8_t *)cdataptr(cd) + sz,
                     "sunk store with offset out of range");
          if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
            snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
                             LJ_LE ? p+4 : p, 4);
            if (LJ_BE) p += 4;
          }
          snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
        }
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
                                  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
        IRIns *irk = &T->ir[irs->op1];
        TValue tmp, *val;
        lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
                   irs->o == IR_FSTORE,
                   "sunk store with bad op %d", irs->o);
        if (irk->o == IR_FREF) {
          lj_assertJ(irk->op2 == IRFL_TAB_META,
                     "sunk store with bad field %d", irk->op2);
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          setgcref(t->metatable, obj2gco(tabV(&tmp)));
        } else {
          irk = &T->ir[irk->op2];
          if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
          lj_ir_kvalue(J->L, &tmp, irk);
          val = lj_tab_set(J->L, t, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
          if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
            val->u32.hi = tmp.u32.lo;
          }
        }
      }
  }
}

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
  ptrdiff_t ftsz0;
#endif
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(&map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1-LJ_FR2;
#if !LJ_FR2
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
#endif
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
        MSize j;
        for (j = 0; j < n; j++)
          if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
            copyTV(L, o, &frame[snap_slot(map[j])]);
            goto dupslot;
          }
        snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
        continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
        TValue tmp;
        snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
        o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        /* Overwrite tag with frame link. */
        setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
        L->base = o+1;
#endif
      }
    }
  }
#if LJ_FR2
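  /* The low byte of the 64 bit PC+baseslot frame-link word holds
  ** baseslot-2 (see snapshot_framelinks()); add it to get the base
  ** of the innermost frame.
  */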
  L->base += (map[nent+LJ_BE] & 0xff);
#endif
  lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}

#undef emitir_raw
#undef emitir

#endif