| 1 | /* | 
|---|
| 2 | ** Snapshot handling. | 
|---|
| 3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h | 
|---|
| 4 | */ | 
|---|
| 5 |  | 
|---|
| 6 | #define lj_snap_c | 
|---|
| 7 | #define LUA_CORE | 
|---|
| 8 |  | 
|---|
| 9 | #include "lj_obj.h" | 
|---|
| 10 |  | 
|---|
| 11 | #if LJ_HASJIT | 
|---|
| 12 |  | 
|---|
| 13 | #include "lj_gc.h" | 
|---|
| 14 | #include "lj_tab.h" | 
|---|
| 15 | #include "lj_state.h" | 
|---|
| 16 | #include "lj_frame.h" | 
|---|
| 17 | #include "lj_bc.h" | 
|---|
| 18 | #include "lj_ir.h" | 
|---|
| 19 | #include "lj_jit.h" | 
|---|
| 20 | #include "lj_iropt.h" | 
|---|
| 21 | #include "lj_trace.h" | 
|---|
| 22 | #include "lj_snap.h" | 
|---|
| 23 | #include "lj_target.h" | 
|---|
| 24 | #if LJ_HASFFI | 
|---|
| 25 | #include "lj_ctype.h" | 
|---|
| 26 | #include "lj_cdata.h" | 
|---|
| 27 | #endif | 
|---|
| 28 |  | 
|---|
| 29 | /* Some local macros to save typing. Undef'd at the end. All of them expect a variable named J in scope. IR(ref) yields a pointer to instruction `ref` in J->cur.ir. */ | 
|---|
| 30 | #define IR(ref)		(&J->cur.ir[(ref)]) | 
|---|
| 31 |  | 
|---|
| 32 | /* Pass IR on to next optimization in chain (FOLD): sets up the instruction with lj_ir_set(), then calls lj_opt_fold(). */ | 
|---|
| 33 | #define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | 
|---|
| 34 |  | 
|---|
| 35 | /* Emit raw IR without passing through optimizations: sets up the instruction with lj_ir_set(), then calls lj_ir_emit(). */ | 
|---|
| 36 | #define emitir_raw(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) | 
|---|
| 37 |  | 
|---|
| 38 | /* -- Snapshot buffer allocation ------------------------------------------ */ | 
|---|
| 39 |  | 
|---|
| 40 | /* Grow snapshot buffer. */ | 
|---|
| 41 | void lj_snap_grow_buf_(jit_State *J, MSize need) | 
|---|
| 42 | { | 
|---|
| 43 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | 
|---|
| 44 | if (need > maxsnap) | 
|---|
| 45 | lj_trace_err(J, LJ_TRERR_SNAPOV); | 
|---|
| 46 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | 
|---|
| 47 | J->cur.snap = J->snapbuf; | 
|---|
| 48 | } | 
|---|
| 49 |  | 
|---|
| 50 | /* Grow snapshot map buffer. */ | 
|---|
| 51 | void lj_snap_grow_map_(jit_State *J, MSize need) | 
|---|
| 52 | { | 
|---|
| 53 | if (need < 2*J->sizesnapmap) | 
|---|
| 54 | need = 2*J->sizesnapmap; | 
|---|
| 55 | else if (need < 64) | 
|---|
| 56 | need = 64; | 
|---|
| 57 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | 
|---|
| 58 | J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); | 
|---|
| 59 | J->cur.snapmap = J->snapmapbuf; | 
|---|
| 60 | J->sizesnapmap = need; | 
|---|
| 61 | } | 
|---|
| 62 |  | 
|---|
| 63 | /* -- Snapshot generation ------------------------------------------------- */ | 
|---|
| 64 |  | 
|---|
| 65 | /* Add all modified slots to the snapshot. */ | 
|---|
| 66 | static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | 
|---|
| 67 | { | 
|---|
| 68 | IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */ | 
|---|
| 69 | BCReg s; | 
|---|
| 70 | MSize n = 0; | 
|---|
| 71 | for (s = 0; s < nslots; s++) { | 
|---|
| 72 | TRef tr = J->slot[s]; | 
|---|
| 73 | IRRef ref = tref_ref(tr); | 
|---|
| 74 | if (ref) { | 
|---|
| 75 | SnapEntry sn = SNAP_TR(s, tr); | 
|---|
| 76 | IRIns *ir = IR(ref); | 
|---|
| 77 | if (!(sn & (SNAP_CONT|SNAP_FRAME)) && | 
|---|
| 78 | ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { | 
|---|
| 79 | /* No need to snapshot unmodified non-inherited slots. */ | 
|---|
| 80 | if (!(ir->op2 & IRSLOAD_INHERIT)) | 
|---|
| 81 | continue; | 
|---|
| 82 | /* No need to restore readonly slots and unmodified non-parent slots. */ | 
|---|
| 83 | if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && | 
|---|
| 84 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 
|---|
| 85 | sn |= SNAP_NORESTORE; | 
|---|
| 86 | } | 
|---|
| 87 | if (LJ_SOFTFP && irt_isnum(ir->t)) | 
|---|
| 88 | sn |= SNAP_SOFTFPNUM; | 
|---|
| 89 | map[n++] = sn; | 
|---|
| 90 | } | 
|---|
| 91 | } | 
|---|
| 92 | return n; | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | /* Add frame links at the end of the snapshot. */ | 
|---|
| 96 | static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) | 
|---|
| 97 | { | 
|---|
| 98 | cTValue *frame = J->L->base - 1; | 
|---|
| 99 | cTValue *lim = J->L->base - J->baseslot; | 
|---|
| 100 | cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; | 
|---|
| 101 | MSize f = 0; | 
|---|
| 102 | map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */ | 
|---|
| 103 | while (frame > lim) {  /* Backwards traversal of all frames above base. */ | 
|---|
| 104 | if (frame_islua(frame)) { | 
|---|
| 105 | map[f++] = SNAP_MKPC(frame_pc(frame)); | 
|---|
| 106 | frame = frame_prevl(frame); | 
|---|
| 107 | if (frame + funcproto(frame_func(frame))->framesize > ftop) | 
|---|
| 108 | ftop = frame + funcproto(frame_func(frame))->framesize; | 
|---|
| 109 | } else if (frame_iscont(frame)) { | 
|---|
| 110 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 
|---|
| 111 | map[f++] = SNAP_MKPC(frame_contpc(frame)); | 
|---|
| 112 | frame = frame_prevd(frame); | 
|---|
| 113 | } else { | 
|---|
| 114 | lua_assert(!frame_isc(frame)); | 
|---|
| 115 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | 
|---|
| 116 | frame = frame_prevd(frame); | 
|---|
| 117 | } | 
|---|
| 118 | } | 
|---|
| 119 | lua_assert(f == (MSize)(1 + J->framedepth)); | 
|---|
| 120 | return (BCReg)(ftop - lim); | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | /* Take a snapshot of the current stack. */ | 
|---|
| 124 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | 
|---|
| 125 | { | 
|---|
| 126 | BCReg nslots = J->baseslot + J->maxslot; | 
|---|
| 127 | MSize nent; | 
|---|
| 128 | SnapEntry *p; | 
|---|
| 129 | /* Conservative estimate. */ | 
|---|
| 130 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); | 
|---|
| 131 | p = &J->cur.snapmap[nsnapmap]; | 
|---|
| 132 | nent = snapshot_slots(J, p, nslots); | 
|---|
| 133 | snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); | 
|---|
| 134 | snap->mapofs = (uint16_t)nsnapmap; | 
|---|
| 135 | snap->ref = (IRRef1)J->cur.nins; | 
|---|
| 136 | snap->nent = (uint8_t)nent; | 
|---|
| 137 | snap->nslots = (uint8_t)nslots; | 
|---|
| 138 | snap->count = 0; | 
|---|
| 139 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | /* Add or merge a snapshot. */ | 
|---|
| 143 | void lj_snap_add(jit_State *J) | 
|---|
| 144 | { | 
|---|
| 145 | MSize nsnap = J->cur.nsnap; | 
|---|
| 146 | MSize nsnapmap = J->cur.nsnapmap; | 
|---|
| 147 | /* Merge if no ins. inbetween or if requested and no guard inbetween. */ | 
|---|
| 148 | if (J->mergesnap ? !irt_isguard(J->guardemit) : | 
|---|
| 149 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { | 
|---|
| 150 | if (nsnap == 1) {  /* But preserve snap #0 PC. */ | 
|---|
| 151 | emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); | 
|---|
| 152 | goto nomerge; | 
|---|
| 153 | } | 
|---|
| 154 | nsnapmap = J->cur.snap[--nsnap].mapofs; | 
|---|
| 155 | } else { | 
|---|
| 156 | nomerge: | 
|---|
| 157 | lj_snap_grow_buf(J, nsnap+1); | 
|---|
| 158 | J->cur.nsnap = (uint16_t)(nsnap+1); | 
|---|
| 159 | } | 
|---|
| 160 | J->mergesnap = 0; | 
|---|
| 161 | J->guardemit.irt = 0; | 
|---|
| 162 | snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap); | 
|---|
| 163 | } | 
|---|
| 164 |  | 
|---|
| 165 | /* -- Snapshot modification ----------------------------------------------- */ | 
|---|
| 166 |  | 
|---|
| 167 | #define SNAP_USEDEF_SLOTS	(LJ_MAX_JSLOTS+LJ_STACK_EXTRA) | 
|---|
| 168 |  | 
|---|
| 169 | /* Find unused slots with reaching-definitions bytecode data-flow analysis. */ | 
|---|
| 170 | static BCReg snap_usedef(jit_State *J, uint8_t *udf, | 
|---|
| 171 | const BCIns *pc, BCReg maxslot) | 
|---|
| 172 | { | 
|---|
| 173 | BCReg s; | 
|---|
| 174 | GCobj *o; | 
|---|
| 175 |  | 
|---|
| 176 | if (maxslot == 0) return 0; | 
|---|
| 177 | #ifdef LUAJIT_USE_VALGRIND | 
|---|
| 178 | /* Avoid errors for harmless reads beyond maxslot. */ | 
|---|
| 179 | memset(udf, 1, SNAP_USEDEF_SLOTS); | 
|---|
| 180 | #else | 
|---|
| 181 | memset(udf, 1, maxslot); | 
|---|
| 182 | #endif | 
|---|
| 183 |  | 
|---|
| 184 | /* Treat open upvalues as used. */ | 
|---|
| 185 | o = gcref(J->L->openupval); | 
|---|
| 186 | while (o) { | 
|---|
| 187 | if (uvval(gco2uv(o)) < J->L->base) break; | 
|---|
| 188 | udf[uvval(gco2uv(o)) - J->L->base] = 0; | 
|---|
| 189 | o = gcref(o->gch.nextgc); | 
|---|
| 190 | } | 
|---|
| 191 |  | 
|---|
| 192 | #define USE_SLOT(s)		udf[(s)] &= ~1 | 
|---|
| 193 | #define DEF_SLOT(s)		udf[(s)] *= 3 | 
|---|
| 194 |  | 
|---|
| 195 | /* Scan through following bytecode and check for uses/defs. */ | 
|---|
| 196 | lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); | 
|---|
| 197 | for (;;) { | 
|---|
| 198 | BCIns ins = *pc++; | 
|---|
| 199 | BCOp op = bc_op(ins); | 
|---|
| 200 | switch (bcmode_b(op)) { | 
|---|
| 201 | case BCMvar: USE_SLOT(bc_b(ins)); break; | 
|---|
| 202 | default: break; | 
|---|
| 203 | } | 
|---|
| 204 | switch (bcmode_c(op)) { | 
|---|
| 205 | case BCMvar: USE_SLOT(bc_c(ins)); break; | 
|---|
| 206 | case BCMrbase: | 
|---|
| 207 | lua_assert(op == BC_CAT); | 
|---|
| 208 | for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); | 
|---|
| 209 | for (; s < maxslot; s++) DEF_SLOT(s); | 
|---|
| 210 | break; | 
|---|
| 211 | case BCMjump: | 
|---|
| 212 | handle_jump: { | 
|---|
| 213 | BCReg minslot = bc_a(ins); | 
|---|
| 214 | if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT; | 
|---|
| 215 | else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1; | 
|---|
| 216 | else if (op == BC_UCLO) { pc += bc_j(ins); break; } | 
|---|
| 217 | for (s = minslot; s < maxslot; s++) DEF_SLOT(s); | 
|---|
| 218 | return minslot < maxslot ? minslot : maxslot; | 
|---|
| 219 | } | 
|---|
| 220 | case BCMlit: | 
|---|
| 221 | if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { | 
|---|
| 222 | goto handle_jump; | 
|---|
| 223 | } else if (bc_isret(op)) { | 
|---|
| 224 | BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1); | 
|---|
| 225 | for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); | 
|---|
| 226 | for (; s < top; s++) USE_SLOT(s); | 
|---|
| 227 | for (; s < maxslot; s++) DEF_SLOT(s); | 
|---|
| 228 | return 0; | 
|---|
| 229 | } | 
|---|
| 230 | break; | 
|---|
| 231 | case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */ | 
|---|
| 232 | default: break; | 
|---|
| 233 | } | 
|---|
| 234 | switch (bcmode_a(op)) { | 
|---|
| 235 | case BCMvar: USE_SLOT(bc_a(ins)); break; | 
|---|
| 236 | case BCMdst: | 
|---|
| 237 | if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins)); | 
|---|
| 238 | break; | 
|---|
| 239 | case BCMbase: | 
|---|
| 240 | if (op >= BC_CALLM && op <= BC_VARG) { | 
|---|
| 241 | BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? | 
|---|
| 242 | maxslot : (bc_a(ins) + bc_c(ins)); | 
|---|
| 243 | s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); | 
|---|
| 244 | for (; s < top; s++) USE_SLOT(s); | 
|---|
| 245 | for (; s < maxslot; s++) DEF_SLOT(s); | 
|---|
| 246 | if (op == BC_CALLT || op == BC_CALLMT) { | 
|---|
| 247 | for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); | 
|---|
| 248 | return 0; | 
|---|
| 249 | } | 
|---|
| 250 | } else if (op == BC_KNIL) { | 
|---|
| 251 | for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s); | 
|---|
| 252 | } else if (op == BC_TSETM) { | 
|---|
| 253 | for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s); | 
|---|
| 254 | } | 
|---|
| 255 | break; | 
|---|
| 256 | default: break; | 
|---|
| 257 | } | 
|---|
| 258 | lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); | 
|---|
| 259 | } | 
|---|
| 260 |  | 
|---|
| 261 | #undef USE_SLOT | 
|---|
| 262 | #undef DEF_SLOT | 
|---|
| 263 |  | 
|---|
| 264 | return 0;  /* unreachable */ | 
|---|
| 265 | } | 
|---|
| 266 |  | 
|---|
| 267 | /* Purge dead slots before the next snapshot. */ | 
|---|
| 268 | void lj_snap_purge(jit_State *J) | 
|---|
| 269 | { | 
|---|
| 270 | uint8_t udf[SNAP_USEDEF_SLOTS]; | 
|---|
| 271 | BCReg maxslot = J->maxslot; | 
|---|
| 272 | BCReg s = snap_usedef(J, udf, J->pc, maxslot); | 
|---|
| 273 | for (; s < maxslot; s++) | 
|---|
| 274 | if (udf[s] != 0) | 
|---|
| 275 | J->base[s] = 0;  /* Purge dead slots. */ | 
|---|
| 276 | } | 
|---|
| 277 |  | 
|---|
| 278 | /* Shrink last snapshot. */ | 
|---|
| 279 | void lj_snap_shrink(jit_State *J) | 
|---|
| 280 | { | 
|---|
| 281 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 
|---|
| 282 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | 
|---|
| 283 | MSize n, m, nlim, nent = snap->nent; | 
|---|
| 284 | uint8_t udf[SNAP_USEDEF_SLOTS]; | 
|---|
| 285 | BCReg maxslot = J->maxslot; | 
|---|
| 286 | BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); | 
|---|
| 287 | BCReg baseslot = J->baseslot; | 
|---|
| 288 | maxslot += baseslot; | 
|---|
| 289 | minslot += baseslot; | 
|---|
| 290 | snap->nslots = (uint8_t)maxslot; | 
|---|
| 291 | for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */ | 
|---|
| 292 | BCReg s = snap_slot(map[n]); | 
|---|
| 293 | if (s < minslot || (s < maxslot && udf[s-baseslot] == 0)) | 
|---|
| 294 | map[m++] = map[n];  /* Only copy used slots. */ | 
|---|
| 295 | } | 
|---|
| 296 | snap->nent = (uint8_t)m; | 
|---|
| 297 | nlim = J->cur.nsnapmap - snap->mapofs - 1; | 
|---|
| 298 | while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */ | 
|---|
| 299 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */ | 
|---|
| 300 | } | 
|---|
| 301 |  | 
|---|
| 302 | /* -- Snapshot access ----------------------------------------------------- */ | 
|---|
| 303 |  | 
|---|
| 304 | /* Initialize a Bloom Filter with all renamed refs. | 
|---|
| 305 | ** There are very few renames (often none), so the filter has | 
|---|
| 306 | ** very few bits set. This makes it suitable for negative filtering. | 
|---|
| 307 | */ | 
|---|
| 308 | static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim) | 
|---|
| 309 | { | 
|---|
| 310 | BloomFilter rfilt = 0; | 
|---|
| 311 | IRIns *ir; | 
|---|
| 312 | for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) | 
|---|
| 313 | if (ir->op2 <= lim) | 
|---|
| 314 | bloomset(rfilt, ir->op1); | 
|---|
| 315 | return rfilt; | 
|---|
| 316 | } | 
|---|
| 317 |  | 
|---|
| 318 | /* Process matching renames to find the original RegSP. */ | 
|---|
| 319 | static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) | 
|---|
| 320 | { | 
|---|
| 321 | IRIns *ir; | 
|---|
| 322 | for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) | 
|---|
| 323 | if (ir->op1 == ref && ir->op2 <= lim) | 
|---|
| 324 | rs = ir->prev; | 
|---|
| 325 | return rs; | 
|---|
| 326 | } | 
|---|
| 327 |  | 
|---|
| 328 | /* Copy RegSP from parent snapshot to the parent links of the IR. */ | 
|---|
| 329 | IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) | 
|---|
| 330 | { | 
|---|
| 331 | SnapShot *snap = &T->snap[snapno]; | 
|---|
| 332 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 
|---|
| 333 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 
|---|
| 334 | MSize n = 0; | 
|---|
| 335 | IRRef ref = 0; | 
|---|
| 336 | for ( ; ; ir++) { | 
|---|
| 337 | uint32_t rs; | 
|---|
| 338 | if (ir->o == IR_SLOAD) { | 
|---|
| 339 | if (!(ir->op2 & IRSLOAD_PARENT)) break; | 
|---|
| 340 | for ( ; ; n++) { | 
|---|
| 341 | lua_assert(n < snap->nent); | 
|---|
| 342 | if (snap_slot(map[n]) == ir->op1) { | 
|---|
| 343 | ref = snap_ref(map[n++]); | 
|---|
| 344 | break; | 
|---|
| 345 | } | 
|---|
| 346 | } | 
|---|
| 347 | } else if (LJ_SOFTFP && ir->o == IR_HIOP) { | 
|---|
| 348 | ref++; | 
|---|
| 349 | } else if (ir->o == IR_PVAL) { | 
|---|
| 350 | ref = ir->op1 + REF_BIAS; | 
|---|
| 351 | } else { | 
|---|
| 352 | break; | 
|---|
| 353 | } | 
|---|
| 354 | rs = T->ir[ref].prev; | 
|---|
| 355 | if (bloomtest(rfilt, ref)) | 
|---|
| 356 | rs = snap_renameref(T, snapno, ref, rs); | 
|---|
| 357 | ir->prev = (uint16_t)rs; | 
|---|
| 358 | lua_assert(regsp_used(rs)); | 
|---|
| 359 | } | 
|---|
| 360 | return ir; | 
|---|
| 361 | } | 
|---|
| 362 |  | 
|---|
| 363 | /* -- Snapshot replay ----------------------------------------------------- */ | 
|---|
| 364 |  | 
|---|
| 365 | /* Replay constant from parent trace. */ | 
|---|
| 366 | static TRef snap_replay_const(jit_State *J, IRIns *ir) | 
|---|
| 367 | { | 
|---|
| 368 | /* Only have to deal with constants that can occur in stack slots. */ | 
|---|
| 369 | switch ((IROp)ir->o) { | 
|---|
| 370 | case IR_KPRI: return TREF_PRI(irt_type(ir->t)); | 
|---|
| 371 | case IR_KINT: return lj_ir_kint(J, ir->i); | 
|---|
| 372 | case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); | 
|---|
| 373 | case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); | 
|---|
| 374 | case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); | 
|---|
| 375 | case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */ | 
|---|
| 376 | default: lua_assert(0); return TREF_NIL; break; | 
|---|
| 377 | } | 
|---|
| 378 | } | 
|---|
| 379 |  | 
|---|
| 380 | /* De-duplicate parent reference. */ | 
|---|
| 381 | static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) | 
|---|
| 382 | { | 
|---|
| 383 | MSize j; | 
|---|
| 384 | for (j = 0; j < nmax; j++) | 
|---|
| 385 | if (snap_ref(map[j]) == ref) | 
|---|
| 386 | return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); | 
|---|
| 387 | return 0; | 
|---|
| 388 | } | 
|---|
| 389 |  | 
|---|
| 390 | /* Emit parent reference with de-duplication. */ | 
|---|
| 391 | static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax, | 
|---|
| 392 | BloomFilter seen, IRRef ref) | 
|---|
| 393 | { | 
|---|
| 394 | IRIns *ir = &T->ir[ref]; | 
|---|
| 395 | TRef tr; | 
|---|
| 396 | if (irref_isk(ref)) | 
|---|
| 397 | tr = snap_replay_const(J, ir); | 
|---|
| 398 | else if (!regsp_used(ir->prev)) | 
|---|
| 399 | tr = 0; | 
|---|
| 400 | else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0) | 
|---|
| 401 | tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0); | 
|---|
| 402 | return tr; | 
|---|
| 403 | } | 
|---|
| 404 |  | 
|---|
| 405 | /* Check whether a sunk store corresponds to an allocation. Slow path. */ | 
|---|
| 406 | static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs) | 
|---|
| 407 | { | 
|---|
| 408 | if (irs->o == IR_ASTORE || irs->o == IR_HSTORE || | 
|---|
| 409 | irs->o == IR_FSTORE || irs->o == IR_XSTORE) { | 
|---|
| 410 | IRIns *irk = IR(irs->op1); | 
|---|
| 411 | if (irk->o == IR_AREF || irk->o == IR_HREFK) | 
|---|
| 412 | irk = IR(irk->op1); | 
|---|
| 413 | return (IR(irk->op1) == ira); | 
|---|
| 414 | } | 
|---|
| 415 | return 0; | 
|---|
| 416 | } | 
|---|
| 417 |  | 
|---|
| 418 | /* Check whether a sunk store corresponds to an allocation. Fast path. */ | 
|---|
| 419 | static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs) | 
|---|
| 420 | { | 
|---|
| 421 | if (irs->s != 255) | 
|---|
| 422 | return (ira + irs->s == irs);  /* Fast check. */ | 
|---|
| 423 | return snap_sunk_store2(J, ira, irs); | 
|---|
| 424 | } | 
|---|
| 425 |  | 
|---|
| 426 | /* Replay snapshot state to setup side trace. Rebuilds J->slot[], J->baseslot, J->framedepth, J->base and J->maxslot from snapshot J->exitno of trace T, then adds a snapshot via lj_snap_add(). */ | 
|---|
| 427 | void lj_snap_replay(jit_State *J, GCtrace *T) | 
|---|
| 428 | { | 
|---|
| 429 | SnapShot *snap = &T->snap[J->exitno]; | 
|---|
| 430 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 
|---|
| 431 | MSize n, nent = snap->nent; | 
|---|
| 432 | BloomFilter seen = 0; | 
|---|
| 433 | int pass23 = 0;  /* Set to 1 when passes 2 and 3 below are needed. */ | 
|---|
| 434 | J->framedepth = 0; | 
|---|
| 435 | /* Pass 1: Emit IR for slots inherited from parent snapshot. */ | 
|---|
| 436 | for (n = 0; n < nent; n++) { | 
|---|
| 437 | SnapEntry sn = map[n]; | 
|---|
| 438 | BCReg s = snap_slot(sn); | 
|---|
| 439 | IRRef ref = snap_ref(sn); | 
|---|
| 440 | IRIns *ir = &T->ir[ref]; | 
|---|
| 441 | TRef tr; | 
|---|
| 442 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ | 
|---|
| 443 | if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0) | 
|---|
| 444 | goto setslot; | 
|---|
| 445 | bloomset(seen, ref); | 
|---|
| 446 | if (irref_isk(ref)) { | 
|---|
| 447 | tr = snap_replay_const(J, ir); | 
|---|
| 448 | } else if (!regsp_used(ir->prev)) {  /* Cannot be loaded directly: defer to passes 2 and 3. */ | 
|---|
| 449 | pass23 = 1; | 
|---|
| 450 | lua_assert(s != 0); | 
|---|
| 451 | tr = s;  /* Placeholder: the slot number stands in until pass 2 or 3 replaces it. */ | 
|---|
| 452 | } else { | 
|---|
| 453 | IRType t = irt_type(ir->t); | 
|---|
| 454 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; | 
|---|
| 455 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; | 
|---|
| 456 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);  /* Carry over the readonly flag of a parent SLOAD. */ | 
|---|
| 457 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); | 
|---|
| 458 | } | 
|---|
| 459 | setslot: | 
|---|
| 460 | J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */ | 
|---|
| 461 | J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);  /* Count frame/continuation entries, except in slot 0. */ | 
|---|
| 462 | if ((sn & SNAP_FRAME)) | 
|---|
| 463 | J->baseslot = s+1;  /* Base follows the last frame entry seen. */ | 
|---|
| 464 | } | 
|---|
| 465 | if (pass23) { | 
|---|
| 466 | IRIns *irlast = &T->ir[snap->ref]; | 
|---|
| 467 | pass23 = 0;  /* Set again below only if a sunk allocation is found. */ | 
|---|
| 468 | /* Pass 2: Emit dependent PVALs. */ | 
|---|
| 469 | for (n = 0; n < nent; n++) { | 
|---|
| 470 | SnapEntry sn = map[n]; | 
|---|
| 471 | IRRef refp = snap_ref(sn); | 
|---|
| 472 | IRIns *ir = &T->ir[refp]; | 
|---|
| 473 | if (regsp_reg(ir->r) == RID_SUNK) { | 
|---|
| 474 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;  /* Slot does not hold its own placeholder: de-duplicated in pass 1. */ | 
|---|
| 475 | pass23 = 1; | 
|---|
| 476 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | 
|---|
| 477 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | 
|---|
| 478 | if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); | 
|---|
| 479 | if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); | 
|---|
| 480 | if (LJ_HASFFI && ir->o == IR_CNEWI) { | 
|---|
| 481 | if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) | 
|---|
| 482 | snap_pref(J, T, map, nent, seen, (ir+1)->op2); | 
|---|
| 483 | } else { | 
|---|
| 484 | IRIns *irs; | 
|---|
| 485 | for (irs = ir+1; irs < irlast; irs++)  /* Scan for sunk stores belonging to this allocation. */ | 
|---|
| 486 | if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { | 
|---|
| 487 | if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) | 
|---|
| 488 | snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); | 
|---|
| 489 | else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | 
|---|
| 490 | irs+1 < irlast && (irs+1)->o == IR_HIOP) | 
|---|
| 491 | snap_pref(J, T, map, nent, seen, (irs+1)->op2); | 
|---|
| 492 | } | 
|---|
| 493 | } | 
|---|
| 494 | } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { | 
|---|
| 495 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | 
|---|
| 496 | J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);  /* Slot gets the operand of the CONV. */ | 
|---|
| 497 | } | 
|---|
| 498 | } | 
|---|
| 499 | /* Pass 3: Replay sunk instructions. Only runs if pass 2 found a sunk allocation. */ | 
|---|
| 500 | for (n = 0; pass23 && n < nent; n++) { | 
|---|
| 501 | SnapEntry sn = map[n]; | 
|---|
| 502 | IRRef refp = snap_ref(sn); | 
|---|
| 503 | IRIns *ir = &T->ir[refp]; | 
|---|
| 504 | if (regsp_reg(ir->r) == RID_SUNK) { | 
|---|
| 505 | TRef op1, op2; | 
|---|
| 506 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */ | 
|---|
| 507 | J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];  /* Copy from the slot this one was de-duplicated against. */ | 
|---|
| 508 | continue; | 
|---|
| 509 | } | 
|---|
| 510 | op1 = ir->op1; | 
|---|
| 511 | if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); | 
|---|
| 512 | op2 = ir->op2; | 
|---|
| 513 | if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); | 
|---|
| 514 | if (LJ_HASFFI && ir->o == IR_CNEWI) { | 
|---|
| 515 | if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { | 
|---|
| 516 | lj_needsplit(J);  /* Emit joining HIOP. */ | 
|---|
| 517 | op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, | 
|---|
| 518 | snap_pref(J, T, map, nent, seen, (ir+1)->op2)); | 
|---|
| 519 | } | 
|---|
| 520 | J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2); | 
|---|
| 521 | } else { | 
|---|
| 522 | IRIns *irs; | 
|---|
| 523 | TRef tr = emitir(ir->ot, op1, op2);  /* Re-emit the allocation itself. */ | 
|---|
| 524 | J->slot[snap_slot(sn)] = tr; | 
|---|
| 525 | for (irs = ir+1; irs < irlast; irs++)  /* Re-emit the sunk stores belonging to this allocation. */ | 
|---|
| 526 | if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { | 
|---|
| 527 | IRIns *irr = &T->ir[irs->op1]; | 
|---|
| 528 | TRef val, key = irr->op2, tmp = tr; | 
|---|
| 529 | if (irr->o != IR_FREF) { | 
|---|
| 530 | IRIns *irk = &T->ir[key]; | 
|---|
| 531 | if (irr->o == IR_HREFK) | 
|---|
| 532 | key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]), | 
|---|
| 533 | irk->op2); | 
|---|
| 534 | else | 
|---|
| 535 | key = snap_replay_const(J, irk); | 
|---|
| 536 | if (irr->o == IR_HREFK || irr->o == IR_AREF) { | 
|---|
| 537 | IRIns *irf = &T->ir[irr->op1]; | 
|---|
| 538 | tmp = emitir(irf->ot, tmp, irf->op2); | 
|---|
| 539 | } | 
|---|
| 540 | } | 
|---|
| 541 | tmp = emitir(irr->ot, tmp, key);  /* Re-emit the reference the store goes through. */ | 
|---|
| 542 | val = snap_pref(J, T, map, nent, seen, irs->op2); | 
|---|
| 543 | if (val == 0) { | 
|---|
| 544 | IRIns *irc = &T->ir[irs->op2]; | 
|---|
| 545 | lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); | 
|---|
| 546 | val = snap_pref(J, T, map, nent, seen, irc->op1); | 
|---|
| 547 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);  /* Redo the int -> number conversion. */ | 
|---|
| 548 | } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | 
|---|
| 549 | irs+1 < irlast && (irs+1)->o == IR_HIOP) { | 
|---|
| 550 | IRType t = IRT_I64; | 
|---|
| 551 | if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) | 
|---|
| 552 | t = IRT_NUM; | 
|---|
| 553 | lj_needsplit(J); | 
|---|
| 554 | if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {  /* Both halves constant: join them into one 64 bit constant. */ | 
|---|
| 555 | uint64_t k = (uint32_t)T->ir[irs->op2].i + | 
|---|
| 556 | ((uint64_t)T->ir[(irs+1)->op2].i << 32); | 
|---|
| 557 | val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, | 
|---|
| 558 | lj_ir_k64_find(J, k)); | 
|---|
| 559 | } else { | 
|---|
| 560 | val = emitir_raw(IRT(IR_HIOP, t), val, | 
|---|
| 561 | snap_pref(J, T, map, nent, seen, (irs+1)->op2)); | 
|---|
| 562 | } | 
|---|
| 563 | tmp = emitir(IRT(irs->o, t), tmp, val); | 
|---|
| 564 | continue; | 
|---|
| 565 | } | 
|---|
| 566 | tmp = emitir(irs->ot, tmp, val); | 
|---|
| 567 | } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) { | 
|---|
| 568 | emitir(IRT(IR_XBAR, IRT_NIL), 0, 0); | 
|---|
| 569 | } | 
|---|
| 570 | } | 
|---|
| 571 | } | 
|---|
| 572 | } | 
|---|
| 573 | } | 
|---|
| 574 | J->base = J->slot + J->baseslot; | 
|---|
| 575 | J->maxslot = snap->nslots - J->baseslot; | 
|---|
| 576 | lj_snap_add(J); | 
|---|
| 577 | if (pass23)  /* Need explicit GC step _after_ initial snapshot. */ | 
|---|
| 578 | emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0); | 
|---|
| 579 | } | 
|---|
| 580 |  | 
|---|
| 581 | /* -- Snapshot restore ---------------------------------------------------- */ | 
|---|
| 582 |  | 
|---|
| 583 | static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | 
|---|
| 584 | SnapNo snapno, BloomFilter rfilt, | 
|---|
| 585 | IRIns *ir, TValue *o);  /* Forward declaration; the definition follows further below. */ | 
|---|
| 586 |  | 
|---|
| 587 | /* Restore a value from the trace exit state. Stores the value of IR ref `ref` of trace T into *o, taken from a constant, a spill slot or a saved register in ex. */ | 
|---|
| 588 | static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | 
|---|
| 589 | SnapNo snapno, BloomFilter rfilt, | 
|---|
| 590 | IRRef ref, TValue *o) | 
|---|
| 591 | { | 
|---|
| 592 | IRIns *ir = &T->ir[ref]; | 
|---|
| 593 | IRType1 t = ir->t; | 
|---|
| 594 | RegSP rs = ir->prev; | 
|---|
| 595 | if (irref_isk(ref)) {  /* Restore constant slot. */ | 
|---|
| 596 | lj_ir_kvalue(J->L, o, ir); | 
|---|
| 597 | return; | 
|---|
| 598 | } | 
|---|
| 599 | if (LJ_UNLIKELY(bloomtest(rfilt, ref)))  /* Ref may have been renamed: look up the RegSP from the matching IR_RENAME. */ | 
|---|
| 600 | rs = snap_renameref(T, snapno, ref, rs); | 
|---|
| 601 | if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */ | 
|---|
| 602 | int32_t *sps = &ex->spill[regsp_spill(rs)]; | 
|---|
| 603 | if (irt_isinteger(t)) { | 
|---|
| 604 | setintV(o, *sps); | 
|---|
| 605 | #if !LJ_SOFTFP | 
|---|
| 606 | } else if (irt_isnum(t)) { | 
|---|
| 607 | o->u64 = *(uint64_t *)sps;  /* Copy the raw 64 bits of the number. */ | 
|---|
| 608 | #endif | 
|---|
| 609 | } else if (LJ_64 && irt_islightud(t)) { | 
|---|
| 610 | /* 64 bit lightuserdata which may escape already has the tag bits. */ | 
|---|
| 611 | o->u64 = *(uint64_t *)sps; | 
|---|
| 612 | } else { | 
|---|
| 613 | lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */ | 
|---|
| 614 | setgcrefi(o->gcr, *sps); | 
|---|
| 615 | setitype(o, irt_toitype(t)); | 
|---|
| 616 | } | 
|---|
| 617 | } else {  /* Restore from register. */ | 
|---|
| 618 | Reg r = regsp_reg(rs); | 
|---|
| 619 | if (ra_noreg(r)) {  /* No register either: restore the operand of the CONV instead. */ | 
|---|
| 620 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | 
|---|
| 621 | snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); | 
|---|
| 622 | if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));  /* Redo the int -> number conversion on the restored value. */ | 
|---|
| 623 | return; | 
|---|
| 624 | } else if (irt_isinteger(t)) { | 
|---|
| 625 | setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); | 
|---|
| 626 | #if !LJ_SOFTFP | 
|---|
| 627 | } else if (irt_isnum(t)) { | 
|---|
| 628 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); | 
|---|
| 629 | #endif | 
|---|
| 630 | } else if (LJ_64 && irt_islightud(t)) { | 
|---|
| 631 | /* 64 bit lightuserdata which may escape already has the tag bits. */ | 
|---|
| 632 | o->u64 = ex->gpr[r-RID_MIN_GPR]; | 
|---|
| 633 | } else { | 
|---|
| 634 | if (!irt_ispri(t))  /* Primitive types carry no payload, only the type tag set below. */ | 
|---|
| 635 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); | 
|---|
| 636 | setitype(o, irt_toitype(t)); | 
|---|
| 637 | } | 
|---|
| 638 | } | 
|---|
| 639 | } | 
|---|
| 640 |  | 
|---|
| 641 | #if LJ_HASFFI | 
|---|
| 642 | /* Restore raw data from the trace exit state. Copies sz (1, 2, 4 or 8) bytes holding the value of IR ref `ref` of trace T to dst. */ | 
|---|
| 643 | static void snap_restoredata(GCtrace *T, ExitState *ex, | 
|---|
| 644 | SnapNo snapno, BloomFilter rfilt, | 
|---|
| 645 | IRRef ref, void *dst, CTSize sz) | 
|---|
| 646 | { | 
|---|
| 647 | IRIns *ir = &T->ir[ref]; | 
|---|
| 648 | RegSP rs = ir->prev; | 
|---|
| 649 | int32_t *src;  /* Points at the data that the final copy below reads from. */ | 
|---|
| 650 | uint64_t tmp;  /* Scratch space for zero-extended 32 bit values. */ | 
|---|
| 651 | if (irref_isk(ref)) { | 
|---|
| 652 | if (ir->o == IR_KNUM || ir->o == IR_KINT64) { | 
|---|
| 653 | src = mref(ir->ptr, int32_t); | 
|---|
| 654 | } else if (sz == 8) {  /* Zero-extend a 32 bit constant to 64 bits. */ | 
|---|
| 655 | tmp = (uint64_t)(uint32_t)ir->i; | 
|---|
| 656 | src = (int32_t *)&tmp; | 
|---|
| 657 | } else { | 
|---|
| 658 | src = &ir->i; | 
|---|
| 659 | } | 
|---|
| 660 | } else { | 
|---|
| 661 | if (LJ_UNLIKELY(bloomtest(rfilt, ref)))  /* Ref may have been renamed: look up the RegSP from the matching IR_RENAME. */ | 
|---|
| 662 | rs = snap_renameref(T, snapno, ref, rs); | 
|---|
| 663 | if (ra_hasspill(regsp_spill(rs))) { | 
|---|
| 664 | src = &ex->spill[regsp_spill(rs)]; | 
|---|
| 665 | if (sz == 8 && !irt_is64(ir->t)) {  /* Zero-extend a 32 bit spilled value to 64 bits. */ | 
|---|
| 666 | tmp = (uint64_t)(uint32_t)*src; | 
|---|
| 667 | src = (int32_t *)&tmp; | 
|---|
| 668 | } | 
|---|
| 669 | } else { | 
|---|
| 670 | Reg r = regsp_reg(rs); | 
|---|
| 671 | if (ra_noreg(r)) { | 
|---|
| 672 | /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ | 
|---|
| 673 | lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | 
|---|
| 674 | snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);  /* Fetch the 32 bit operand of the CONV into dst first. */ | 
|---|
| 675 | *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;  /* Then convert it to a number in place. */ | 
|---|
| 676 | return; | 
|---|
| 677 | } | 
|---|
| 678 | src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; | 
|---|
| 679 | #if !LJ_SOFTFP | 
|---|
| 680 | if (r >= RID_MAX_GPR) {  /* Register is not a GPR: read from the saved FPRs instead. */ | 
|---|
| 681 | src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; | 
|---|
| 682 | #if LJ_TARGET_PPC | 
|---|
| 683 | if (sz == 4) {  /* PPC FPRs are always doubles. */ | 
|---|
| 684 | *(float *)dst = (float)*(double *)src; | 
|---|
| 685 | return; | 
|---|
| 686 | } | 
|---|
| 687 | #else | 
|---|
| 688 | if (LJ_BE && sz == 4) src++;  /* Big-endian: use the second 32 bit word of the saved FPR. */ | 
|---|
| 689 | #endif | 
|---|
| 690 | } | 
|---|
| 691 | #endif | 
|---|
| 692 | } | 
|---|
| 693 | } | 
|---|
| 694 | lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); | 
|---|
| 695 | if (sz == 4) *(int32_t *)dst = *src; | 
|---|
| 696 | else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; | 
|---|
| 697 | else if (sz == 1) *(int8_t *)dst = (int8_t)*src;  /* Truncate the 32 bit source value. */ | 
|---|
| 698 | else *(int16_t *)dst = (int16_t)*src; | 
|---|
| 699 | } | 
|---|
| 700 | #endif | 
|---|
| 701 |  | 
|---|
| 702 | /* Unsink allocation from the trace exit state. Unsink sunk stores. */ | 
|---|
| 703 | static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | 
|---|
| 704 | SnapNo snapno, BloomFilter rfilt, | 
|---|
| 705 | IRIns *ir, TValue *o) | 
|---|
| 706 | { | 
|---|
| 707 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | 
|---|
| 708 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | 
|---|
| 709 | #if LJ_HASFFI | 
|---|
| 710 | if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { | 
|---|
| 711 | CTState *cts = ctype_cts(J->L); | 
|---|
| 712 | CTypeID id = (CTypeID)T->ir[ir->op1].i; | 
|---|
| 713 | CTSize sz = lj_ctype_size(cts, id); | 
|---|
| 714 | GCcdata *cd = lj_cdata_new(cts, id, sz); | 
|---|
| 715 | setcdataV(J->L, o, cd); | 
|---|
| 716 | if (ir->o == IR_CNEWI) { | 
|---|
| 717 | uint8_t *p = (uint8_t *)cdataptr(cd); | 
|---|
| 718 | lua_assert(sz == 4 || sz == 8); | 
|---|
| 719 | if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { | 
|---|
| 720 | snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); | 
|---|
| 721 | if (LJ_BE) p += 4; | 
|---|
| 722 | sz = 4; | 
|---|
| 723 | } | 
|---|
| 724 | snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); | 
|---|
| 725 | } else { | 
|---|
| 726 | IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; | 
|---|
| 727 | for (irs = ir+1; irs < irlast; irs++) | 
|---|
| 728 | if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { | 
|---|
| 729 | IRIns *iro = &T->ir[T->ir[irs->op1].op2]; | 
|---|
| 730 | uint8_t *p = (uint8_t *)cd; | 
|---|
| 731 | CTSize szs; | 
|---|
| 732 | lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); | 
|---|
| 733 | lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); | 
|---|
| 734 | if (irt_is64(irs->t)) szs = 8; | 
|---|
| 735 | else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; | 
|---|
| 736 | else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; | 
|---|
| 737 | else szs = 4; | 
|---|
| 738 | if (LJ_64 && iro->o == IR_KINT64) | 
|---|
| 739 | p += (int64_t)ir_k64(iro)->u64; | 
|---|
| 740 | else | 
|---|
| 741 | p += iro->i; | 
|---|
| 742 | lua_assert(p >= (uint8_t *)cdataptr(cd) && | 
|---|
| 743 | p + szs <= (uint8_t *)cdataptr(cd) + sz); | 
|---|
| 744 | if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { | 
|---|
| 745 | lua_assert(szs == 4); | 
|---|
| 746 | snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); | 
|---|
| 747 | if (LJ_BE) p += 4; | 
|---|
| 748 | } | 
|---|
| 749 | snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); | 
|---|
| 750 | } | 
|---|
| 751 | } | 
|---|
| 752 | } else | 
|---|
| 753 | #endif | 
|---|
| 754 | { | 
|---|
| 755 | IRIns *irs, *irlast; | 
|---|
| 756 | GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) : | 
|---|
| 757 | lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1])); | 
|---|
| 758 | settabV(J->L, o, t); | 
|---|
| 759 | irlast = &T->ir[T->snap[snapno].ref]; | 
|---|
| 760 | for (irs = ir+1; irs < irlast; irs++) | 
|---|
| 761 | if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) { | 
|---|
| 762 | IRIns *irk = &T->ir[irs->op1]; | 
|---|
| 763 | TValue tmp, *val; | 
|---|
| 764 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | 
|---|
| 765 | irs->o == IR_FSTORE); | 
|---|
| 766 | if (irk->o == IR_FREF) { | 
|---|
| 767 | lua_assert(irk->op2 == IRFL_TAB_META); | 
|---|
| 768 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); | 
|---|
| 769 | /* NOBARRIER: The table is new (marked white). */ | 
|---|
| 770 | setgcref(t->metatable, obj2gco(tabV(&tmp))); | 
|---|
| 771 | } else { | 
|---|
| 772 | irk = &T->ir[irk->op2]; | 
|---|
| 773 | if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1]; | 
|---|
| 774 | lj_ir_kvalue(J->L, &tmp, irk); | 
|---|
| 775 | val = lj_tab_set(J->L, t, &tmp); | 
|---|
| 776 | /* NOBARRIER: The table is new (marked white). */ | 
|---|
| 777 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); | 
|---|
| 778 | if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { | 
|---|
| 779 | snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); | 
|---|
| 780 | val->u32.hi = tmp.u32.lo; | 
|---|
| 781 | } | 
|---|
| 782 | } | 
|---|
| 783 | } | 
|---|
| 784 | } | 
|---|
| 785 | } | 
|---|
| 786 |  | 
|---|
| 787 | /* Restore interpreter state from exit state with the help of a snapshot. */ | 
|---|
| 788 | const BCIns *lj_snap_restore(jit_State *J, void *exptr) | 
|---|
| 789 | { | 
|---|
| 790 | ExitState *ex = (ExitState *)exptr; | 
|---|
| 791 | SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */ | 
|---|
| 792 | GCtrace *T = traceref(J, J->parent); | 
|---|
| 793 | SnapShot *snap = &T->snap[snapno]; | 
|---|
| 794 | MSize n, nent = snap->nent; | 
|---|
| 795 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 
|---|
| 796 | SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; | 
|---|
| 797 | int32_t ftsz0; | 
|---|
| 798 | TValue *frame; | 
|---|
| 799 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 
|---|
| 800 | const BCIns *pc = snap_pc(map[nent]); | 
|---|
| 801 | lua_State *L = J->L; | 
|---|
| 802 |  | 
|---|
| 803 | /* Set interpreter PC to the next PC to get correct error messages. */ | 
|---|
| 804 | setcframe_pc(cframe_raw(L->cframe), pc+1); | 
|---|
| 805 |  | 
|---|
| 806 | /* Make sure the stack is big enough for the slots from the snapshot. */ | 
|---|
| 807 | if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) { | 
|---|
| 808 | L->top = curr_topL(L); | 
|---|
| 809 | lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize); | 
|---|
| 810 | } | 
|---|
| 811 |  | 
|---|
| 812 | /* Fill stack slots with data from the registers and spill slots. */ | 
|---|
| 813 | frame = L->base-1; | 
|---|
| 814 | ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */ | 
|---|
| 815 | for (n = 0; n < nent; n++) { | 
|---|
| 816 | SnapEntry sn = map[n]; | 
|---|
| 817 | if (!(sn & SNAP_NORESTORE)) { | 
|---|
| 818 | TValue *o = &frame[snap_slot(sn)]; | 
|---|
| 819 | IRRef ref = snap_ref(sn); | 
|---|
| 820 | IRIns *ir = &T->ir[ref]; | 
|---|
| 821 | if (ir->r == RID_SUNK) { | 
|---|
| 822 | MSize j; | 
|---|
| 823 | for (j = 0; j < n; j++) | 
|---|
| 824 | if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */ | 
|---|
| 825 | copyTV(L, o, &frame[snap_slot(map[j])]); | 
|---|
| 826 | goto dupslot; | 
|---|
| 827 | } | 
|---|
| 828 | snap_unsink(J, T, ex, snapno, rfilt, ir, o); | 
|---|
| 829 | dupslot: | 
|---|
| 830 | continue; | 
|---|
| 831 | } | 
|---|
| 832 | snap_restoreval(J, T, ex, snapno, rfilt, ref, o); | 
|---|
| 833 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { | 
|---|
| 834 | TValue tmp; | 
|---|
| 835 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); | 
|---|
| 836 | o->u32.hi = tmp.u32.lo; | 
|---|
| 837 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 
|---|
| 838 | /* Overwrite tag with frame link. */ | 
|---|
| 839 | o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; | 
|---|
| 840 | L->base = o+1; | 
|---|
| 841 | } | 
|---|
| 842 | } | 
|---|
| 843 | } | 
|---|
| 844 | lua_assert(map + nent == flinks); | 
|---|
| 845 |  | 
|---|
| 846 | /* Compute current stack top. */ | 
|---|
| 847 | switch (bc_op(*pc)) { | 
|---|
| 848 | default: | 
|---|
| 849 | if (bc_op(*pc) < BC_FUNCF) { | 
|---|
| 850 | L->top = curr_topL(L); | 
|---|
| 851 | break; | 
|---|
| 852 | } | 
|---|
| 853 | /* fallthrough */ | 
|---|
| 854 | case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM: | 
|---|
| 855 | L->top = frame + snap->nslots; | 
|---|
| 856 | break; | 
|---|
| 857 | } | 
|---|
| 858 | return pc; | 
|---|
| 859 | } | 
|---|
| 860 |  | 
|---|
| 861 | #undef IR | 
|---|
| 862 | #undef emitir_raw | 
|---|
| 863 | #undef emitir | 
|---|
| 864 |  | 
|---|
| 865 | #endif | 
|---|
| 866 |  | 
|---|