/*
** Snapshot handling.
** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
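  /* Each map entry packs the slot number, the frame/continuation flags and
  ** the IR reference of the slot's value (see the SNAP_TR() encoding).
  */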
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = IR(ref);
      if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /* No need to snapshot unmodified non-inherited slots. */
        if (!(ir->op2 & IRSLOAD_INHERIT))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;  /* Soft-float number: hi word is at ref+1. */
      map[n++] = sn;
    }
  }
  return n;
}

/* Add frame links at the end of the snapshot. */
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot;
  cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
  MSize f = 0;
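  /* One or two entries are added per frame below. The ftop pointer tracks
  ** the highest stack slot needed by any frame and is returned as the
  ** snapshot's topslot.
  */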
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
      map[f++] = SNAP_MKPC(frame_pc(frame));
      frame = frame_prevl(frame);
      if (frame + funcproto(frame_func(frame))->framesize > ftop)
        ftop = frame + funcproto(frame_func(frame))->framesize;
    } else if (frame_iscont(frame)) {
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
      frame = frame_prevd(frame);
    } else {
      lua_assert(!frame_isc(frame));
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      frame = frame_prevd(frame);
    }
  }
  lua_assert(f == (MSize)(1 + J->framedepth));
  return (BCReg)(ftop - lim);
}

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nent = (uint8_t)nent;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge with the previous snapshot if no IR was emitted in between, or if
  ** merging was requested and no guard was emitted in between.
  */
  if (J->mergesnap ? !irt_isguard(J->guardemit) :
      (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
                         const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s) udf[(s)] &= ~1
#define DEF_SLOT(s) udf[(s)] *= 3
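
/* udf[s] starts at 1. A use clears bit 0; a definition multiplies by 3,
** which keeps 0 at 0 but otherwise leaves the value non-zero even after a
** later use. So udf[s] != 0 after the scan means the slot's current value
** is never read before being overwritten, i.e. the slot is dead.
*/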

  /* Scan through following bytecode and check for uses/defs. */
  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lua_assert(op == BC_CAT);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
        goto handle_jump;
      } else if (bc_isret(op)) {
        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
                    maxslot : (bc_a(ins) + bc_c(ins));
        s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        if (op == BC_CALLT || op == BC_CALLMT) {
          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
          return 0;
        }
      } else if (op == BC_KNIL) {
        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink the last snapshot: drop entries for slots above the current frame
** top and for slots the following bytecode overwrites before reading them.
*/
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
  BCReg baseslot = J->baseslot;
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
}

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
        lua_assert(n < snap->nent);
        if (snap_slot(map[n]) == ir->op1) {
          ref = snap_ref(map[n++]);
          break;
        }
      }
    } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lua_assert(regsp_used(rs));
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
  case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lua_assert(0); return TREF_NIL; break;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
                      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;  /* No register/spill in parent; caller must rematerialize it. */
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = IR(irs->op1);
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = IR(irk->op1);
    return (IR(irk->op1) == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs)
{
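  /* The store's spill slot field caches the distance back to its allocation;
  ** 255 means the distance didn't fit, so verify via the slow path instead.
  */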
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(J, ira, irs);
}

/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
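  /* Pass 1 below emits constants and SLOADs for the inherited slots. If any
  ** slot refers to a value without a register or spill slot in the parent
  ** (a sunk allocation or an eliminated conversion), pass23 is set and two
  ** more passes emit the required PVALs and replay the sunk allocations
  ** together with their stores.
  */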
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
        pass23 = 1;
        lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
                   ir->o == IR_CNEW || ir->o == IR_CNEWI);
        if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
        if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
            snap_pref(J, T, map, nent, seen, (ir+1)->op2);
        } else {
          IRIns *irs;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
              if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
                snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
              else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                       irs+1 < irlast && (irs+1)->o == IR_HIOP)
                snap_pref(J, T, map, nent, seen, (irs+1)->op2);
            }
        }
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
        lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        TRef op1, op2;
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
          J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
          continue;
        }
        op1 = ir->op1;
        if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
        op2 = ir->op2;
        if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
            lj_needsplit(J);  /* Emit joining HIOP. */
            op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
                             snap_pref(J, T, map, nent, seen, (ir+1)->op2));
          }
          J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
        } else {
          IRIns *irs;
          TRef tr = emitir(ir->ot, op1, op2);
          J->slot[snap_slot(sn)] = tr;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
              IRIns *irr = &T->ir[irs->op1];
              TRef val, key = irr->op2, tmp = tr;
              if (irr->o != IR_FREF) {
                IRIns *irk = &T->ir[key];
                if (irr->o == IR_HREFK)
                  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
                                    irk->op2);
                else
                  key = snap_replay_const(J, irk);
                if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                  IRIns *irf = &T->ir[irr->op1];
                  tmp = emitir(irf->ot, tmp, irf->op2);
                }
              }
              tmp = emitir(irr->ot, tmp, key);
              val = snap_pref(J, T, map, nent, seen, irs->op2);
              if (val == 0) {
                IRIns *irc = &T->ir[irs->op2];
                lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
                val = snap_pref(J, T, map, nent, seen, irc->op1);
                val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
              } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                         irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                IRType t = IRT_I64;
                if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
                  t = IRT_NUM;
                lj_needsplit(J);
                if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                  uint64_t k = (uint32_t)T->ir[irs->op2].i +
                               ((uint64_t)T->ir[(irs+1)->op2].i << 32);
                  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
                                  lj_ir_k64_find(J, k));
                } else {
                  val = emitir_raw(IRT(IR_HIOP, t), val,
                                   snap_pref(J, T, map, nent, seen, (irs+1)->op2));
                }
                tmp = emitir(IRT(irs->o, t), tmp, val);
                continue;
              }
              tmp = emitir(irs->ot, tmp, val);
            } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
              emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
            }
        }
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcrefi(o->gcr, *sps);
      setitype(o, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
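      /* Neither a register nor a spill slot: this is an integer-to-number
      ** conversion that was optimized away. Restore the integer operand and
      ** convert it back to a number.
      */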
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
    } else {
      if (!irt_ispri(t))
        setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
      setitype(o, irt_toitype(t));
    }
  }
}

#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
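    /* KNUM/KINT64 constants live out-of-line and are reached via ptr; other
    ** constants are the 32 bit immediate, zero-extended if 8 bytes are
    ** requested.
    */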
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = mref(ir->ptr, int32_t);
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        tmp = (uint64_t)(uint32_t)*src;
        src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
        src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        if (LJ_BE && sz == 4) src++;
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif

/* Unsink an allocation from the trace exit state and replay its sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
             ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz = lj_ctype_size(cts, id);
    GCcdata *cd = lj_cdata_new(cts, id, sz);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
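      /* On 32 bit targets a 64 bit cdata value is split into two halves,
      ** joined by a following HIOP: restore the high half first, then the
      ** low half below.
      */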
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
        snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
        if (LJ_BE) p += 4;
        sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
        if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
          IRIns *iro = &T->ir[T->ir[irs->op1].op2];
          uint8_t *p = (uint8_t *)cd;
          CTSize szs;
          lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
          lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
          if (irt_is64(irs->t)) szs = 8;
          else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
          else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
          else szs = 4;
          if (LJ_64 && iro->o == IR_KINT64)
            p += (int64_t)ir_k64(iro)->u64;
          else
            p += iro->i;
          lua_assert(p >= (uint8_t *)cdataptr(cd) &&
                     p + szs <= (uint8_t *)cdataptr(cd) + sz);
          if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            lua_assert(szs == 4);
            snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
            if (LJ_BE) p += 4;
          }
          snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
        }
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
                                  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
        IRIns *irk = &T->ir[irs->op1];
        TValue tmp, *val;
        lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
                   irs->o == IR_FSTORE);
        if (irk->o == IR_FREF) {
          lua_assert(irk->op2 == IRFL_TAB_META);
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          setgcref(t->metatable, obj2gco(tabV(&tmp)));
        } else {
          irk = &T->ir[irk->op2];
          if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
          lj_ir_kvalue(J->L, &tmp, irk);
          val = lj_tab_set(J->L, t, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
          if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
            val->u32.hi = tmp.u32.lo;
          }
        }
      }
  }
}

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  int32_t ftsz0;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
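  /* Frame links in the snapshot map are consumed backwards via flinks as
  ** frame or continuation slots are encountered below.
  */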
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
        MSize j;
        for (j = 0; j < n; j++)
          if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
            copyTV(L, o, &frame[snap_slot(map[j])]);
            goto dupslot;
          }
        snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
        continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
        TValue tmp;
        snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
        o->u32.hi = tmp.u32.lo;
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        /* Overwrite tag with frame link. */
        o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
        L->base = o+1;
      }
    }
  }
  lua_assert(map + nent == flinks);

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}

#undef IR
#undef emitir_raw
#undef emitir

#endif