1/*
2** Snapshot handling.
3** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_snap_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_tab.h"
15#include "lj_state.h"
16#include "lj_frame.h"
17#include "lj_bc.h"
18#include "lj_ir.h"
19#include "lj_jit.h"
20#include "lj_iropt.h"
21#include "lj_trace.h"
22#include "lj_snap.h"
23#include "lj_target.h"
24#if LJ_HASFFI
25#include "lj_ctype.h"
26#include "lj_cdata.h"
27#endif
28
/* Pass IR on to next optimization in chain (FOLD).
** Stores the instruction with lj_ir_set() and evaluates to the result of
** lj_opt_fold(). Expects a jit_State pointer named J in the enclosing scope.
*/
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations.
** Stores the instruction with lj_ir_set() and evaluates to the result of
** lj_ir_emit(). Expects a jit_State pointer named J in the enclosing scope.
*/
#define emitir_raw(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
34
35/* -- Snapshot buffer allocation ------------------------------------------ */
36
37/* Grow snapshot buffer. */
38void lj_snap_grow_buf_(jit_State *J, MSize need)
39{
40 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
41 if (need > maxsnap)
42 lj_trace_err(J, LJ_TRERR_SNAPOV);
43 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
44 J->cur.snap = J->snapbuf;
45}
46
47/* Grow snapshot map buffer. */
48void lj_snap_grow_map_(jit_State *J, MSize need)
49{
50 if (need < 2*J->sizesnapmap)
51 need = 2*J->sizesnapmap;
52 else if (need < 64)
53 need = 64;
54 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
55 J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
56 J->cur.snapmap = J->snapmapbuf;
57 J->sizesnapmap = need;
58}
59
60/* -- Snapshot generation ------------------------------------------------- */
61
/* Add all modified slots to the snapshot.
** Scans J->slot[0..nslots-1] and appends one SnapEntry to map[] for every
** slot that carries an IR reference and is not filtered out below.
** Returns the number of entries written to map[].
*/
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
#if LJ_FR2
    if (s == 1) {  /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
      if ((tr & TREF_FRAME))
	map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
      continue;
    }
    /*
    ** Frame/continuation slot without an IR ref: turn the 64 bit value of the
    ** matching Lua stack slot into a KNUM constant and cache the resulting
    ** TRef in J->slot[s], keeping the bits of tr selected by mask 0xff0000.
    */
    if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
      cTValue *base = J->L->base - J->baseslot;
      tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
      ref = tref_ref(tr);
    }
#endif
    if (ref) {  /* Slots without an IR reference are not snapshotted. */
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      /* Slot still holds its own SLOAD, emitted after the last RETF. */
      if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
	  ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
	/*
	** No need to snapshot unmodified non-inherited slots.
	** But always snapshot the function below a frame in LJ_FR2 mode.
	*/
	if (!(ir->op2 & IRSLOAD_INHERIT) &&
	    (!LJ_FR2 || s == 0 || s+1 == nslots ||
	     !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
	  continue;
	/* No need to restore readonly slots and unmodified non-parent slots. */
	if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
	    (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
	  sn |= SNAP_NORESTORE;
      }
      /* Flag number-typed refs on 32 bit soft-float targets. */
      if (LJ_SOFTFP32 && irt_isnum(ir->t))
	sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}
108
/* Add frame links at the end of the snapshot.
** LJ_FR2: a single 64 bit word combining the current PC (shifted left by 8)
** with baseslot-2 in the low byte is copied to map[]; returns 2.
** !LJ_FR2: writes the current PC followed by one or two entries per frame
** found while walking back from L->base; returns the number of entries.
** In both modes *topslot receives the highest frame top seen during the walk,
** measured in slots from 'lim'.
*/
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
  GCfunc *fn = frame_func(frame);
  /* Initial top: frame size for a Lua function, else the current L->top. */
  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
  uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
  lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
  memcpy(map, &pcbase, sizeof(uint64_t));
#else
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
#endif
  lj_assertJ(!J->pt ||
	     (J->pc >= proto_bc(J->pt) &&
	      J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
      frame = frame_prevl(frame);
    } else if (frame_iscont(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
      frame = frame_prevd(frame);
    } else {
      lj_assertJ(!frame_isc(frame), "broken frame chain");
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
      frame = frame_prevd(frame);
      continue;  /* Skip the frame top update for this kind of frame. */
    }
    /* Track the maximum frame top over the frames stepped back to. */
    if (frame + funcproto(frame_func(frame))->framesize > ftop)
      ftop = frame + funcproto(frame_func(frame))->framesize;
  }
  *topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
  lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
  return 2;
#else
  lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
  return f;
#endif
}
159
160/* Take a snapshot of the current stack. */
161static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
162{
163 BCReg nslots = J->baseslot + J->maxslot;
164 MSize nent;
165 SnapEntry *p;
166 /* Conservative estimate. */
167 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
168 p = &J->cur.snapmap[nsnapmap];
169 nent = snapshot_slots(J, p, nslots);
170 snap->nent = (uint8_t)nent;
171 nent += snapshot_framelinks(J, p + nent, &snap->topslot);
172 snap->mapofs = (uint32_t)nsnapmap;
173 snap->ref = (IRRef1)J->cur.nins;
174 snap->mcofs = 0;
175 snap->nslots = (uint8_t)nslots;
176 snap->count = 0;
177 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
178}
179
180/* Add or merge a snapshot. */
181void lj_snap_add(jit_State *J)
182{
183 MSize nsnap = J->cur.nsnap;
184 MSize nsnapmap = J->cur.nsnapmap;
185 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
186 if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
187 (J->mergesnap && !irt_isguard(J->guardemit))) {
188 if (nsnap == 1) { /* But preserve snap #0 PC. */
189 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
190 goto nomerge;
191 }
192 nsnapmap = J->cur.snap[--nsnap].mapofs;
193 } else {
194 nomerge:
195 lj_snap_grow_buf(J, nsnap+1);
196 J->cur.nsnap = (uint16_t)(nsnap+1);
197 }
198 J->mergesnap = 0;
199 J->guardemit.irt = 0;
200 snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
201}
202
203/* -- Snapshot modification ----------------------------------------------- */
204
/* Number of entries in the use/def scratch arrays handed to snap_usedef(). */
#define SNAP_USEDEF_SLOTS	(LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis.
**
** Scans the bytecode starting at pc and records in udf[] what happens first
** to each slot below maxslot. Every entry starts out as 1. A use clears bit 0
** and a definition multiplies the entry by 3. So an entry ends up as 0 if
** the slot was used before being defined (or holds an open upvalue) and
** stays non-zero otherwise. The scan stops at the first jump, return, tail
** call or function header.
**
** Returns a slot number: the callers in this file only consult udf[] for
** slots from this number up to maxslot.
*/
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
			 const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    /* Stop at the first upvalue pointing below the current base. */
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

/* A use before any def turns 1 into 0. After a def the entry stays non-zero. */
#define USE_SLOT(s)		udf[(s)] &= ~1
/* A def turns 1 into 3. An entry that is already 0 (used) stays 0. */
#define DEF_SLOT(s)		udf[(s)] *= 3

  /* Scan through following bytecode and check for uses/defs. */
  lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
	     "snapshot PC out of range");
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    /* Operand B. */
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    /* Operand C (or D). Jumps and returns end the scan here. */
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
      /* Slots B..C are used, everything above C is marked as defined. */
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      /* BC_UCLO: follow the jump and continue scanning at the target. */
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      /* Slots from minslot upwards are marked as defined; end the scan. */
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
	goto handle_jump;
      } else if (bc_isret(op)) {
	/* Only the returned slots A..top-1 are used, all others are defined. */
	BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
	for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    /* Operand A. */
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
       /* BC_ISTC and BC_ISFC are excluded from defining their destination. */
       if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
       break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
	/* Uses reach up to maxslot for CALLM/CALLMT or when operand C is 0. */
	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
		    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
	if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
	/* ITERC/ITERN also use the three slots below A. */
	s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	if (op == BC_CALLT || op == BC_CALLMT) {
	  /* Tail call: slots below A are marked as defined; end the scan. */
	  for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	  return 0;
	}
      } else if (op == BC_KNIL) {
	for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
	for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
	       "use/def analysis PC out of range");
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}
307
308/* Purge dead slots before the next snapshot. */
309void lj_snap_purge(jit_State *J)
310{
311 uint8_t udf[SNAP_USEDEF_SLOTS];
312 BCReg maxslot = J->maxslot;
313 BCReg s = snap_usedef(J, udf, J->pc, maxslot);
314 for (; s < maxslot; s++)
315 if (udf[s] != 0)
316 J->base[s] = 0; /* Purge dead slots. */
317}
318
319/* Shrink last snapshot. */
320void lj_snap_shrink(jit_State *J)
321{
322 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
323 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
324 MSize n, m, nlim, nent = snap->nent;
325 uint8_t udf[SNAP_USEDEF_SLOTS];
326 BCReg maxslot = J->maxslot;
327 BCReg baseslot = J->baseslot;
328 BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
329 maxslot += baseslot;
330 minslot += baseslot;
331 snap->nslots = (uint8_t)maxslot;
332 for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */
333 BCReg s = snap_slot(map[n]);
334 if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
335 map[m++] = map[n]; /* Only copy used slots. */
336 }
337 snap->nent = (uint8_t)m;
338 nlim = J->cur.nsnapmap - snap->mapofs - 1;
339 while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
340 J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */
341}
342
343/* -- Snapshot access ----------------------------------------------------- */
344
345/* Initialize a Bloom Filter with all renamed refs.
346** There are very few renames (often none), so the filter has
347** very few bits set. This makes it suitable for negative filtering.
348*/
349static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
350{
351 BloomFilter rfilt = 0;
352 IRIns *ir;
353 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
354 if (ir->op2 <= lim)
355 bloomset(rfilt, ir->op1);
356 return rfilt;
357}
358
359/* Process matching renames to find the original RegSP. */
360static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
361{
362 IRIns *ir;
363 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
364 if (ir->op1 == ref && ir->op2 <= lim)
365 rs = ir->prev;
366 return rs;
367}
368
369/* Copy RegSP from parent snapshot to the parent links of the IR. */
370IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
371{
372 SnapShot *snap = &T->snap[snapno];
373 SnapEntry *map = &T->snapmap[snap->mapofs];
374 BloomFilter rfilt = snap_renamefilter(T, snapno);
375 MSize n = 0;
376 IRRef ref = 0;
377 UNUSED(J);
378 for ( ; ; ir++) {
379 uint32_t rs;
380 if (ir->o == IR_SLOAD) {
381 if (!(ir->op2 & IRSLOAD_PARENT)) break;
382 for ( ; ; n++) {
383 lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
384 if (snap_slot(map[n]) == ir->op1) {
385 ref = snap_ref(map[n++]);
386 break;
387 }
388 }
389 } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
390 ref++;
391 } else if (ir->o == IR_PVAL) {
392 ref = ir->op1 + REF_BIAS;
393 } else {
394 break;
395 }
396 rs = T->ir[ref].prev;
397 if (bloomtest(rfilt, ref))
398 rs = snap_renameref(T, snapno, ref, rs);
399 ir->prev = (uint16_t)rs;
400 lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
401 }
402 return ir;
403}
404
405/* -- Snapshot replay ----------------------------------------------------- */
406
407/* Replay constant from parent trace. */
408static TRef snap_replay_const(jit_State *J, IRIns *ir)
409{
410 /* Only have to deal with constants that can occur in stack slots. */
411 switch ((IROp)ir->o) {
412 case IR_KPRI: return TREF_PRI(irt_type(ir->t));
413 case IR_KINT: return lj_ir_kint(J, ir->i);
414 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
415 case IR_KNUM: case IR_KINT64:
416 return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
417 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
418 default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
419 }
420}
421
422/* De-duplicate parent reference. */
423static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
424{
425 MSize j;
426 for (j = 0; j < nmax; j++)
427 if (snap_ref(map[j]) == ref)
428 return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
429 return 0;
430}
431
432/* Emit parent reference with de-duplication. */
433static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
434 BloomFilter seen, IRRef ref)
435{
436 IRIns *ir = &T->ir[ref];
437 TRef tr;
438 if (irref_isk(ref))
439 tr = snap_replay_const(J, ir);
440 else if (!regsp_used(ir->prev))
441 tr = 0;
442 else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
443 tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
444 return tr;
445}
446
447/* Check whether a sunk store corresponds to an allocation. Slow path. */
448static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
449{
450 if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
451 irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
452 IRIns *irk = &T->ir[irs->op1];
453 if (irk->o == IR_AREF || irk->o == IR_HREFK)
454 irk = &T->ir[irk->op1];
455 return (&T->ir[irk->op1] == ira);
456 }
457 return 0;
458}
459
460/* Check whether a sunk store corresponds to an allocation. Fast path. */
461static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
462{
463 if (irs->s != 255)
464 return (ira + irs->s == irs); /* Fast check. */
465 return snap_sunk_store2(T, ira, irs);
466}
467
/* Replay snapshot state to setup side trace.
**
** Uses the snapshot of parent trace T selected by J->exitno to initialize
** J->slot[], J->framedepth, J->baseslot, J->base and J->maxslot.
**
** Pass 1 handles every snapshot entry: constants are replayed, refs with a
** register or spill slot become parent SLOADs and refs without one get a
** placeholder. Passes 2 and 3 only run if such placeholders exist: pass 2
** emits the PVALs the sunk instructions depend on and pass 3 re-emits the
** sunk allocations together with their sunk stores.
** Finally an initial snapshot is added to the new trace.
*/
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
      if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
	tr = 0;
      else
	tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      /* No register or spill slot: defer to pass 2/3. */
      pass23 = 1;
      lj_assertJ(s != 0, "unused slot 0 in snapshot");
      tr = s;  /* Placeholder: the slot number itself. */
    } else {
      /* Value lives in a register or spill slot: load it from the parent. */
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    /* Each frame/continuation entry counts, except the one in slot LJ_FR2. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;  /* The last frame entry determines the base slot. */
  }
  if (pass23) {
    /* Sunk stores are only searched up to the IR ref of the snapshot. */
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;  /* Set again below, if a sunk allocation needs replaying. */
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	/* Skip slots that don't hold their own placeholder (de-duped). */
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
	pass23 = 1;
	lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
		   ir->o == IR_CNEW || ir->o == IR_CNEWI,
		   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
	/* Operands at or above T->nk are passed on as parent refs. */
	if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
	if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
	    snap_pref(J, T, map, nent, seen, (ir+1)->op2);
	} else {
	  IRIns *irs;
	  /* Values of all sunk stores belonging to this allocation. */
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      /* No RegSP for the stored value: use its op1 instead. */
	      if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
		snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
	      else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
		       irs+1 < irlast && (irs+1)->o == IR_HIOP)
		snap_pref(J, T, map, nent, seen, (irs+1)->op2);
	    }
	}
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
	/* Not sunk, but no RegSP: asserted to be CONV num.int. Use its op1. */
	lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
		   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
	J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	TRef op1, op2;
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
	  /* The slot names another slot: copy that slot's TRef. */
	  J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
	  continue;
	}
	op1 = ir->op1;
	if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
	op2 = ir->op2;
	if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
	    lj_needsplit(J);  /* Emit joining HIOP. */
	    op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
			     snap_pref(J, T, map, nent, seen, (ir+1)->op2));
	  }
	  /* Re-emit the CNEWI with the mark and PHI flags cleared. */
	  J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
	} else {
	  IRIns *irs;
	  TRef tr = emitir(ir->ot, op1, op2);  /* Re-emit the allocation. */
	  J->slot[snap_slot(sn)] = tr;
	  /* Re-emit all sunk stores belonging to this allocation. */
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      IRIns *irr = &T->ir[irs->op1];  /* Reference operand of store. */
	      TRef val, key = irr->op2, tmp = tr;
	      if (irr->o != IR_FREF) {
		/* Replay the constant key of the reference. */
		IRIns *irk = &T->ir[key];
		if (irr->o == IR_HREFK)
		  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
				    irk->op2);
		else
		  key = snap_replay_const(J, irk);
		if (irr->o == IR_HREFK || irr->o == IR_AREF) {
		  /* Re-emit the instruction the reference is based on. */
		  IRIns *irf = &T->ir[irr->op1];
		  tmp = emitir(irf->ot, tmp, irf->op2);
		}
	      }
	      tmp = emitir(irr->ot, tmp, key);  /* Re-emit the reference. */
	      val = snap_pref(J, T, map, nent, seen, irs->op2);
	      if (val == 0) {
		/* Stored value has no RegSP: redo the CONV num.int here. */
		IRIns *irc = &T->ir[irs->op2];
		lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
			   "sunk store for parent IR %04d with bad op %d",
			   refp - REF_BIAS, irc->o);
		val = snap_pref(J, T, map, nent, seen, irc->op1);
		val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
	      } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
			 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
		/* Split 64 bit store: join the lo and hi parts again. */
		IRType t = IRT_I64;
		if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
		  t = IRT_NUM;
		lj_needsplit(J);
		if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
		  /* Both parts are constants: build one 64 bit constant. */
		  uint64_t k = (uint32_t)T->ir[irs->op2].i +
			       ((uint64_t)T->ir[(irs+1)->op2].i << 32);
		  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
		} else {
		  val = emitir_raw(IRT(IR_HIOP, t), val,
			  snap_pref(J, T, map, nent, seen, (irs+1)->op2));
		}
		tmp = emitir(IRT(irs->o, t), tmp, val);
		continue;
	      }
	      tmp = emitir(irs->ot, tmp, val);  /* Re-emit the store. */
	    } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
	      emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
	    }
	}
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}
629
630/* -- Snapshot restore ---------------------------------------------------- */
631
632static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
633 SnapNo snapno, BloomFilter rfilt,
634 IRIns *ir, TValue *o);
635
/* Restore a value from the trace exit state.
** Writes the value of IR ref 'ref' of trace T, as of the exit described by
** 'ex', to the tagged value *o. Constants are materialized directly. Other
** refs are read from a spill slot or a saved register, as selected by the
** RegSP of the instruction (after applying renames for snapshot 'snapno').
*/
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
			    SnapNo snapno, BloomFilter rfilt,
			    IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    if (ir->o == IR_KPTR) {
      /* Store the raw pointer bits. */
      o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
    } else {
      lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
		 "restore of const from IR %04d with bad op %d",
		 ref - REF_BIAS, ir->o);
      lj_ir_kvalue(J->L, o, ir);
    }
    return;
  }
  /* Only search the renames if the filter says the ref may be renamed. */
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP32
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
#endif
    } else {
      lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      /* Neither register nor spill slot: asserted to be a CONV num.int.
      ** Restore the integer operand and convert it for LJ_DUALNUM.
      */
      lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
		 "restore from IR %04d has no reg", ref - REF_BIAS);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#elif LJ_64	/* && LJ_SOFTFP */
    } else if (irt_isnum(t)) {
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
    } else if (irt_ispri(t)) {
      /* Primitive types only need their tag. */
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}
703
704#if LJ_HASFFI
/* Restore raw data from the trace exit state.
** Copies the untagged value of IR ref 'ref' of trace T, as of the exit
** described by 'ex', to dst. 'sz' is the number of bytes to write and is
** asserted to be 1, 2, 4 or 8.
*/
static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
			     SnapNo snapno, BloomFilter rfilt,
			     IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  UNUSED(J);
  if (irref_isk(ref)) {
    if (ir_isk64(ir)) {
      /* 64 bit constant: the payload is read from the following IR slot. */
      src = (int32_t *)&ir[1];
    } else if (sz == 8) {
      /* Zero-extend a 32 bit constant to 64 bits. */
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    /* Only search the renames if the filter says the ref may be renamed. */
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
	/* Zero-extend a 32 bit spill slot to 64 bits. */
	tmp = (uint64_t)(uint32_t)*src;
	src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
	/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
	lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
		   "restore from IR %04d has no reg", ref - REF_BIAS);
	/* Restore the 32 bit integer operand, then convert it in place. */
	snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
	*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
	return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
	/* Not a GPR: read from the saved FPRs instead. */
	src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
	if (sz == 4) {  /* PPC FPRs are always doubles. */
	  *(float *)dst = (float)*(double *)src;
	  return;
	}
#else
	/* Big-endian: step to the second 32 bit word of the saved register. */
	if (LJ_BE && sz == 4) src++;
#endif
      } else
#endif
      if (LJ_64 && LJ_BE && sz == 4) src++;
    }
  }
  lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
	     "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
  /* Store with the requested width; 1 and 2 byte stores truncate *src. */
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
767#endif
768
/* Unsink allocation from the trace exit state. Unsink sunk stores.
** 'ir' is the sunk allocation (asserted to be TNEW, TDUP, CNEW or CNEWI) of
** trace T. The object is allocated now, stored to *o and then initialized
** from the exit state: with the immediate value for CNEWI, else by applying
** all sunk stores for this allocation that precede the IR ref of the
** snapshot.
*/
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o)
{
  lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
	     ir->o == IR_CNEW || ir->o == IR_CNEWI,
	     "sunk allocation with bad op %d", ir->o);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;  /* op1 is the constant ctype ID. */
    CTSize sz;
    CTInfo info = lj_ctype_info(cts, id, &sz);
    GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
	/* Split 64 bit value: the HIOP operand holds the high word. */
	snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
			 LJ_LE ? p+4 : p, 4);
	if (LJ_BE) p += 4;
	sz = 4;
      }
      snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      /* Apply the sunk stores for this allocation. */
      for (irs = ir+1; irs < irlast; irs++)
	if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	  IRIns *iro = &T->ir[T->ir[irs->op1].op2];  /* Constant offset. */
	  uint8_t *p = (uint8_t *)cd;
	  CTSize szs;
	  lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
	  lj_assertJ(T->ir[irs->op1].o == IR_ADD,
		     "sunk store with bad add op %d", T->ir[irs->op1].o);
	  lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
		     "sunk store with bad const offset op %d", iro->o);
	  /* Derive the store size from the type of the store. */
	  if (irt_is64(irs->t)) szs = 8;
	  else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
	  else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
	  else szs = 4;
	  /* The offset is relative to the cdata object, not its payload. */
	  if (LJ_64 && iro->o == IR_KINT64)
	    p += (int64_t)ir_k64(iro)->u64;
	  else
	    p += iro->i;
	  lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
		     p + szs <= (uint8_t *)cdataptr(cd) + sz,
		     "sunk store with offset out of range");
	  if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    /* Split 64 bit store: the HIOP operand holds the high word. */
	    lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
	    snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
			     LJ_LE ? p+4 : p, 4);
	    if (LJ_BE) p += 4;
	  }
	  snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
	}
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    /* TNEW creates an empty table, TDUP duplicates a template table. */
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
				  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    /* Apply the sunk stores for this allocation. */
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	IRIns *irk = &T->ir[irs->op1];
	TValue tmp, *val;
	lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
		   irs->o == IR_FSTORE,
		   "sunk store with bad op %d", irs->o);
	if (irk->o == IR_FREF) {
	  /* Field store: only the metatable field is expected (asserted). */
	  lj_assertJ(irk->op2 == IRFL_TAB_META,
		     "sunk store with bad field %d", irk->op2);
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  setgcref(t->metatable, obj2gco(tabV(&tmp)));
	} else {
	  /* Array/hash store: get the constant key, then set the value. */
	  irk = &T->ir[irk->op2];
	  if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
	  lj_ir_kvalue(J->L, &tmp, irk);
	  val = lj_tab_set(J->L, t, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
	  if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    /* Split number: fetch the high word from the HIOP operand. */
	    snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
	    val->u32.hi = tmp.u32.lo;
	  }
	}
      }
  }
}
863
/* Restore interpreter state from exit state with the help of a snapshot.
** 'exptr' points to the ExitState of the exit. The snapshot J->exitno of the
** parent trace J->parent is used to fill the Lua stack slots, to set up
** L->base and L->top and to set the PC in the C frame.
** Returns the bytecode PC stored in the snapshot.
*/
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
  /* Last frame link entry of this snapshot. Consumed backwards below. */
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
  ptrdiff_t ftsz0;
#endif
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  /* The PC is stored right after the nent slot entries. */
  const BCIns *pc = snap_pc(&map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1-LJ_FR2;  /* Snapshot slot numbers index from here. */
#if !LJ_FR2
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
#endif
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
	MSize j;
	/* An earlier entry with the same ref already holds the object. */
	for (j = 0; j < n; j++)
	  if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
	    copyTV(L, o, &frame[snap_slot(map[j])]);
	    goto dupslot;
	  }
	snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
	continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
	/* Split number: fetch the high word from the following IR ref. */
	TValue tmp;
	snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
	o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	/* Overwrite tag with frame link. */
	setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
	L->base = o+1;
#endif
      }
    }
  }
#if LJ_FR2
  /* The low byte of the PC/base word from snapshot_framelinks() holds the
  ** base slot offset (baseslot-2).
  */
  L->base += (map[nent+LJ_BE] & 0xff);
#endif
  lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    /* For these the top is set to the end of the snapshotted slots. */
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}
948
949#undef emitir_raw
950#undef emitir
951
952#endif
953