/*
** Snapshot handling.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif
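
/*
** Snapshots map IR references to the stack slots and frames they
** correspond to. Every guard exit of a trace is associated with a
** snapshot, which is used both to restore the interpreter state
** (stack slots, frame links, PC) when the trace exits and to set up
** the initial state of a side trace (see lj_snap_replay() below).
*/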

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
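  /* Double the buffer if that covers the request; otherwise take the
  ** requested size directly, but never allocate fewer than 64 entries
  ** (which is also the size of the initial allocation).
  */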
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
#if LJ_FR2
    if (s == 1) {  /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
      if ((tr & TREF_FRAME))
        map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
      continue;
    }
    if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
      cTValue *base = J->L->base - J->baseslot;
      tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
      ref = tref_ref(tr);
    }
#endif
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /*
        ** No need to snapshot unmodified non-inherited slots.
        ** But always snapshot the function below a frame in LJ_FR2 mode.
        */
        if (!(ir->op2 & IRSLOAD_INHERIT) &&
            (!LJ_FR2 || s == 0 || s+1 == nslots ||
             !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP32 && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}

/* Add frame links at the end of the snapshot. */
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
  GCfunc *fn = frame_func(frame);
  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
  uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
  lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
  memcpy(map, &pcbase, sizeof(uint64_t));
#else
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
#endif
  lj_assertJ(!J->pt ||
             (J->pc >= proto_bc(J->pt) &&
              J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
      frame = frame_prevl(frame);
    } else if (frame_iscont(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
      frame = frame_prevd(frame);
    } else {
      lj_assertJ(!frame_isc(frame), "broken frame chain");
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
      frame = frame_prevd(frame);
      continue;
    }
    if (frame + funcproto(frame_func(frame))->framesize > ftop)
      ftop = frame + funcproto(frame_func(frame))->framesize;
  }
  *topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
  lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
  return 2;
#else
  lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
  return f;
#endif
}
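
/*
** A snapshot's region of the map thus holds snap->nent slot entries
** in ascending slot order, followed by the frame links: a single
** 64 bit PC+baseslot word (two map entries) for LJ_FR2, or the
** current PC plus one or two entries per outer frame otherwise.
*/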

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->nent = (uint8_t)nent;
  nent += snapshot_framelinks(J, p + nent, &snap->topslot);
  snap->mapofs = (uint32_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->mcofs = 0;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no IR ins. in between, or if requested and no guard in between. */
  if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
      (J->mergesnap && !irt_isguard(J->guardemit))) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
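      /* Emitting a NOP gives the next snapshot a different ref, so
      ** snap #0, whose PC marks the start of the trace, is kept.
      */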
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS  (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
                         const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)  udf[(s)] &= ~1
#define DEF_SLOT(s)  udf[(s)] *= 3
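
/*
** udf[s] tracks whether slot s is read before it's redefined: all
** slots start at 1, USE_SLOT clears bit 0 and DEF_SLOT multiplies by
** 3. A slot that was already used (0) thus stays 0, while a slot that
** is redefined first stays nonzero even if used later (1 -> 3 -> 2).
** So udf[s] != 0 at the end means the slot's value at the snapshot is
** never read and the slot is dead.
*/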

  /* Scan through following bytecode and check for uses/defs. */
  lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
             "snapshot PC out of range");
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
        goto handle_jump;
      } else if (bc_isret(op)) {
        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
                    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
        if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
        s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        if (op == BC_CALLT || op == BC_CALLMT) {
          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
          return 0;
        }
      } else if (op == BC_KNIL) {
        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
               "use/def analysis PC out of range");
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}
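
/*
** Illustrative example: if the bytecode following a snapshot is
**   KSHORT 2 10  (redefine slot 2)
**   RET1   2  2  (return slot 2)
** then every slot's value at the snapshot is dead: slot 2 is
** overwritten before it's read, and the return defines all others.
*/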

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg baseslot = J->baseslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint32_t)(snap->mapofs + m);  /* Free up space in map. */
}

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}
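
/* A filter hit may be a false positive, so callers follow it up with
** the exact linear check in snap_renameref() below; a miss
** definitively rules out a rename and skips the scan.
*/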

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  UNUSED(J);
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
        lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
        if (snap_slot(map[n]) == ir->op1) {
          ref = snap_ref(map[n++]);
          break;
        }
      }
    } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: case IR_KINT64:
    return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
                      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
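  /* Three cases: replay a constant; return 0 for a parent value that
  ** has no register or spill slot (the caller must rematerialize it);
  ** or emit a PVAL, reusing an earlier one for the same ref if found.
  */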
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = &T->ir[irs->op1];
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = &T->ir[irk->op1];
    return (&T->ir[irk->op1] == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
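  /* The sink pass (lj_opt_sink.c) stores the distance from the
  ** allocation to the store in irs->s, saturated to 255. If it fits,
  ** a simple address comparison suffices; otherwise walk the store's
  ** key chain back to the allocation (slow path above).
  */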
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(T, ira, irs);
}

/* Replay snapshot state to setup side trace.
**
** Pass 1 emits SLOADs (or replays constants) for all snapshot slots.
** If any slot refers to a parent value without a register or spill
** slot (a sunk allocation or an eliminated conversion), pass 2 emits
** the PVALs such values depend on, and pass 3 re-emits the sunk
** allocations together with their sunk stores.
*/
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
      if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
        tr = 0;
      else
        tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lj_assertJ(s != 0, "unused slot 0 in snapshot");
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
        pass23 = 1;
        lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
                   ir->o == IR_CNEW || ir->o == IR_CNEWI,
                   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
        if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
        if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
            snap_pref(J, T, map, nent, seen, (ir+1)->op2);
        } else {
          IRIns *irs;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
                snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
              else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
                       irs+1 < irlast && (irs+1)->o == IR_HIOP)
                snap_pref(J, T, map, nent, seen, (irs+1)->op2);
            }
        }
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
        lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
                   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
        J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        TRef op1, op2;
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
          J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
          continue;
        }
        op1 = ir->op1;
        if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
        op2 = ir->op2;
        if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
            lj_needsplit(J);  /* Emit joining HIOP. */
            op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
                             snap_pref(J, T, map, nent, seen, (ir+1)->op2));
          }
          J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
        } else {
          IRIns *irs;
          TRef tr = emitir(ir->ot, op1, op2);
          J->slot[snap_slot(sn)] = tr;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              IRIns *irr = &T->ir[irs->op1];
              TRef val, key = irr->op2, tmp = tr;
              if (irr->o != IR_FREF) {
                IRIns *irk = &T->ir[key];
                if (irr->o == IR_HREFK)
                  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
                                    irk->op2);
                else
                  key = snap_replay_const(J, irk);
                if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                  IRIns *irf = &T->ir[irr->op1];
                  tmp = emitir(irf->ot, tmp, irf->op2);
                }
              }
              tmp = emitir(irr->ot, tmp, key);
              val = snap_pref(J, T, map, nent, seen, irs->op2);
              if (val == 0) {
                IRIns *irc = &T->ir[irs->op2];
                lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
                           "sunk store for parent IR %04d with bad op %d",
                           refp - REF_BIAS, irc->o);
                val = snap_pref(J, T, map, nent, seen, irc->op1);
                val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
              } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
                         irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                IRType t = IRT_I64;
                if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
                  t = IRT_NUM;
                lj_needsplit(J);
                if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                  uint64_t k = (uint32_t)T->ir[irs->op2].i +
                               ((uint64_t)T->ir[(irs+1)->op2].i << 32);
                  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
                } else {
                  val = emitir_raw(IRT(IR_HIOP, t), val,
                                   snap_pref(J, T, map, nent, seen, (irs+1)->op2));
                }
                tmp = emitir(IRT(irs->o, t), tmp, val);
                continue;
              }
              tmp = emitir(irs->ot, tmp, val);
            } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
              emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
            }
        }
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    if (ir->o == IR_KPTR) {
      o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
    } else {
      lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
                 "restore of const from IR %04d with bad op %d",
                 ref - REF_BIAS, ir->o);
      lj_ir_kvalue(J->L, o, ir);
    }
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP32
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
#endif
    } else {
      lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
                 "restore from IR %04d has no reg", ref - REF_BIAS);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#elif LJ_64  /* && LJ_SOFTFP */
    } else if (irt_isnum(t)) {
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}

#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  UNUSED(J);
  if (irref_isk(ref)) {
    if (ir_isk64(ir)) {
      src = (int32_t *)&ir[1];
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        tmp = (uint64_t)(uint32_t)*src;
        src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
                   "restore from IR %04d has no reg", ref - REF_BIAS);
        snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
        src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        if (LJ_BE && sz == 4) src++;
#endif
      } else
#endif
      if (LJ_64 && LJ_BE && sz == 4) src++;
    }
  }
  lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
             "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif

/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o)
{
  lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
             ir->o == IR_CNEW || ir->o == IR_CNEWI,
             "sunk allocation with bad op %d", ir->o);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz;
    CTInfo info = lj_ctype_info(cts, id, &sz);
    GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
        snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
                         LJ_LE ? p+4 : p, 4);
        if (LJ_BE) p += 4;
        sz = 4;
      }
      snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
        if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
          IRIns *iro = &T->ir[T->ir[irs->op1].op2];
          uint8_t *p = (uint8_t *)cd;
          CTSize szs;
          lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
          lj_assertJ(T->ir[irs->op1].o == IR_ADD,
                     "sunk store with bad add op %d", T->ir[irs->op1].o);
          lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
                     "sunk store with bad const offset op %d", iro->o);
          if (irt_is64(irs->t)) szs = 8;
          else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
          else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
          else szs = 4;
          if (LJ_64 && iro->o == IR_KINT64)
            p += (int64_t)ir_k64(iro)->u64;
          else
            p += iro->i;
          lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
                     p + szs <= (uint8_t *)cdataptr(cd) + sz,
                     "sunk store with offset out of range");
          if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
            snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
                             LJ_LE ? p+4 : p, 4);
            if (LJ_BE) p += 4;
          }
          snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
        }
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
                                  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
        IRIns *irk = &T->ir[irs->op1];
        TValue tmp, *val;
        lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
                   irs->o == IR_FSTORE,
                   "sunk store with bad op %d", irs->o);
        if (irk->o == IR_FREF) {
          lj_assertJ(irk->op2 == IRFL_TAB_META,
                     "sunk store with bad field %d", irk->op2);
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          setgcref(t->metatable, obj2gco(tabV(&tmp)));
        } else {
          irk = &T->ir[irk->op2];
          if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
          lj_ir_kvalue(J->L, &tmp, irk);
          val = lj_tab_set(J->L, t, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
          if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
            val->u32.hi = tmp.u32.lo;
          }
        }
      }
  }
}

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
  ptrdiff_t ftsz0;
#endif
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(&map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1-LJ_FR2;
#if !LJ_FR2
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
#endif
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
        MSize j;
        for (j = 0; j < n; j++)
          if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
            copyTV(L, o, &frame[snap_slot(map[j])]);
            goto dupslot;
          }
        snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
        continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
        TValue tmp;
        snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
        o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        /* Overwrite tag with frame link. */
        setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
        L->base = o+1;
#endif
      }
    }
  }
#if LJ_FR2
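  /* The low byte of the 64 bit PC+baseslot frame-link word holds
  ** baseslot-2 (see snapshot_framelinks()); add it to get the base
  ** of the innermost frame.
  */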
  L->base += (map[nent+LJ_BE] & 0xff);
#endif
  lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}

#undef emitir_raw
#undef emitir

#endif