/*
** Snapshot handling.
** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
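  /* Each map entry packs the slot number, the frame/continuation flags and
  ** the IR reference of the slot's value (see the SNAP_TR() encoding).
  */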
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = IR(ref);
      if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /* No need to snapshot unmodified non-inherited slots. */
        if (!(ir->op2 & IRSLOAD_INHERIT))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;  /* Soft-float number: hi word is at ref+1. */
      map[n++] = sn;
    }
  }
  return n;
}

/* Add frame links at the end of the snapshot. */
static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot;
  cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
  MSize f = 0;
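  /* One or two entries are added per frame below. The ftop pointer tracks
  ** the highest stack slot needed by any frame and is returned as the
  ** snapshot's topslot.
  */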
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
      map[f++] = SNAP_MKPC(frame_pc(frame));
      frame = frame_prevl(frame);
      if (frame + funcproto(frame_func(frame))->framesize > ftop)
        ftop = frame + funcproto(frame_func(frame))->framesize;
    } else if (frame_iscont(frame)) {
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
      frame = frame_prevd(frame);
    } else {
      lua_assert(!frame_isc(frame));
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      frame = frame_prevd(frame);
    }
  }
  lua_assert(f == (MSize)(1 + J->framedepth));
  return (BCReg)(ftop - lim);
}

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nent = (uint8_t)nent;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge with the previous snapshot if no IR was emitted in between, or if
  ** merging was requested and no guard was emitted in between.
  */
  if (J->mergesnap ? !irt_isguard(J->guardemit) :
      (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
                         const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s) udf[(s)] &= ~1
#define DEF_SLOT(s) udf[(s)] *= 3
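
/* udf[s] starts at 1. A use clears bit 0; a definition multiplies by 3,
** which keeps 0 at 0 but otherwise leaves the value non-zero even after a
** later use. So udf[s] != 0 after the scan means the slot's current value
** is never read before being overwritten, i.e. the slot is dead.
*/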

  /* Scan through following bytecode and check for uses/defs. */
  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lua_assert(op == BC_CAT);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
        goto handle_jump;
      } else if (bc_isret(op)) {
        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
                    maxslot : (bc_a(ins) + bc_c(ins));
        s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        if (op == BC_CALLT || op == BC_CALLMT) {
          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
          return 0;
        }
      } else if (op == BC_KNIL) {
        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink the last snapshot: drop entries for slots above the current frame
** top and for slots the following bytecode overwrites before reading them.
*/
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
  BCReg baseslot = J->baseslot;
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
}

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
        lua_assert(n < snap->nent);
        if (snap_slot(map[n]) == ir->op1) {
          ref = snap_ref(map[n++]);
          break;
        }
      }
    } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lua_assert(regsp_used(rs));
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
  case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lua_assert(0); return TREF_NIL; break;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
                      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;  /* No register/spill in parent; caller must rematerialize it. */
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = IR(irs->op1);
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = IR(irk->op1);
    return (IR(irk->op1) == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs)
{
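  /* The store's spill slot field caches the distance back to its allocation;
  ** 255 means the distance didn't fit, so verify via the slow path instead.
  */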
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(J, ira, irs);
}

/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
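  /* Pass 1 below emits constants and SLOADs for the inherited slots. If any
  ** slot refers to a value without a register or spill slot in the parent
  ** (a sunk allocation or an eliminated conversion), pass23 is set and two
  ** more passes emit the required PVALs and replay the sunk allocations
  ** together with their stores.
  */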
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
        pass23 = 1;
        lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
                   ir->o == IR_CNEW || ir->o == IR_CNEWI);
        if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
        if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
            snap_pref(J, T, map, nent, seen, (ir+1)->op2);
        } else {
          IRIns *irs;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
              if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
                snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
              else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                       irs+1 < irlast && (irs+1)->o == IR_HIOP)
                snap_pref(J, T, map, nent, seen, (irs+1)->op2);
            }
        }
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
        lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        TRef op1, op2;
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
          J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
          continue;
        }
        op1 = ir->op1;
        if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
        op2 = ir->op2;
        if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
            lj_needsplit(J);  /* Emit joining HIOP. */
            op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
                             snap_pref(J, T, map, nent, seen, (ir+1)->op2));
          }
          J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
        } else {
          IRIns *irs;
          TRef tr = emitir(ir->ot, op1, op2);
          J->slot[snap_slot(sn)] = tr;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
              IRIns *irr = &T->ir[irs->op1];
              TRef val, key = irr->op2, tmp = tr;
              if (irr->o != IR_FREF) {
                IRIns *irk = &T->ir[key];
                if (irr->o == IR_HREFK)
                  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
                                    irk->op2);
                else
                  key = snap_replay_const(J, irk);
                if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                  IRIns *irf = &T->ir[irr->op1];
                  tmp = emitir(irf->ot, tmp, irf->op2);
                }
              }
              tmp = emitir(irr->ot, tmp, key);
              val = snap_pref(J, T, map, nent, seen, irs->op2);
              if (val == 0) {
                IRIns *irc = &T->ir[irs->op2];
                lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
                val = snap_pref(J, T, map, nent, seen, irc->op1);
                val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
              } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                         irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                IRType t = IRT_I64;
                if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
                  t = IRT_NUM;
                lj_needsplit(J);
                if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                  uint64_t k = (uint32_t)T->ir[irs->op2].i +
                               ((uint64_t)T->ir[(irs+1)->op2].i << 32);
                  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
                                  lj_ir_k64_find(J, k));
                } else {
                  val = emitir_raw(IRT(IR_HIOP, t), val,
                                   snap_pref(J, T, map, nent, seen, (irs+1)->op2));
                }
                tmp = emitir(IRT(irs->o, t), tmp, val);
                continue;
              }
              tmp = emitir(irs->ot, tmp, val);
            } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
              emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
            }
        }
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcrefi(o->gcr, *sps);
      setitype(o, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
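      /* Neither a register nor a spill slot: this is an integer-to-number
      ** conversion that was optimized away. Restore the integer operand and
      ** convert it back to a number.
      */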
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
    } else {
      if (!irt_ispri(t))
        setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
      setitype(o, irt_toitype(t));
    }
  }
}

#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
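    /* KNUM/KINT64 constants live out-of-line and are reached via ptr; other
    ** constants are the 32 bit immediate, zero-extended if 8 bytes are
    ** requested.
    */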
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = mref(ir->ptr, int32_t);
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        tmp = (uint64_t)(uint32_t)*src;
        src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
        src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        if (LJ_BE && sz == 4) src++;
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif

/* Unsink an allocation from the trace exit state and replay its sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
             ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz = lj_ctype_size(cts, id);
    GCcdata *cd = lj_cdata_new(cts, id, sz);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
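      /* On 32 bit targets a 64 bit cdata value is split into two halves,
      ** joined by a following HIOP: restore the high half first, then the
      ** low half below.
      */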
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
        snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
        if (LJ_BE) p += 4;
        sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
        if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
          IRIns *iro = &T->ir[T->ir[irs->op1].op2];
          uint8_t *p = (uint8_t *)cd;
          CTSize szs;
          lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
          lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
          if (irt_is64(irs->t)) szs = 8;
          else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
          else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
          else szs = 4;
          if (LJ_64 && iro->o == IR_KINT64)
            p += (int64_t)ir_k64(iro)->u64;
          else
            p += iro->i;
          lua_assert(p >= (uint8_t *)cdataptr(cd) &&
                     p + szs <= (uint8_t *)cdataptr(cd) + sz);
          if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            lua_assert(szs == 4);
            snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
            if (LJ_BE) p += 4;
          }
          snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
        }
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
                                  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
        IRIns *irk = &T->ir[irs->op1];
        TValue tmp, *val;
        lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
                   irs->o == IR_FSTORE);
        if (irk->o == IR_FREF) {
          lua_assert(irk->op2 == IRFL_TAB_META);
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          setgcref(t->metatable, obj2gco(tabV(&tmp)));
        } else {
          irk = &T->ir[irk->op2];
          if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
          lj_ir_kvalue(J->L, &tmp, irk);
          val = lj_tab_set(J->L, t, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
          if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
            val->u32.hi = tmp.u32.lo;
          }
        }
      }
  }
}

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  int32_t ftsz0;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
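  /* Frame links in the snapshot map are consumed backwards via flinks as
  ** frame or continuation slots are encountered below.
  */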
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
        MSize j;
        for (j = 0; j < n; j++)
          if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
            copyTV(L, o, &frame[snap_slot(map[j])]);
            goto dupslot;
          }
        snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
        continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
        TValue tmp;
        snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
        o->u32.hi = tmp.u32.lo;
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        /* Overwrite tag with frame link. */
        o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
        L->base = o+1;
      }
    }
  }
  lua_assert(map + nent == flinks);

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}

#undef IR
#undef emitir_raw
#undef emitir

#endif