1/*
2** Snapshot handling.
3** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_snap_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_gc.h"
14#include "lj_tab.h"
15#include "lj_state.h"
16#include "lj_frame.h"
17#include "lj_bc.h"
18#include "lj_ir.h"
19#include "lj_jit.h"
20#include "lj_iropt.h"
21#include "lj_trace.h"
22#include "lj_snap.h"
23#include "lj_target.h"
24#if LJ_HASFFI
25#include "lj_ctype.h"
26#include "lj_cdata.h"
27#endif
28
/* Some local macros to save typing. Undef'd at the end.
** All of them expect a variable named J (the jit_State pointer) in scope.
*/

/* Address of the instruction with index ref in the IR array of J->cur. */
#define IR(ref)                 (&J->cur.ir[(ref)])

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)        (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)    (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
37
38/* -- Snapshot buffer allocation ------------------------------------------ */
39
40/* Grow snapshot buffer. */
41void lj_snap_grow_buf_(jit_State *J, MSize need)
42{
43 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
44 if (need > maxsnap)
45 lj_trace_err(J, LJ_TRERR_SNAPOV);
46 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
47 J->cur.snap = J->snapbuf;
48}
49
50/* Grow snapshot map buffer. */
51void lj_snap_grow_map_(jit_State *J, MSize need)
52{
53 if (need < 2*J->sizesnapmap)
54 need = 2*J->sizesnapmap;
55 else if (need < 64)
56 need = 64;
57 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
58 J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
59 J->cur.snapmap = J->snapmapbuf;
60 J->sizesnapmap = need;
61}
62
63/* -- Snapshot generation ------------------------------------------------- */
64
65/* Add all modified slots to the snapshot. */
66static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
67{
68 IRRef retf = J->chain[IR_RETF]; /* Limits SLOAD restore elimination. */
69 BCReg s;
70 MSize n = 0;
71 for (s = 0; s < nslots; s++) {
72 TRef tr = J->slot[s];
73 IRRef ref = tref_ref(tr);
74 if (ref) {
75 SnapEntry sn = SNAP_TR(s, tr);
76 IRIns *ir = IR(ref);
77 if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
78 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
79 /* No need to snapshot unmodified non-inherited slots. */
80 if (!(ir->op2 & IRSLOAD_INHERIT))
81 continue;
82 /* No need to restore readonly slots and unmodified non-parent slots. */
83 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
84 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
85 sn |= SNAP_NORESTORE;
86 }
87 if (LJ_SOFTFP && irt_isnum(ir->t))
88 sn |= SNAP_SOFTFPNUM;
89 map[n++] = sn;
90 }
91 }
92 return n;
93}
94
95/* Add frame links at the end of the snapshot. */
96static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
97{
98 cTValue *frame = J->L->base - 1;
99 cTValue *lim = J->L->base - J->baseslot;
100 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
101 MSize f = 0;
102 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
103 while (frame > lim) { /* Backwards traversal of all frames above base. */
104 if (frame_islua(frame)) {
105 map[f++] = SNAP_MKPC(frame_pc(frame));
106 frame = frame_prevl(frame);
107 if (frame + funcproto(frame_func(frame))->framesize > ftop)
108 ftop = frame + funcproto(frame_func(frame))->framesize;
109 } else if (frame_iscont(frame)) {
110 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
111 map[f++] = SNAP_MKPC(frame_contpc(frame));
112 frame = frame_prevd(frame);
113 } else {
114 lua_assert(!frame_isc(frame));
115 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
116 frame = frame_prevd(frame);
117 }
118 }
119 lua_assert(f == (MSize)(1 + J->framedepth));
120 return (BCReg)(ftop - lim);
121}
122
123/* Take a snapshot of the current stack. */
124static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
125{
126 BCReg nslots = J->baseslot + J->maxslot;
127 MSize nent;
128 SnapEntry *p;
129 /* Conservative estimate. */
130 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
131 p = &J->cur.snapmap[nsnapmap];
132 nent = snapshot_slots(J, p, nslots);
133 snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
134 snap->mapofs = (uint16_t)nsnapmap;
135 snap->ref = (IRRef1)J->cur.nins;
136 snap->nent = (uint8_t)nent;
137 snap->nslots = (uint8_t)nslots;
138 snap->count = 0;
139 J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth);
140}
141
142/* Add or merge a snapshot. */
143void lj_snap_add(jit_State *J)
144{
145 MSize nsnap = J->cur.nsnap;
146 MSize nsnapmap = J->cur.nsnapmap;
147 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
148 if (J->mergesnap ? !irt_isguard(J->guardemit) :
149 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
150 if (nsnap == 1) { /* But preserve snap #0 PC. */
151 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
152 goto nomerge;
153 }
154 nsnapmap = J->cur.snap[--nsnap].mapofs;
155 } else {
156 nomerge:
157 lj_snap_grow_buf(J, nsnap+1);
158 J->cur.nsnap = (uint16_t)(nsnap+1);
159 }
160 J->mergesnap = 0;
161 J->guardemit.irt = 0;
162 snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
163}
164
165/* -- Snapshot modification ----------------------------------------------- */
166
/* Size of the use/def scratch arrays declared on the stack by
** lj_snap_purge() and lj_snap_shrink() and filled in by snap_usedef().
*/
#define SNAP_USEDEF_SLOTS       (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)
168
169/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
170static BCReg snap_usedef(jit_State *J, uint8_t *udf,
171 const BCIns *pc, BCReg maxslot)
172{
173 BCReg s;
174 GCobj *o;
175
176 if (maxslot == 0) return 0;
177#ifdef LUAJIT_USE_VALGRIND
178 /* Avoid errors for harmless reads beyond maxslot. */
179 memset(udf, 1, SNAP_USEDEF_SLOTS);
180#else
181 memset(udf, 1, maxslot);
182#endif
183
184 /* Treat open upvalues as used. */
185 o = gcref(J->L->openupval);
186 while (o) {
187 if (uvval(gco2uv(o)) < J->L->base) break;
188 udf[uvval(gco2uv(o)) - J->L->base] = 0;
189 o = gcref(o->gch.nextgc);
190 }
191
192#define USE_SLOT(s) udf[(s)] &= ~1
193#define DEF_SLOT(s) udf[(s)] *= 3
194
195 /* Scan through following bytecode and check for uses/defs. */
196 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
197 for (;;) {
198 BCIns ins = *pc++;
199 BCOp op = bc_op(ins);
200 switch (bcmode_b(op)) {
201 case BCMvar: USE_SLOT(bc_b(ins)); break;
202 default: break;
203 }
204 switch (bcmode_c(op)) {
205 case BCMvar: USE_SLOT(bc_c(ins)); break;
206 case BCMrbase:
207 lua_assert(op == BC_CAT);
208 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
209 for (; s < maxslot; s++) DEF_SLOT(s);
210 break;
211 case BCMjump:
212 handle_jump: {
213 BCReg minslot = bc_a(ins);
214 if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
215 else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
216 else if (op == BC_UCLO) { pc += bc_j(ins); break; }
217 for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
218 return minslot < maxslot ? minslot : maxslot;
219 }
220 case BCMlit:
221 if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
222 goto handle_jump;
223 } else if (bc_isret(op)) {
224 BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
225 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
226 for (; s < top; s++) USE_SLOT(s);
227 for (; s < maxslot; s++) DEF_SLOT(s);
228 return 0;
229 }
230 break;
231 case BCMfunc: return maxslot; /* NYI: will abort, anyway. */
232 default: break;
233 }
234 switch (bcmode_a(op)) {
235 case BCMvar: USE_SLOT(bc_a(ins)); break;
236 case BCMdst:
237 if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
238 break;
239 case BCMbase:
240 if (op >= BC_CALLM && op <= BC_VARG) {
241 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
242 maxslot : (bc_a(ins) + bc_c(ins));
243 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
244 for (; s < top; s++) USE_SLOT(s);
245 for (; s < maxslot; s++) DEF_SLOT(s);
246 if (op == BC_CALLT || op == BC_CALLMT) {
247 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
248 return 0;
249 }
250 } else if (op == BC_KNIL) {
251 for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
252 } else if (op == BC_TSETM) {
253 for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
254 }
255 break;
256 default: break;
257 }
258 lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
259 }
260
261#undef USE_SLOT
262#undef DEF_SLOT
263
264 return 0; /* unreachable */
265}
266
267/* Purge dead slots before the next snapshot. */
268void lj_snap_purge(jit_State *J)
269{
270 uint8_t udf[SNAP_USEDEF_SLOTS];
271 BCReg maxslot = J->maxslot;
272 BCReg s = snap_usedef(J, udf, J->pc, maxslot);
273 for (; s < maxslot; s++)
274 if (udf[s] != 0)
275 J->base[s] = 0; /* Purge dead slots. */
276}
277
278/* Shrink last snapshot. */
279void lj_snap_shrink(jit_State *J)
280{
281 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
282 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
283 MSize n, m, nlim, nent = snap->nent;
284 uint8_t udf[SNAP_USEDEF_SLOTS];
285 BCReg maxslot = J->maxslot;
286 BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
287 BCReg baseslot = J->baseslot;
288 maxslot += baseslot;
289 minslot += baseslot;
290 snap->nslots = (uint8_t)maxslot;
291 for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */
292 BCReg s = snap_slot(map[n]);
293 if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
294 map[m++] = map[n]; /* Only copy used slots. */
295 }
296 snap->nent = (uint8_t)m;
297 nlim = J->cur.nsnapmap - snap->mapofs - 1;
298 while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */
299 J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */
300}
301
302/* -- Snapshot access ----------------------------------------------------- */
303
304/* Initialize a Bloom Filter with all renamed refs.
305** There are very few renames (often none), so the filter has
306** very few bits set. This makes it suitable for negative filtering.
307*/
308static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
309{
310 BloomFilter rfilt = 0;
311 IRIns *ir;
312 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
313 if (ir->op2 <= lim)
314 bloomset(rfilt, ir->op1);
315 return rfilt;
316}
317
318/* Process matching renames to find the original RegSP. */
319static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
320{
321 IRIns *ir;
322 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
323 if (ir->op1 == ref && ir->op2 <= lim)
324 rs = ir->prev;
325 return rs;
326}
327
328/* Copy RegSP from parent snapshot to the parent links of the IR. */
329IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
330{
331 SnapShot *snap = &T->snap[snapno];
332 SnapEntry *map = &T->snapmap[snap->mapofs];
333 BloomFilter rfilt = snap_renamefilter(T, snapno);
334 MSize n = 0;
335 IRRef ref = 0;
336 for ( ; ; ir++) {
337 uint32_t rs;
338 if (ir->o == IR_SLOAD) {
339 if (!(ir->op2 & IRSLOAD_PARENT)) break;
340 for ( ; ; n++) {
341 lua_assert(n < snap->nent);
342 if (snap_slot(map[n]) == ir->op1) {
343 ref = snap_ref(map[n++]);
344 break;
345 }
346 }
347 } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
348 ref++;
349 } else if (ir->o == IR_PVAL) {
350 ref = ir->op1 + REF_BIAS;
351 } else {
352 break;
353 }
354 rs = T->ir[ref].prev;
355 if (bloomtest(rfilt, ref))
356 rs = snap_renameref(T, snapno, ref, rs);
357 ir->prev = (uint16_t)rs;
358 lua_assert(regsp_used(rs));
359 }
360 return ir;
361}
362
363/* -- Snapshot replay ----------------------------------------------------- */
364
365/* Replay constant from parent trace. */
366static TRef snap_replay_const(jit_State *J, IRIns *ir)
367{
368 /* Only have to deal with constants that can occur in stack slots. */
369 switch ((IROp)ir->o) {
370 case IR_KPRI: return TREF_PRI(irt_type(ir->t));
371 case IR_KINT: return lj_ir_kint(J, ir->i);
372 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
373 case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
374 case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
375 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
376 default: lua_assert(0); return TREF_NIL; break;
377 }
378}
379
380/* De-duplicate parent reference. */
381static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
382{
383 MSize j;
384 for (j = 0; j < nmax; j++)
385 if (snap_ref(map[j]) == ref)
386 return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
387 return 0;
388}
389
390/* Emit parent reference with de-duplication. */
391static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
392 BloomFilter seen, IRRef ref)
393{
394 IRIns *ir = &T->ir[ref];
395 TRef tr;
396 if (irref_isk(ref))
397 tr = snap_replay_const(J, ir);
398 else if (!regsp_used(ir->prev))
399 tr = 0;
400 else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
401 tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
402 return tr;
403}
404
405/* Check whether a sunk store corresponds to an allocation. Slow path. */
406static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs)
407{
408 if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
409 irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
410 IRIns *irk = IR(irs->op1);
411 if (irk->o == IR_AREF || irk->o == IR_HREFK)
412 irk = IR(irk->op1);
413 return (IR(irk->op1) == ira);
414 }
415 return 0;
416}
417
418/* Check whether a sunk store corresponds to an allocation. Fast path. */
419static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs)
420{
421 if (irs->s != 255)
422 return (ira + irs->s == irs); /* Fast check. */
423 return snap_sunk_store2(J, ira, irs);
424}
425
/* Replay snapshot state to setup side trace.
**
** Rebuilds J->slot[], J->framedepth, J->baseslot, J->base and J->maxslot
** from snapshot number J->exitno of trace T, emitting the needed IR into
** the current trace, and then takes the initial snapshot of the new trace.
**
** Pass 1 visits every snapshot entry: constants are replayed, values with
**   a register or spill slot become parent SLOADs, and values without one
**   get a placeholder (the slot number itself) and request passes 2 and 3.
** Pass 2 emits the PVALs that sunk allocations and their sunk stores
**   depend on, and resolves placeholders that are not sunk allocations.
** Pass 3 re-emits the sunk allocations together with their sunk stores.
*/
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      /* No register or spill slot: defer to passes 2 and 3. The slot number
      ** is stored as a placeholder, which is why slot 0 must not occur here.
      */
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    /* Every frame or continuation entry except slot 0 adds one frame level. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;  /* Set again below only if a sunk allocation is found. */
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        /* Skip slots that no longer hold their placeholder (duplicates). */
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
        pass23 = 1;
        lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
                   ir->o == IR_CNEW || ir->o == IR_CNEWI);
        if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
        if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
            snap_pref(J, T, map, nent, seen, (ir+1)->op2);
        } else {
          IRIns *irs;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
              /* A result of 0 means the stored value has no register or
              ** spill slot; then the operand of that value is used instead.
              */
              if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
                snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
              else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                       irs+1 < irlast && (irs+1)->o == IR_HIOP)
                snap_pref(J, T, map, nent, seen, (irs+1)->op2);
            }
        }
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
        /* Not sunk, but without a location: must be an int->num conversion.
        ** The slot gets the reference for the conversion's operand.
        */
        lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        TRef op1, op2;
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
          J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
          continue;
        }
        /* Operands at or above T->nk are non-constant parent refs. */
        op1 = ir->op1;
        if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
        op2 = ir->op2;
        if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
            lj_needsplit(J);  /* Emit joining HIOP. */
            op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
                             snap_pref(J, T, map, nent, seen, (ir+1)->op2));
          }
          J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
        } else {
          IRIns *irs;
          TRef tr = emitir(ir->ot, op1, op2);
          J->slot[snap_slot(sn)] = tr;
          /* Re-emit every sunk store that belongs to this allocation. */
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
              IRIns *irr = &T->ir[irs->op1];
              TRef val, key = irr->op2, tmp = tr;
              if (irr->o != IR_FREF) {
                /* Rebuild the constant key and, if present, the base ref. */
                IRIns *irk = &T->ir[key];
                if (irr->o == IR_HREFK)
                  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
                                    irk->op2);
                else
                  key = snap_replay_const(J, irk);
                if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                  IRIns *irf = &T->ir[irr->op1];
                  tmp = emitir(irf->ot, tmp, irf->op2);
                }
              }
              tmp = emitir(irr->ot, tmp, key);
              val = snap_pref(J, T, map, nent, seen, irs->op2);
              if (val == 0) {
                /* Value has no location: redo the int->num conversion. */
                IRIns *irc = &T->ir[irs->op2];
                lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
                val = snap_pref(J, T, map, nent, seen, irc->op1);
                val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
              } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
                         irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                /* Split 64 bit value: join the two halves again. */
                IRType t = IRT_I64;
                if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
                  t = IRT_NUM;
                lj_needsplit(J);
                if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                  /* Both halves are constants: build one 64 bit constant. */
                  uint64_t k = (uint32_t)T->ir[irs->op2].i +
                               ((uint64_t)T->ir[(irs+1)->op2].i << 32);
                  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
                                  lj_ir_k64_find(J, k));
                } else {
                  val = emitir_raw(IRT(IR_HIOP, t), val,
                          snap_pref(J, T, map, nent, seen, (irs+1)->op2));
                }
                tmp = emitir(IRT(irs->o, t), tmp, val);
                continue;
              }
              tmp = emitir(irs->ot, tmp, val);
            } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
              emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
            }
        }
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}
580
581/* -- Snapshot restore ---------------------------------------------------- */
582
/* Forward declaration of snap_unsink(), which is defined further down,
** after the value/data restore helpers.
*/
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o);
586
/* Restore a value from the trace exit state.
**
** Writes the value of IR reference ref of trace T to the tagged value *o.
** Constants are materialized directly. Other values are read from the
** spill slot or register recorded for the instruction, after applying any
** rename that matches snapno (rfilt is the pre-computed rename filter).
** An instruction with neither a register nor a spill slot is expected to
** be an int->num conversion and is restored from its operand.
*/
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  /* Renames are rare, so the filter test is expected to fail. */
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcrefi(o->gcr, *sps);
      setitype(o, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      /* No location at all: restore the integer operand and convert it. */
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
    } else if (LJ_64 && irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
    } else {
      /* Primitive types only need the type tag, not a payload. */
      if (!irt_ispri(t))
        setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
      setitype(o, irt_toitype(t));
    }
  }
}
640
#if LJ_HASFFI
/* Restore raw data from the trace exit state.
**
** Copies sz bytes (1, 2, 4 or 8) of the untagged value of IR reference ref
** of trace T to dst. The source is the constant itself, a spill slot, a
** general purpose register or a floating point register of the exit state.
** 32 bit sources are zero-extended through a temporary when 8 bytes are
** requested. An instruction without any location is expected to be an
** int->num conversion of an 8 byte value and is restored from its operand.
*/
static void snap_restoredata(GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      /* 64 bit constants are stored out-of-line. */
      src = mref(ir->ptr, int32_t);
    } else if (sz == 8) {
      /* Zero-extend the 32 bit constant. */
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        /* Zero-extend the 32 bit spill slot contents. */
        tmp = (uint64_t)(uint32_t)*src;
        src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        /* Restore the 4 byte integer operand, then convert it in place. */
        snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
        /* Register numbers at or above RID_MAX_GPR refer to FPRs. */
        src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        /* On big-endian targets the 4 byte value is in the second word. */
        if (LJ_BE && sz == 4) src++;
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif
701
/* Unsink allocation from the trace exit state. Unsink sunk stores.
**
** Re-creates the object of the sunk allocation ir (TNEW, TDUP, CNEW or
** CNEWI) of trace T, stores it in *o and then replays all sunk stores
** into that object which precede the IR position of snapshot snapno.
** The stored values are taken from the exit state ex.
*/
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
             ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    /* The ctype ID is taken from the constant referenced by op1. */
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz = lj_ctype_size(cts, id);
    GCcdata *cd = lj_cdata_new(cts, id, sz);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      /* Immutable cdata: the initializer is op2 (plus a HIOP on 32 bit). */
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
        /* Restore the high word first, then the low word below. */
        snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
        if (LJ_BE) p += 4;
        sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
        if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
          /* The store address is an ADD with a constant offset in op2. */
          IRIns *iro = &T->ir[T->ir[irs->op1].op2];
          uint8_t *p = (uint8_t *)cd;
          CTSize szs;
          lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
          lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
          /* Derive the store size from the type of the store. */
          if (irt_is64(irs->t)) szs = 8;
          else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
          else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
          else szs = 4;
          if (LJ_64 && iro->o == IR_KINT64)
            p += (int64_t)ir_k64(iro)->u64;
          else
            p += iro->i;
          lua_assert(p >= (uint8_t *)cdataptr(cd) &&
                     p + szs <= (uint8_t *)cdataptr(cd) + sz);
          if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            lua_assert(szs == 4);
            snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
            if (LJ_BE) p += 4;
          }
          snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
        }
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    /* TNEW creates an empty table, TDUP copies the template table. */
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
                                  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
        IRIns *irk = &T->ir[irs->op1];
        TValue tmp, *val;
        lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
                   irs->o == IR_FSTORE);
        if (irk->o == IR_FREF) {
          /* The only field store handled here is the metatable. */
          lua_assert(irk->op2 == IRFL_TAB_META);
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          setgcref(t->metatable, obj2gco(tabV(&tmp)));
        } else {
          /* Array/hash store: materialize the constant key first. */
          irk = &T->ir[irk->op2];
          if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
          lj_ir_kvalue(J->L, &tmp, irk);
          val = lj_tab_set(J->L, t, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
          if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            /* Soft-float: the high word comes from the following HIOP. */
            snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
            val->u32.hi = tmp.u32.lo;
          }
        }
      }
  }
}
786
/* Restore interpreter state from exit state with the help of a snapshot.
**
** exptr points to the ExitState captured at the trace exit. The snapshot
** J->exitno of the parent trace J->parent is used to rebuild the Lua stack:
** every entry not flagged SNAP_NORESTORE is written back, sunk allocations
** are re-created, frame links are re-inserted and L->base / L->top are
** updated. Returns the bytecode PC stored in the snapshot.
*/
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
  /* Frame links are consumed backwards, starting at the snapshot's last entry. */
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
  int32_t ftsz0;
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  /* The PC is stored right after the slot entries. */
  const BCIns *pc = snap_pc(map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1;
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
        MSize j;
        for (j = 0; j < n; j++)
          if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
            copyTV(L, o, &frame[snap_slot(map[j])]);
            goto dupslot;
          }
        snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
        continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
        /* Soft-float number: the high word is held by the next IR ref. */
        TValue tmp;
        snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
        o->u32.hi = tmp.u32.lo;
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        /* Overwrite tag with frame link. */
        o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
        L->base = o+1;
      }
    }
  }
  /* All frame links must have been consumed by now. */
  lua_assert(map + nent == flinks);

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    /* Opcodes below BC_FUNCF use the regular frame top. */
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    /* These get the top from the number of slots in the snapshot. */
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}
860
/* Remove the file-local helper macros defined at the top of this file. */
#undef IR
#undef emitir_raw
#undef emitir
864
865#endif
866