1 | /* |
2 | ** Trace recorder (bytecode -> SSA IR). |
3 | ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h |
4 | */ |
5 | |
6 | #define lj_record_c |
7 | #define LUA_CORE |
8 | |
9 | #include "lj_obj.h" |
10 | |
11 | #if LJ_HASJIT |
12 | |
13 | #include "lj_err.h" |
14 | #include "lj_str.h" |
15 | #include "lj_tab.h" |
16 | #include "lj_meta.h" |
17 | #include "lj_frame.h" |
18 | #if LJ_HASFFI |
19 | #include "lj_ctype.h" |
20 | #endif |
21 | #include "lj_bc.h" |
22 | #include "lj_ff.h" |
23 | #include "lj_ir.h" |
24 | #include "lj_jit.h" |
25 | #include "lj_ircall.h" |
26 | #include "lj_iropt.h" |
27 | #include "lj_trace.h" |
28 | #include "lj_record.h" |
29 | #include "lj_ffrecord.h" |
30 | #include "lj_snap.h" |
31 | #include "lj_dispatch.h" |
32 | #include "lj_vm.h" |
33 | |
34 | /* Some local macros to save typing. Undef'd at the end. */ |
35 | #define IR(ref) (&J->cur.ir[(ref)]) |
36 | |
37 | /* Pass IR on to next optimization in chain (FOLD). */ |
38 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) |
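/* Example (as used throughout this file): emitir(IRTGI(IR_EQ), tr,
** lj_ir_kint(J, 0)) emits a guarded integer equality check. FOLD may
** constant-fold it, drop it or CSE it against an identical earlier
** instruction instead of emitting a new one.
*/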
39 | |
40 | /* Emit raw IR without passing through optimizations. */ |
41 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) |
42 | |
43 | /* -- Sanity checks ------------------------------------------------------- */ |
44 | |
45 | #ifdef LUA_USE_ASSERT |
46 | /* Sanity check the whole IR -- sloooow. */ |
47 | static void rec_check_ir(jit_State *J) |
48 | { |
49 | IRRef i, nins = J->cur.nins, nk = J->cur.nk; |
50 | lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); |
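/* The IR is bias-indexed: constants grow downwards from REF_BIAS
** (nk..REF_BIAS-1), instructions grow upwards (REF_BIAS..nins-1).
** The checks below verify that each operand lies in the valid range
** and was defined before its user (op < i for instructions, op > i
** for constants with operands, like KSLOT).
*/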
51 | for (i = nins-1; i >= nk; i--) { |
52 | IRIns *ir = IR(i); |
53 | uint32_t mode = lj_ir_mode[ir->o]; |
54 | IRRef op1 = ir->op1; |
55 | IRRef op2 = ir->op2; |
56 | switch (irm_op1(mode)) { |
57 | case IRMnone: lua_assert(op1 == 0); break; |
58 | case IRMref: lua_assert(op1 >= nk); |
59 | lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; |
60 | case IRMlit: break; |
61 | case IRMcst: lua_assert(i < REF_BIAS); continue; |
62 | } |
63 | switch (irm_op2(mode)) { |
64 | case IRMnone: lua_assert(op2 == 0); break; |
65 | case IRMref: lua_assert(op2 >= nk); |
66 | lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; |
67 | case IRMlit: break; |
68 | case IRMcst: lua_assert(0); break; |
69 | } |
70 | if (ir->prev) { |
71 | lua_assert(ir->prev >= nk); |
72 | lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); |
73 | lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); |
74 | } |
75 | } |
76 | } |
77 | |
78 | /* Compare stack slots and frames of the recorder and the VM. */ |
79 | static void rec_check_slots(jit_State *J) |
80 | { |
81 | BCReg s, nslots = J->baseslot + J->maxslot; |
82 | int32_t depth = 0; |
83 | cTValue *base = J->L->base - J->baseslot; |
84 | lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS); |
85 | lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); |
86 | lua_assert(nslots < LJ_MAX_JSLOTS); |
87 | for (s = 0; s < nslots; s++) { |
88 | TRef tr = J->slot[s]; |
89 | if (tr) { |
90 | cTValue *tv = &base[s]; |
91 | IRRef ref = tref_ref(tr); |
92 | IRIns *ir; |
93 | lua_assert(ref >= J->cur.nk && ref < J->cur.nins); |
94 | ir = IR(ref); |
95 | lua_assert(irt_t(ir->t) == tref_t(tr)); |
96 | if (s == 0) { |
97 | lua_assert(tref_isfunc(tr)); |
98 | } else if ((tr & TREF_FRAME)) { |
99 | GCfunc *fn = gco2func(frame_gc(tv)); |
100 | BCReg delta = (BCReg)(tv - frame_prev(tv)); |
101 | lua_assert(tref_isfunc(tr)); |
102 | if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); |
103 | lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); |
104 | depth++; |
105 | } else if ((tr & TREF_CONT)) { |
106 | lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); |
107 | lua_assert((J->slot[s+1] & TREF_FRAME)); |
108 | depth++; |
109 | } else { |
110 | if (tvisnumber(tv)) |
111 | lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ |
112 | else |
113 | lua_assert(itype2irt(tv) == tref_type(tr)); |
114 | if (tref_isk(tr)) { /* Compare constants. */ |
115 | TValue tvk; |
116 | lj_ir_kvalue(J->L, &tvk, ir); |
117 | if (!(tvisnum(&tvk) && tvisnan(&tvk))) |
118 | lua_assert(lj_obj_equal(tv, &tvk)); |
119 | else |
120 | lua_assert(tvisnum(tv) && tvisnan(tv)); |
121 | } |
122 | } |
123 | } |
124 | } |
125 | lua_assert(J->framedepth == depth); |
126 | } |
127 | #endif |
128 | |
129 | /* -- Type handling and specialization ------------------------------------ */ |
130 | |
131 | /* Note: these functions return tagged references (TRef). */ |
132 | |
133 | /* Specialize a slot to a specific type. Note: slot can be negative! */ |
134 | static TRef sloadt(jit_State *J, int32_t slot, IRType t, int mode) |
135 | { |
136 | /* Caller may set IRT_GUARD in t. */ |
137 | TRef ref = emitir_raw(IRT(IR_SLOAD, t), (int32_t)J->baseslot+slot, mode); |
138 | J->base[slot] = ref; |
139 | return ref; |
140 | } |
141 | |
142 | /* Specialize a slot to the runtime type. Note: slot can be negative! */ |
143 | static TRef sload(jit_State *J, int32_t slot) |
144 | { |
145 | IRType t = itype2irt(&J->L->base[slot]); |
146 | TRef ref = emitir_raw(IRTG(IR_SLOAD, t), (int32_t)J->baseslot+slot, |
147 | IRSLOAD_TYPECHECK); |
148 | if (irtype_ispri(t)) ref = TREF_PRI(t); /* Canonicalize primitive refs. */ |
149 | J->base[slot] = ref; |
150 | return ref; |
151 | } |
152 | |
153 | /* Get TRef from slot. Load slot and specialize if not done already. */ |
154 | #define getslot(J, s) (J->base[(s)] ? J->base[(s)] : sload(J, (int32_t)(s))) |
155 | |
156 | /* Get TRef for current function. */ |
157 | static TRef getcurrf(jit_State *J) |
158 | { |
159 | if (J->base[-1]) |
160 | return J->base[-1]; |
161 | lua_assert(J->baseslot == 1); |
162 | return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); |
163 | } |
164 | |
165 | /* Compare for raw object equality. |
166 | ** Returns 0 if the objects are the same. |
167 | ** Returns 1 if they are different, but the same type. |
168 | ** Returns 2 for two different types. |
169 | ** Comparisons between primitives always return 1 -- no caller cares about it. |
170 | */ |
171 | int lj_record_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv) |
172 | { |
173 | int diff = !lj_obj_equal(av, bv); |
174 | if (!tref_isk2(a, b)) { /* Shortcut, also handles primitives. */ |
175 | IRType ta = tref_isinteger(a) ? IRT_INT : tref_type(a); |
176 | IRType tb = tref_isinteger(b) ? IRT_INT : tref_type(b); |
177 | if (ta != tb) { |
178 | /* Widen mixed number/int comparisons to number/number comparison. */ |
179 | if (ta == IRT_INT && tb == IRT_NUM) { |
180 | a = emitir(IRTN(IR_CONV), a, IRCONV_NUM_INT); |
181 | ta = IRT_NUM; |
182 | } else if (ta == IRT_NUM && tb == IRT_INT) { |
183 | b = emitir(IRTN(IR_CONV), b, IRCONV_NUM_INT); |
184 | } else { |
185 | return 2; /* Two different types are never equal. */ |
186 | } |
187 | } |
188 | emitir(IRTG(diff ? IR_NE : IR_EQ, ta), a, b); |
189 | } |
190 | return diff; |
191 | } |
192 | |
193 | /* Constify a value. Returns 0 for non-representable object types. */ |
194 | TRef lj_record_constify(jit_State *J, cTValue *o) |
195 | { |
196 | if (tvisgcv(o)) |
197 | return lj_ir_kgc(J, gcV(o), itype2irt(o)); |
198 | else if (tvisint(o)) |
199 | return lj_ir_kint(J, intV(o)); |
200 | else if (tvisnum(o)) |
201 | return lj_ir_knumint(J, numV(o)); |
202 | else if (tvisbool(o)) |
203 | return TREF_PRI(itype2irt(o)); |
204 | else |
205 | return 0; /* Can't represent lightuserdata (pointless). */ |
206 | } |
207 | |
208 | /* -- Record loop ops ----------------------------------------------------- */ |
209 | |
210 | /* Loop event. */ |
211 | typedef enum { |
212 | LOOPEV_LEAVE, /* Loop is left or not entered. */ |
213 | LOOPEV_ENTERLO, /* Loop is entered with a low iteration count left. */ |
214 | LOOPEV_ENTER /* Loop is entered. */ |
215 | } LoopEvent; |
216 | |
217 | /* Canonicalize slots: convert integers to numbers. */ |
218 | static void canonicalize_slots(jit_State *J) |
219 | { |
220 | BCReg s; |
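/* Nothing to do for DUALNUM builds: there the interpreter itself may
** keep integer-tagged slots, so they are a valid state to exit to.
*/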
221 | if (LJ_DUALNUM) return; |
222 | for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { |
223 | TRef tr = J->slot[s]; |
224 | if (tref_isinteger(tr)) { |
225 | IRIns *ir = IR(tref_ref(tr)); |
226 | if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) |
227 | J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); |
228 | } |
229 | } |
230 | } |
231 | |
232 | /* Stop recording. */ |
233 | static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) |
234 | { |
235 | lj_trace_end(J); |
236 | J->cur.linktype = (uint8_t)linktype; |
237 | J->cur.link = (uint16_t)lnk; |
238 | /* Looping back at the same stack level? */ |
239 | if (lnk == J->cur.traceno && J->framedepth + J->retdepth == 0) { |
240 | if ((J->flags & JIT_F_OPT_LOOP)) /* Shall we try to create a loop? */ |
241 | goto nocanon; /* Do not canonicalize or we lose the narrowing. */ |
242 | if (J->cur.root) /* Otherwise ensure we always link to the root trace. */ |
243 | J->cur.link = J->cur.root; |
244 | } |
245 | canonicalize_slots(J); |
246 | nocanon: |
247 | /* Note: all loop ops must set J->pc to the following instruction! */ |
248 | lj_snap_add(J); /* Add loop snapshot. */ |
249 | J->needsnap = 0; |
250 | J->mergesnap = 1; /* In case recording continues. */ |
251 | } |
252 | |
/* Search bytecode backwards for an int/num constant slot initializer. */
254 | static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) |
255 | { |
256 | /* This algorithm is rather simplistic and assumes quite a bit about |
257 | ** how the bytecode is generated. It works fine for FORI initializers, |
258 | ** but it won't necessarily work in other cases (e.g. iterator arguments). |
259 | ** It doesn't do anything fancy, either (like backpropagating MOVs). |
260 | */ |
261 | const BCIns *pc, *startpc = proto_bc(J->pt); |
262 | for (pc = endpc-1; pc > startpc; pc--) { |
263 | BCIns ins = *pc; |
264 | BCOp op = bc_op(ins); |
265 | /* First try to find the last instruction that stores to this slot. */ |
266 | if (bcmode_a(op) == BCMbase && bc_a(ins) <= slot) { |
267 | return 0; /* Multiple results, e.g. from a CALL or KNIL. */ |
268 | } else if (bcmode_a(op) == BCMdst && bc_a(ins) == slot) { |
269 | if (op == BC_KSHORT || op == BC_KNUM) { /* Found const. initializer. */ |
270 | /* Now try to verify there's no forward jump across it. */ |
271 | const BCIns *kpc = pc; |
272 | for (; pc > startpc; pc--) |
273 | if (bc_op(*pc) == BC_JMP) { |
274 | const BCIns *target = pc+bc_j(*pc)+1; |
275 | if (target > kpc && target <= endpc) |
276 | return 0; /* Conditional assignment. */ |
277 | } |
278 | if (op == BC_KSHORT) { |
279 | int32_t k = (int32_t)(int16_t)bc_d(ins); |
280 | return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k); |
281 | } else { |
282 | cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); |
283 | if (t == IRT_INT) { |
284 | int32_t k = numberVint(tv); |
285 | if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ |
286 | return lj_ir_kint(J, k); |
287 | return 0; /* Type mismatch. */ |
288 | } else { |
289 | return lj_ir_knum(J, numberVnum(tv)); |
290 | } |
291 | } |
292 | } |
293 | return 0; /* Non-constant initializer. */ |
294 | } |
295 | } |
296 | return 0; /* No assignment to this slot found? */ |
297 | } |
298 | |
299 | /* Load and optionally convert a FORI argument from a slot. */ |
300 | static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode) |
301 | { |
302 | int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0; |
303 | return sloadt(J, (int32_t)slot, |
304 | t + (((mode & IRSLOAD_TYPECHECK) || |
305 | (conv && t == IRT_INT && !(mode >> 16))) ? |
306 | IRT_GUARD : 0), |
307 | mode + conv); |
308 | } |
309 | |
310 | /* Peek before FORI to find a const initializer. Otherwise load from slot. */ |
311 | static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, |
312 | IRType t, int mode) |
313 | { |
314 | TRef tr = J->base[slot]; |
315 | if (!tr) { |
316 | tr = find_kinit(J, fori, slot, t); |
317 | if (!tr) |
318 | tr = fori_load(J, slot, t, mode); |
319 | } |
320 | return tr; |
321 | } |
322 | |
323 | /* Return the direction of the FOR loop iterator. |
324 | ** It's important to exactly reproduce the semantics of the interpreter. |
325 | */ |
326 | static int rec_for_direction(cTValue *o) |
327 | { |
328 | return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0; |
329 | } |
330 | |
331 | /* Simulate the runtime behavior of the FOR loop iterator. */ |
332 | static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl) |
333 | { |
334 | lua_Number stopv = numberVnum(&o[FORL_STOP]); |
335 | lua_Number idxv = numberVnum(&o[FORL_IDX]); |
336 | lua_Number stepv = numberVnum(&o[FORL_STEP]); |
337 | if (isforl) |
338 | idxv += stepv; |
339 | if (rec_for_direction(&o[FORL_STEP])) { |
340 | if (idxv <= stopv) { |
341 | *op = IR_LE; |
342 | return idxv + 2*stepv > stopv ? LOOPEV_ENTERLO : LOOPEV_ENTER; |
343 | } |
344 | *op = IR_GT; return LOOPEV_LEAVE; |
345 | } else { |
346 | if (stopv <= idxv) { |
347 | *op = IR_GE; |
348 | return idxv + 2*stepv < stopv ? LOOPEV_ENTERLO : LOOPEV_ENTER; |
349 | } |
350 | *op = IR_LT; return LOOPEV_LEAVE; |
351 | } |
352 | } |
353 | |
354 | /* Record checks for FOR loop overflow and step direction. */ |
355 | static void rec_for_check(jit_State *J, IRType t, int dir, |
356 | TRef stop, TRef step, int init) |
357 | { |
358 | if (!tref_isk(step)) { |
359 | /* Non-constant step: need a guard for the direction. */ |
360 | TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J); |
361 | emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero); |
362 | /* Add hoistable overflow checks for a narrowed FORL index. */ |
363 | if (init && t == IRT_INT) { |
364 | if (tref_isk(stop)) { |
365 | /* Constant stop: optimize check away or to a range check for step. */ |
366 | int32_t k = IR(tref_ref(stop))->i; |
367 | if (dir) { |
368 | if (k > 0) |
369 | emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k)); |
370 | } else { |
371 | if (k < 0) |
372 | emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k)); |
373 | } |
374 | } else { |
375 | /* Stop+step variable: need full overflow check. */ |
376 | TRef tr = emitir(IRTGI(IR_ADDOV), step, stop); |
377 | emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */ |
378 | } |
379 | } |
380 | } else if (init && t == IRT_INT && !tref_isk(stop)) { |
381 | /* Constant step: optimize overflow check to a range check for stop. */ |
382 | int32_t k = IR(tref_ref(step))->i; |
383 | k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k; |
384 | emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k)); |
385 | } |
386 | } |
387 | |
388 | /* Record a FORL instruction. */ |
389 | static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, |
390 | int init) |
391 | { |
392 | BCReg ra = bc_a(*fori); |
393 | cTValue *tv = &J->L->base[ra]; |
394 | TRef idx = J->base[ra+FORL_IDX]; |
395 | IRType t = idx ? tref_type(idx) : |
396 | (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM; |
397 | int mode = IRSLOAD_INHERIT + |
398 | ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0); |
399 | TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); |
400 | TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); |
401 | int tc, dir = rec_for_direction(&tv[FORL_STEP]); |
402 | lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); |
403 | scev->t.irt = t; |
404 | scev->dir = dir; |
405 | scev->stop = tref_ref(stop); |
406 | scev->step = tref_ref(step); |
407 | rec_for_check(J, t, dir, stop, step, init); |
408 | scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT)); |
409 | tc = (LJ_DUALNUM && |
410 | !(scev->start && irref_isk(scev->stop) && irref_isk(scev->step) && |
411 | tvisint(&tv[FORL_IDX]) == (t == IRT_INT))) ? |
412 | IRSLOAD_TYPECHECK : 0; |
413 | if (tc) { |
414 | J->base[ra+FORL_STOP] = stop; |
415 | J->base[ra+FORL_STEP] = step; |
416 | } |
417 | if (!idx) |
418 | idx = fori_load(J, ra+FORL_IDX, t, |
419 | IRSLOAD_INHERIT + tc + (J->scev.start << 16)); |
420 | if (!init) |
421 | J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step); |
422 | J->base[ra+FORL_EXT] = idx; |
423 | scev->idx = tref_ref(idx); |
424 | J->maxslot = ra+FORL_EXT+1; |
425 | } |
426 | |
427 | /* Record FORL/JFORL or FORI/JFORI. */ |
428 | static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) |
429 | { |
430 | BCReg ra = bc_a(*fori); |
431 | TValue *tv = &J->L->base[ra]; |
432 | TRef *tr = &J->base[ra]; |
433 | IROp op; |
434 | LoopEvent ev; |
435 | TRef stop; |
436 | IRType t; |
437 | if (isforl) { /* Handle FORL/JFORL opcodes. */ |
438 | TRef idx = tr[FORL_IDX]; |
439 | if (tref_ref(idx) == J->scev.idx) { |
440 | t = J->scev.t.irt; |
441 | stop = J->scev.stop; |
442 | idx = emitir(IRT(IR_ADD, t), idx, J->scev.step); |
443 | tr[FORL_EXT] = tr[FORL_IDX] = idx; |
444 | } else { |
445 | ScEvEntry scev; |
446 | rec_for_loop(J, fori, &scev, 0); |
447 | t = scev.t.irt; |
448 | stop = scev.stop; |
449 | } |
450 | } else { /* Handle FORI/JFORI opcodes. */ |
451 | BCReg i; |
452 | lj_meta_for(J->L, tv); |
453 | t = (LJ_DUALNUM || tref_isint(tr[FORL_IDX])) ? lj_opt_narrow_forl(J, tv) : |
454 | IRT_NUM; |
455 | for (i = FORL_IDX; i <= FORL_STEP; i++) { |
456 | if (!tr[i]) sload(J, ra+i); |
457 | lua_assert(tref_isnumber_str(tr[i])); |
458 | if (tref_isstr(tr[i])) |
459 | tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); |
460 | if (t == IRT_INT) { |
461 | if (!tref_isinteger(tr[i])) |
462 | tr[i] = emitir(IRTGI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK); |
463 | } else { |
464 | if (!tref_isnum(tr[i])) |
465 | tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT); |
466 | } |
467 | } |
468 | tr[FORL_EXT] = tr[FORL_IDX]; |
469 | stop = tr[FORL_STOP]; |
470 | rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]), |
471 | stop, tr[FORL_STEP], 1); |
472 | } |
473 | |
474 | ev = rec_for_iter(&op, tv, isforl); |
475 | if (ev == LOOPEV_LEAVE) { |
476 | J->maxslot = ra+FORL_EXT+1; |
477 | J->pc = fori+1; |
478 | } else { |
479 | J->maxslot = ra; |
480 | J->pc = fori+bc_j(*fori)+1; |
481 | } |
482 | lj_snap_add(J); |
483 | |
484 | emitir(IRTG(op, t), tr[FORL_IDX], stop); |
485 | |
486 | if (ev == LOOPEV_LEAVE) { |
487 | J->maxslot = ra; |
488 | J->pc = fori+bc_j(*fori)+1; |
489 | } else { |
490 | J->maxslot = ra+FORL_EXT+1; |
491 | J->pc = fori+1; |
492 | } |
493 | J->needsnap = 1; |
494 | return ev; |
495 | } |
496 | |
497 | /* Record ITERL/JITERL. */ |
498 | static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) |
499 | { |
500 | BCReg ra = bc_a(iterins); |
501 | lua_assert(J->base[ra] != 0); |
502 | if (!tref_isnil(J->base[ra])) { /* Looping back? */ |
503 | J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ |
504 | J->maxslot = ra-1+bc_b(J->pc[-1]); |
505 | J->pc += bc_j(iterins)+1; |
506 | return LOOPEV_ENTER; |
507 | } else { |
508 | J->maxslot = ra-3; |
509 | J->pc++; |
510 | return LOOPEV_LEAVE; |
511 | } |
512 | } |
513 | |
514 | /* Record LOOP/JLOOP. Now, that was easy. */ |
515 | static LoopEvent rec_loop(jit_State *J, BCReg ra) |
516 | { |
517 | if (ra < J->maxslot) J->maxslot = ra; |
518 | J->pc++; |
519 | return LOOPEV_ENTER; |
520 | } |
521 | |
522 | /* Check if a loop repeatedly failed to trace because it didn't loop back. */ |
523 | static int innerloopleft(jit_State *J, const BCIns *pc) |
524 | { |
525 | ptrdiff_t i; |
526 | for (i = 0; i < PENALTY_SLOTS; i++) |
527 | if (mref(J->penalty[i].pc, const BCIns) == pc) { |
528 | if ((J->penalty[i].reason == LJ_TRERR_LLEAVE || |
529 | J->penalty[i].reason == LJ_TRERR_LINNER) && |
530 | J->penalty[i].val >= 2*PENALTY_MIN) |
531 | return 1; |
532 | break; |
533 | } |
534 | return 0; |
535 | } |
536 | |
537 | /* Handle the case when an interpreted loop op is hit. */ |
538 | static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) |
539 | { |
540 | if (J->parent == 0) { |
541 | if (pc == J->startpc && J->framedepth + J->retdepth == 0) { |
542 | /* Same loop? */ |
543 | if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ |
544 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
545 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ |
546 | } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ |
547 | /* It's usually better to abort here and wait until the inner loop |
548 | ** is traced. But if the inner loop repeatedly didn't loop back, |
549 | ** this indicates a low trip count. In this case try unrolling |
550 | ** an inner loop even in a root trace. But it's better to be a bit |
551 | ** more conservative here and only do it for very short loops. |
552 | */ |
553 | if (bc_j(*pc) != -1 && !innerloopleft(J, pc)) |
554 | lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */ |
555 | if ((ev != LOOPEV_ENTERLO && |
556 | J->loopref && J->cur.nins - J->loopref > 24) || --J->loopunroll < 0) |
557 | lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */ |
558 | J->loopref = J->cur.nins; |
559 | } |
560 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters an inner loop. */ |
561 | J->loopref = J->cur.nins; |
562 | if (--J->loopunroll < 0) |
563 | lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */ |
564 | } /* Side trace continues across a loop that's left or not entered. */ |
565 | } |
566 | |
567 | /* Handle the case when an already compiled loop op is hit. */ |
568 | static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) |
569 | { |
570 | if (J->parent == 0) { /* Root trace hit an inner loop. */ |
571 | /* Better let the inner loop spawn a side trace back here. */ |
572 | lj_trace_err(J, LJ_TRERR_LINNER); |
573 | } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ |
574 | J->instunroll = 0; /* Cannot continue across a compiled loop op. */ |
575 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
576 | rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ |
577 | else |
578 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ |
579 | } /* Side trace continues across a loop that's left or not entered. */ |
580 | } |
581 | |
582 | /* -- Record calls and returns -------------------------------------------- */ |
583 | |
584 | /* Specialize to the runtime value of the called function or its prototype. */ |
585 | static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) |
586 | { |
587 | TRef kfunc; |
588 | if (isluafunc(fn)) { |
589 | GCproto *pt = funcproto(fn); |
590 | /* Too many closures created? Probably not a monomorphic function. */ |
591 | if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ |
592 | TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); |
593 | emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); |
594 | (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ |
595 | return tr; |
596 | } |
597 | } |
598 | /* Otherwise specialize to the function (closure) value itself. */ |
599 | kfunc = lj_ir_kfunc(J, fn); |
600 | emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfunc); |
601 | return kfunc; |
602 | } |
603 | |
604 | /* Record call setup. */ |
605 | static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) |
606 | { |
607 | RecordIndex ix; |
608 | TValue *functv = &J->L->base[func]; |
609 | TRef *fbase = &J->base[func]; |
610 | ptrdiff_t i; |
611 | for (i = 0; i <= nargs; i++) |
612 | (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ |
613 | if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ |
614 | ix.tab = fbase[0]; |
615 | copyTV(J->L, &ix.tabv, functv); |
616 | if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) |
617 | lj_trace_err(J, LJ_TRERR_NOMM); |
618 | for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ |
619 | fbase[i] = fbase[i-1]; |
620 | fbase[0] = ix.mobj; /* Replace function. */ |
621 | functv = &ix.mobjv; |
622 | } |
623 | fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); |
624 | J->maxslot = (BCReg)nargs; |
625 | } |
626 | |
627 | /* Record call. */ |
628 | void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) |
629 | { |
630 | rec_call_setup(J, func, nargs); |
631 | /* Bump frame. */ |
632 | J->framedepth++; |
633 | J->base += func+1; |
634 | J->baseslot += func+1; |
635 | } |
636 | |
637 | /* Record tail call. */ |
638 | void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) |
639 | { |
640 | rec_call_setup(J, func, nargs); |
641 | if (frame_isvarg(J->L->base - 1)) { |
642 | BCReg cbase = (BCReg)frame_delta(J->L->base - 1); |
643 | if (--J->framedepth < 0) |
644 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
645 | J->baseslot -= (BCReg)cbase; |
646 | J->base -= cbase; |
647 | func += cbase; |
648 | } |
649 | /* Move func + args down. */ |
650 | memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); |
651 | /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ |
652 | /* Tailcalls can form a loop, so count towards the loop unroll limit. */ |
653 | if (++J->tailcalled > J->loopunroll) |
654 | lj_trace_err(J, LJ_TRERR_LUNROLL); |
655 | } |
656 | |
657 | /* Check unroll limits for down-recursion. */ |
658 | static int check_downrec_unroll(jit_State *J, GCproto *pt) |
659 | { |
660 | IRRef ptref; |
661 | for (ptref = J->chain[IR_KGC]; ptref; ptref = IR(ptref)->prev) |
662 | if (ir_kgc(IR(ptref)) == obj2gco(pt)) { |
663 | int count = 0; |
664 | IRRef ref; |
665 | for (ref = J->chain[IR_RETF]; ref; ref = IR(ref)->prev) |
666 | if (IR(ref)->op1 == ptref) |
667 | count++; |
668 | if (count) { |
669 | if (J->pc == J->startpc) { |
670 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) |
671 | return 1; |
672 | } else { |
673 | lj_trace_err(J, LJ_TRERR_DOWNREC); |
674 | } |
675 | } |
676 | } |
677 | return 0; |
678 | } |
679 | |
680 | /* Record return. */ |
681 | void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) |
682 | { |
683 | TValue *frame = J->L->base - 1; |
684 | ptrdiff_t i; |
685 | for (i = 0; i < gotresults; i++) |
686 | (void)getslot(J, rbase+i); /* Ensure all results have a reference. */ |
687 | while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */ |
688 | BCReg cbase = (BCReg)frame_delta(frame); |
689 | if (--J->framedepth < 0) |
690 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
691 | lua_assert(J->baseslot > 1); |
692 | gotresults++; |
693 | rbase += cbase; |
694 | J->baseslot -= (BCReg)cbase; |
695 | J->base -= cbase; |
696 | J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ |
697 | frame = frame_prevd(frame); |
698 | } |
699 | /* Return to lower frame via interpreter for unhandled cases. */ |
700 | if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && |
701 | (!frame_islua(frame) || |
702 | (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { |
703 | /* NYI: specialize to frame type and return directly, not via RET*. */ |
704 | for (i = 0; i < (ptrdiff_t)rbase; i++) |
705 | J->base[i] = 0; /* Purge dead slots. */ |
706 | J->maxslot = rbase + (BCReg)gotresults; |
707 | rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ |
708 | return; |
709 | } |
710 | if (frame_isvarg(frame)) { |
711 | BCReg cbase = (BCReg)frame_delta(frame); |
712 | if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ |
713 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
714 | lua_assert(J->baseslot > 1); |
715 | rbase += cbase; |
716 | J->baseslot -= (BCReg)cbase; |
717 | J->base -= cbase; |
718 | frame = frame_prevd(frame); |
719 | } |
720 | if (frame_islua(frame)) { /* Return to Lua frame. */ |
721 | BCIns callins = *(frame_pc(frame)-1); |
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 : gotresults;
723 | BCReg cbase = bc_a(callins); |
724 | GCproto *pt = funcproto(frame_func(frame - (cbase+1))); |
725 | if ((pt->flags & PROTO_NOJIT)) |
726 | lj_trace_err(J, LJ_TRERR_CJITOFF); |
727 | if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { |
728 | if (check_downrec_unroll(J, pt)) { |
729 | J->maxslot = (BCReg)(rbase + gotresults); |
730 | lj_snap_purge(J); |
731 | rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ |
732 | return; |
733 | } |
734 | lj_snap_add(J); |
735 | } |
736 | for (i = 0; i < nresults; i++) /* Adjust results. */ |
737 | J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; |
738 | J->maxslot = cbase+(BCReg)nresults; |
739 | if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ |
740 | J->framedepth--; |
741 | lua_assert(J->baseslot > cbase+1); |
742 | J->baseslot -= cbase+1; |
743 | J->base -= cbase+1; |
744 | } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { |
745 | /* Return to lower frame would leave the loop in a root trace. */ |
746 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
747 | } else { /* Return to lower frame. Guard for the target we return to. */ |
748 | TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); |
749 | TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); |
750 | emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); |
751 | J->retdepth++; |
752 | J->needsnap = 1; |
753 | lua_assert(J->baseslot == 1); |
754 | /* Shift result slots up and clear the slots of the new frame below. */ |
755 | memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); |
756 | memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); |
757 | } |
758 | } else if (frame_iscont(frame)) { /* Return to continuation frame. */ |
759 | ASMFunction cont = frame_contf(frame); |
760 | BCReg cbase = (BCReg)frame_delta(frame); |
761 | if ((J->framedepth -= 2) < 0) |
762 | lj_trace_err(J, LJ_TRERR_NYIRETL); |
763 | J->baseslot -= (BCReg)cbase; |
764 | J->base -= cbase; |
765 | J->maxslot = cbase-2; |
766 | if (cont == lj_cont_ra) { |
767 | /* Copy result to destination slot. */ |
768 | BCReg dst = bc_a(*(frame_contpc(frame)-1)); |
769 | J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; |
770 | if (dst >= J->maxslot) J->maxslot = dst+1; |
771 | } else if (cont == lj_cont_nop) { |
772 | /* Nothing to do here. */ |
773 | } else if (cont == lj_cont_cat) { |
774 | lua_assert(0); |
775 | } else { |
776 | /* Result type already specialized. */ |
777 | lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); |
778 | } |
779 | } else { |
780 | lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ |
781 | } |
782 | lua_assert(J->baseslot >= 1); |
783 | } |
784 | |
785 | /* -- Metamethod handling ------------------------------------------------- */ |
786 | |
787 | /* Prepare to record call to metamethod. */ |
788 | static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) |
789 | { |
790 | BCReg s, top = curr_proto(J->L)->framesize; |
791 | TRef trcont; |
792 | setcont(&J->L->base[top], cont); |
793 | #if LJ_64 |
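/* Store the continuation as a 32 bit offset from lj_vm_asm_begin, so
** it still fits into a 32 bit pointer constant on 64 bit targets.
*/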
794 | trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); |
795 | #else |
796 | trcont = lj_ir_kptr(J, (void *)cont); |
797 | #endif |
798 | J->base[top] = trcont | TREF_CONT; |
799 | J->framedepth++; |
800 | for (s = J->maxslot; s < top; s++) |
801 | J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ |
802 | return top+1; |
803 | } |
804 | |
805 | /* Record metamethod lookup. */ |
806 | int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) |
807 | { |
808 | RecordIndex mix; |
809 | GCtab *mt; |
810 | if (tref_istab(ix->tab)) { |
811 | mt = tabref(tabV(&ix->tabv)->metatable); |
812 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META); |
813 | } else if (tref_isudata(ix->tab)) { |
814 | int udtype = udataV(&ix->tabv)->udtype; |
815 | mt = tabref(udataV(&ix->tabv)->metatable); |
816 | /* The metatables of special userdata objects are treated as immutable. */ |
817 | if (udtype != UDTYPE_USERDATA) { |
818 | cTValue *mo; |
819 | if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { |
820 | /* Specialize to the C library namespace object. */ |
821 | emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); |
822 | } else { |
823 | /* Specialize to the type of userdata. */ |
824 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); |
825 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, udtype)); |
826 | } |
827 | immutable_mt: |
828 | mo = lj_tab_getstr(mt, mmname_str(J2G(J), mm)); |
829 | if (!mo || tvisnil(mo)) |
830 | return 0; /* No metamethod. */ |
831 | /* Treat metamethod or index table as immutable, too. */ |
832 | if (!(tvisfunc(mo) || tvistab(mo))) |
833 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
834 | copyTV(J->L, &ix->mobjv, mo); |
835 | ix->mobj = lj_ir_kgc(J, gcV(mo), tvisfunc(mo) ? IRT_FUNC : IRT_TAB); |
836 | ix->mtv = mt; |
837 | ix->mt = TREF_NIL; /* Dummy value for comparison semantics. */ |
838 | return 1; /* Got metamethod or index table. */ |
839 | } |
840 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); |
841 | } else { |
842 | /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ |
843 | mt = tabref(basemt_obj(J2G(J), &ix->tabv)); |
844 | if (mt == NULL) { |
845 | ix->mt = TREF_NIL; |
846 | return 0; /* No metamethod. */ |
847 | } |
848 | /* The cdata metatable is treated as immutable. */ |
849 | if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; |
850 | ix->mt = mix.tab = lj_ir_ktab(J, mt); |
851 | goto nocheck; |
852 | } |
853 | ix->mt = mt ? mix.tab : TREF_NIL; |
854 | emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mix.tab, lj_ir_knull(J, IRT_TAB)); |
855 | nocheck: |
856 | if (mt) { |
857 | GCstr *mmstr = mmname_str(J2G(J), mm); |
858 | cTValue *mo = lj_tab_getstr(mt, mmstr); |
859 | if (mo && !tvisnil(mo)) |
860 | copyTV(J->L, &ix->mobjv, mo); |
861 | ix->mtv = mt; |
862 | settabV(J->L, &mix.tabv, mt); |
863 | setstrV(J->L, &mix.keyv, mmstr); |
864 | mix.key = lj_ir_kstr(J, mmstr); |
865 | mix.val = 0; |
866 | mix.idxchain = 0; |
867 | ix->mobj = lj_record_idx(J, &mix); |
868 | return !tref_isnil(ix->mobj); /* 1 if metamethod found, 0 if not. */ |
869 | } |
870 | return 0; /* No metamethod. */ |
871 | } |
872 | |
873 | /* Record call to arithmetic metamethod. */ |
874 | static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) |
875 | { |
876 | /* Set up metamethod call first to save ix->tab and ix->tabv. */ |
877 | BCReg func = rec_mm_prep(J, lj_cont_ra); |
878 | TRef *base = J->base + func; |
879 | TValue *basev = J->L->base + func; |
880 | base[1] = ix->tab; base[2] = ix->key; |
881 | copyTV(J->L, basev+1, &ix->tabv); |
882 | copyTV(J->L, basev+2, &ix->keyv); |
883 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ |
884 | if (mm != MM_unm) { |
885 | ix->tab = ix->key; |
886 | copyTV(J->L, &ix->tabv, &ix->keyv); |
887 | if (lj_record_mm_lookup(J, ix, mm)) /* Lookup mm on 2nd operand. */ |
888 | goto ok; |
889 | } |
890 | lj_trace_err(J, LJ_TRERR_NOMM); |
891 | } |
892 | ok: |
893 | base[0] = ix->mobj; |
894 | copyTV(J->L, basev+0, &ix->mobjv); |
895 | lj_record_call(J, func, 2); |
896 | return 0; /* No result yet. */ |
897 | } |
898 | |
899 | /* Record call to __len metamethod. */ |
900 | static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) |
901 | { |
902 | RecordIndex ix; |
903 | ix.tab = tr; |
904 | copyTV(J->L, &ix.tabv, tv); |
905 | if (lj_record_mm_lookup(J, &ix, MM_len)) { |
906 | BCReg func = rec_mm_prep(J, lj_cont_ra); |
907 | TRef *base = J->base + func; |
908 | TValue *basev = J->L->base + func; |
909 | base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); |
910 | base[1] = tr; copyTV(J->L, basev+1, tv); |
911 | #if LJ_52 |
912 | base[2] = tr; copyTV(J->L, basev+2, tv); |
913 | #else |
914 | base[2] = TREF_NIL; setnilV(basev+2); |
915 | #endif |
916 | lj_record_call(J, func, 2); |
917 | } else { |
918 | if (LJ_52 && tref_istab(tr)) |
919 | return lj_ir_call(J, IRCALL_lj_tab_len, tr); |
920 | lj_trace_err(J, LJ_TRERR_NOMM); |
921 | } |
922 | return 0; /* No result yet. */ |
923 | } |
924 | |
925 | /* Call a comparison metamethod. */ |
926 | static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) |
927 | { |
928 | BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); |
929 | TRef *base = J->base + func; |
930 | TValue *tv = J->L->base + func; |
931 | base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; |
932 | copyTV(J->L, tv+0, &ix->mobjv); |
933 | copyTV(J->L, tv+1, &ix->valv); |
934 | copyTV(J->L, tv+2, &ix->keyv); |
935 | lj_record_call(J, func, 2); |
936 | } |
937 | |
938 | /* Record call to equality comparison metamethod (for tab and udata only). */ |
939 | static void rec_mm_equal(jit_State *J, RecordIndex *ix, int op) |
940 | { |
941 | ix->tab = ix->val; |
942 | copyTV(J->L, &ix->tabv, &ix->valv); |
943 | if (lj_record_mm_lookup(J, ix, MM_eq)) { /* Lookup mm on 1st operand. */ |
944 | cTValue *bv; |
945 | TRef mo1 = ix->mobj; |
946 | TValue mo1v; |
947 | copyTV(J->L, &mo1v, &ix->mobjv); |
948 | /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */ |
949 | bv = &ix->keyv; |
950 | if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) { |
951 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META); |
952 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); |
953 | } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) { |
954 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META); |
955 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); |
956 | } else { /* Lookup metamethod on 2nd operand and compare both. */ |
957 | ix->tab = ix->key; |
958 | copyTV(J->L, &ix->tabv, bv); |
959 | if (!lj_record_mm_lookup(J, ix, MM_eq) || |
960 | lj_record_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv)) |
961 | return; |
962 | } |
963 | rec_mm_callcomp(J, ix, op); |
964 | } |
965 | } |
966 | |
967 | /* Record call to ordered comparison metamethods (for arbitrary objects). */ |
968 | static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op) |
969 | { |
970 | ix->tab = ix->val; |
971 | copyTV(J->L, &ix->tabv, &ix->valv); |
972 | while (1) { |
973 | MMS mm = (op & 2) ? MM_le : MM_lt; /* Try __le + __lt or only __lt. */ |
974 | #if LJ_52 |
975 | if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ |
976 | ix->tab = ix->key; |
977 | copyTV(J->L, &ix->tabv, &ix->keyv); |
978 | if (!lj_record_mm_lookup(J, ix, mm)) /* Lookup mm on 2nd operand. */ |
979 | goto nomatch; |
980 | } |
981 | rec_mm_callcomp(J, ix, op); |
982 | return; |
983 | #else |
984 | if (lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ |
985 | cTValue *bv; |
986 | TRef mo1 = ix->mobj; |
987 | TValue mo1v; |
988 | copyTV(J->L, &mo1v, &ix->mobjv); |
989 | /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */ |
990 | bv = &ix->keyv; |
991 | if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) { |
992 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META); |
993 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); |
994 | } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) { |
995 | TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META); |
996 | emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt); |
997 | } else { /* Lookup metamethod on 2nd operand and compare both. */ |
998 | ix->tab = ix->key; |
999 | copyTV(J->L, &ix->tabv, bv); |
1000 | if (!lj_record_mm_lookup(J, ix, mm) || |
1001 | lj_record_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv)) |
1002 | goto nomatch; |
1003 | } |
1004 | rec_mm_callcomp(J, ix, op); |
1005 | return; |
1006 | } |
1007 | #endif |
1008 | nomatch: |
1009 | /* Lookup failed. Retry with __lt and swapped operands. */ |
1010 | if (!(op & 2)) break; /* Already at __lt. Interpreter will throw. */ |
1011 | ix->tab = ix->key; ix->key = ix->val; ix->val = ix->tab; |
1012 | copyTV(J->L, &ix->tabv, &ix->keyv); |
1013 | copyTV(J->L, &ix->keyv, &ix->valv); |
1014 | copyTV(J->L, &ix->valv, &ix->tabv); |
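/* a <= b is retried as not (b < a): flipping the low two bits of op
** switches both the le/lt variant and the condition sense.
*/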
1015 | op ^= 3; |
1016 | } |
1017 | } |
1018 | |
1019 | #if LJ_HASFFI |
1020 | /* Setup call to cdata comparison metamethod. */ |
1021 | static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) |
1022 | { |
1023 | lj_snap_add(J); |
1024 | if (tref_iscdata(ix->val)) { |
1025 | ix->tab = ix->val; |
1026 | copyTV(J->L, &ix->tabv, &ix->valv); |
1027 | } else { |
1028 | lua_assert(tref_iscdata(ix->key)); |
1029 | ix->tab = ix->key; |
1030 | copyTV(J->L, &ix->tabv, &ix->keyv); |
1031 | } |
1032 | lj_record_mm_lookup(J, ix, mm); |
1033 | rec_mm_callcomp(J, ix, op); |
1034 | } |
1035 | #endif |
1036 | |
1037 | /* -- Indexed access ------------------------------------------------------ */ |
1038 | |
1039 | /* Record bounds-check. */ |
1040 | static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) |
1041 | { |
1042 | /* Try to emit invariant bounds checks. */ |
1043 | if ((J->flags & (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) == |
1044 | (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) { |
1045 | IRRef ref = tref_ref(ikey); |
1046 | IRIns *ir = IR(ref); |
1047 | int32_t ofs = 0; |
1048 | IRRef ofsref = 0; |
1049 | /* Handle constant offsets. */ |
1050 | if (ir->o == IR_ADD && irref_isk(ir->op2)) { |
1051 | ofsref = ir->op2; |
1052 | ofs = IR(ofsref)->i; |
1053 | ref = ir->op1; |
1054 | ir = IR(ref); |
1055 | } |
1056 | /* Got scalar evolution analysis results for this reference? */ |
1057 | if (ref == J->scev.idx) { |
1058 | int32_t stop; |
1059 | lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); |
1060 | stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); |
1061 | /* Runtime value for stop of loop is within bounds? */ |
1062 | if ((int64_t)stop + ofs < (int64_t)asize) { |
1063 | /* Emit invariant bounds check for stop. */ |
1064 | emitir(IRTG(IR_ABC, IRT_P32), asizeref, ofs == 0 ? J->scev.stop : |
1065 | emitir(IRTI(IR_ADD), J->scev.stop, ofsref)); |
1066 | /* Emit invariant bounds check for start, if not const or negative. */ |
1067 | if (!(J->scev.dir && J->scev.start && |
1068 | (int64_t)IR(J->scev.start)->i + ofs >= 0)) |
1069 | emitir(IRTG(IR_ABC, IRT_P32), asizeref, ikey); |
1070 | return; |
1071 | } |
1072 | } |
1073 | } |
1074 | emitir(IRTGI(IR_ABC), asizeref, ikey); /* Emit regular bounds check. */ |
1075 | } |
1076 | |
1077 | /* Record indexed key lookup. */ |
1078 | static TRef rec_idx_key(jit_State *J, RecordIndex *ix) |
1079 | { |
1080 | TRef key; |
1081 | GCtab *t = tabV(&ix->tabv); |
1082 | ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ |
1083 | |
1084 | /* Integer keys are looked up in the array part first. */ |
1085 | key = ix->key; |
1086 | if (tref_isnumber(key)) { |
1087 | int32_t k = numberVint(&ix->keyv); |
1088 | if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) |
1089 | k = LJ_MAX_ASIZE; |
1090 | if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ |
1091 | TRef ikey = lj_opt_narrow_index(J, key); |
1092 | TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); |
1093 | if ((MSize)k < t->asize) { /* Currently an array key? */ |
1094 | TRef arrayref; |
1095 | rec_idx_abc(J, asizeref, ikey, t->asize); |
1096 | arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); |
1097 | return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); |
1098 | } else { /* Currently not in array (may be an array extension)? */ |
1099 | emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ |
1100 | if (k == 0 && tref_isk(key)) |
1101 | key = lj_ir_knum_zero(J); /* Canonicalize 0 or +-0.0 to +0.0. */ |
1102 | /* And continue with the hash lookup. */ |
1103 | } |
1104 | } else if (!tref_isk(key)) { |
1105 | /* We can rule out const numbers which failed the integerness test |
1106 | ** above. But all other numbers are potential array keys. |
1107 | */ |
1108 | if (t->asize == 0) { /* True sparse tables have an empty array part. */ |
1109 | /* Guard that the array part stays empty. */ |
1110 | TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); |
1111 | emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0)); |
1112 | } else { |
1113 | lj_trace_err(J, LJ_TRERR_NYITMIX); |
1114 | } |
1115 | } |
1116 | } |
1117 | |
1118 | /* Otherwise the key is located in the hash part. */ |
1119 | if (t->hmask == 0) { /* Shortcut for empty hash part. */ |
1120 | /* Guard that the hash part stays empty. */ |
1121 | TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); |
1122 | emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0)); |
1123 | return lj_ir_kkptr(J, niltvg(J2G(J))); |
1124 | } |
1125 | if (tref_isinteger(key)) /* Hash keys are based on numbers, not ints. */ |
1126 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); |
1127 | if (tref_isk(key)) { |
1128 | /* Optimize lookup of constant hash keys. */ |
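/* If the previous lookup found the slot inside the node array, guard
** that the hash mask (i.e. the table size) is unchanged and emit
** HREFK, which addresses node[hslot] directly and guards that the key
** stored in that slot still matches.
*/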
1129 | MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); |
1130 | if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && |
1131 | hslot <= 65535*(MSize)sizeof(Node)) { |
1132 | TRef node, kslot; |
1133 | TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); |
1134 | emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); |
1135 | node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); |
1136 | kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); |
1137 | return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); |
1138 | } |
1139 | } |
1140 | /* Fall back to a regular hash lookup. */ |
1141 | return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); |
1142 | } |
1143 | |
1144 | /* Determine whether a key is NOT one of the fast metamethod names. */ |
1145 | static int nommstr(jit_State *J, TRef key) |
1146 | { |
1147 | if (tref_isstr(key)) { |
1148 | if (tref_isk(key)) { |
1149 | GCstr *str = ir_kstr(IR(tref_ref(key))); |
1150 | uint32_t mm; |
1151 | for (mm = 0; mm <= MM_FAST; mm++) |
1152 | if (mmname_str(J2G(J), mm) == str) |
return 0; /* MUST be one of the fast metamethod names. */
1154 | } else { |
1155 | return 0; /* Variable string key MAY be a metamethod name. */ |
1156 | } |
1157 | } |
1158 | return 1; /* CANNOT be a metamethod name. */ |
1159 | } |
1160 | |
1161 | /* Record indexed load/store. */ |
1162 | TRef lj_record_idx(jit_State *J, RecordIndex *ix) |
1163 | { |
1164 | TRef xref; |
1165 | IROp xrefop, loadop; |
1166 | cTValue *oldv; |
1167 | |
1168 | while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ |
1169 | /* Never call raw lj_record_idx() on non-table. */ |
1170 | lua_assert(ix->idxchain != 0); |
1171 | if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) |
1172 | lj_trace_err(J, LJ_TRERR_NOMM); |
1173 | handlemm: |
1174 | if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ |
1175 | BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); |
1176 | TRef *base = J->base + func; |
1177 | TValue *tv = J->L->base + func; |
1178 | base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; |
1179 | setfuncV(J->L, tv+0, funcV(&ix->mobjv)); |
1180 | copyTV(J->L, tv+1, &ix->tabv); |
1181 | copyTV(J->L, tv+2, &ix->keyv); |
1182 | if (ix->val) { |
1183 | base[3] = ix->val; |
1184 | copyTV(J->L, tv+3, &ix->valv); |
1185 | lj_record_call(J, func, 3); /* mobj(tab, key, val) */ |
1186 | return 0; |
1187 | } else { |
1188 | lj_record_call(J, func, 2); /* res = mobj(tab, key) */ |
1189 | return 0; /* No result yet. */ |
1190 | } |
1191 | } |
1192 | /* Otherwise retry lookup with metaobject. */ |
1193 | ix->tab = ix->mobj; |
1194 | copyTV(J->L, &ix->tabv, &ix->mobjv); |
1195 | if (--ix->idxchain == 0) |
1196 | lj_trace_err(J, LJ_TRERR_IDXLOOP); |
1197 | } |
1198 | |
1199 | /* First catch nil and NaN keys for tables. */ |
1200 | if (tvisnil(&ix->keyv) || (tvisnum(&ix->keyv) && tvisnan(&ix->keyv))) { |
1201 | if (ix->val) /* Better fail early. */ |
1202 | lj_trace_err(J, LJ_TRERR_STORENN); |
1203 | if (tref_isk(ix->key)) { |
1204 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) |
1205 | goto handlemm; |
1206 | return TREF_NIL; |
1207 | } |
1208 | } |
1209 | |
1210 | /* Record the key lookup. */ |
1211 | xref = rec_idx_key(J, ix); |
1212 | xrefop = IR(tref_ref(xref))->o; |
1213 | loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; |
1214 | /* The lj_meta_tset() inconsistency is gone, but better play safe. */ |
1215 | oldv = xrefop == IR_KKPTR ? (cTValue *)ir_kptr(IR(tref_ref(xref))) : ix->oldv; |
1216 | |
1217 | if (ix->val == 0) { /* Indexed load */ |
1218 | IRType t = itype2irt(oldv); |
1219 | TRef res; |
1220 | if (oldv == niltvg(J2G(J))) { |
1221 | emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1222 | res = TREF_NIL; |
1223 | } else { |
1224 | res = emitir(IRTG(loadop, t), xref, 0); |
1225 | } |
1226 | if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) |
1227 | goto handlemm; |
1228 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ |
1229 | return res; |
1230 | } else { /* Indexed store. */ |
1231 | GCtab *mt = tabref(tabV(&ix->tabv)->metatable); |
1232 | int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); |
1233 | if (tvisnil(oldv)) { /* Previous value was nil? */ |
1234 | /* Need to duplicate the hasmm check for the early guards. */ |
1235 | int hasmm = 0; |
1236 | if (ix->idxchain && mt) { |
1237 | cTValue *mo = lj_tab_getstr(mt, mmname_str(J2G(J), MM_newindex)); |
1238 | hasmm = mo && !tvisnil(mo); |
1239 | } |
1240 | if (hasmm) |
1241 | emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ |
1242 | else if (xrefop == IR_HREF) |
1243 | emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), |
1244 | xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1245 | if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { |
1246 | lua_assert(hasmm); |
1247 | goto handlemm; |
1248 | } |
1249 | lua_assert(!hasmm); |
1250 | if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ |
1251 | TRef key = ix->key; |
1252 | if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ |
1253 | key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); |
1254 | xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); |
1255 | keybarrier = 0; /* NEWREF already takes care of the key barrier. */ |
1256 | } |
1257 | } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { |
1258 | /* Cannot derive that the previous value was non-nil, must do checks. */ |
1259 | if (xrefop == IR_HREF) /* Guard against store to niltv. */ |
1260 | emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); |
1261 | if (ix->idxchain) { /* Metamethod lookup required? */ |
1262 | /* A check for NULL metatable is cheaper (hoistable) than a load. */ |
1263 | if (!mt) { |
1264 | TRef mtref = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META); |
1265 | emitir(IRTG(IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB)); |
1266 | } else { |
1267 | IRType t = itype2irt(oldv); |
1268 | emitir(IRTG(loadop, t), xref, 0); /* Guard for non-nil value. */ |
1269 | } |
1270 | } |
1271 | } else { |
1272 | keybarrier = 0; /* Previous non-nil value kept the key alive. */ |
1273 | } |
1274 | /* Convert int to number before storing. */ |
1275 | if (!LJ_DUALNUM && tref_isinteger(ix->val)) |
1276 | ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT); |
1277 | emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val); |
1278 | if (keybarrier || tref_isgcv(ix->val)) |
1279 | emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); |
1280 | /* Invalidate neg. metamethod cache for stores with certain string keys. */ |
1281 | if (!nommstr(J, ix->key)) { |
1282 | TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); |
1283 | emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); |
1284 | } |
1285 | J->needsnap = 1; |
1286 | return 0; |
1287 | } |
1288 | } |
1289 | |
1290 | /* -- Upvalue access ------------------------------------------------------ */ |
1291 | |
1292 | /* Check whether upvalue is immutable and ok to constify. */ |
1293 | static int rec_upvalue_constify(jit_State *J, GCupval *uvp) |
1294 | { |
1295 | if (uvp->immutable) { |
1296 | cTValue *o = uvval(uvp); |
1297 | /* Don't constify objects that may retain large amounts of memory. */ |
1298 | #if LJ_HASFFI |
1299 | if (tviscdata(o)) { |
1300 | GCcdata *cd = cdataV(o); |
1301 | if (!cdataisv(cd) && !(cd->marked & LJ_GC_CDATA_FIN)) { |
1302 | CType *ct = ctype_raw(ctype_ctsG(J2G(J)), cd->ctypeid); |
1303 | if (!ctype_hassize(ct->info) || ct->size <= 16) |
1304 | return 1; |
1305 | } |
1306 | return 0; |
1307 | } |
1308 | #else |
1309 | UNUSED(J); |
1310 | #endif |
1311 | if (!(tvistab(o) || tvisudata(o) || tvisthread(o))) |
1312 | return 1; |
1313 | } |
1314 | return 0; |
1315 | } |
1316 | |
1317 | /* Record upvalue load/store. */ |
1318 | static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) |
1319 | { |
1320 | GCupval *uvp = &gcref(J->fn->l.uvptr[uv])->uv; |
1321 | TRef fn = getcurrf(J); |
1322 | IRRef uref; |
1323 | int needbarrier = 0; |
1324 | if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ |
1325 | TRef tr, kfunc; |
1326 | lua_assert(val == 0); |
1327 | if (!tref_isk(fn)) { /* Late specialization of current function. */ |
1328 | if (J->pt->flags >= PROTO_CLC_POLY) |
1329 | goto noconstify; |
1330 | kfunc = lj_ir_kfunc(J, J->fn); |
1331 | emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); |
1332 | J->base[-1] = TREF_FRAME | kfunc; |
1333 | fn = kfunc; |
1334 | } |
1335 | tr = lj_record_constify(J, uvval(uvp)); |
1336 | if (tr) |
1337 | return tr; |
1338 | } |
1339 | noconstify: |
1340 | /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ |
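/* The 8 bit disambiguation hash of the upvalue's creation site keeps
** CSE from merging UREF instructions that refer to distinct upvalues
** which merely share the same index.
*/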
1341 | uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); |
1342 | if (!uvp->closed) { |
1343 | /* In current stack? */ |
1344 | if (uvval(uvp) >= tvref(J->L->stack) && |
1345 | uvval(uvp) < tvref(J->L->maxstack)) { |
1346 | int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); |
1347 | if (slot >= 0) { /* Aliases an SSA slot? */ |
1348 | slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ |
1349 | /* NYI: add IR to guard that it's still aliasing the same slot. */ |
1350 | if (val == 0) { |
1351 | return getslot(J, slot); |
1352 | } else { |
1353 | J->base[slot] = val; |
1354 | if (slot >= (int32_t)J->maxslot) J->maxslot = (BCReg)(slot+1); |
1355 | return 0; |
1356 | } |
1357 | } |
1358 | } |
1359 | uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); |
1360 | } else { |
1361 | needbarrier = 1; |
1362 | uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); |
1363 | } |
1364 | if (val == 0) { /* Upvalue load */ |
1365 | IRType t = itype2irt(uvval(uvp)); |
1366 | TRef res = emitir(IRTG(IR_ULOAD, t), uref, 0); |
1367 | if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */ |
1368 | return res; |
1369 | } else { /* Upvalue store. */ |
1370 | /* Convert int to number before storing. */ |
1371 | if (!LJ_DUALNUM && tref_isinteger(val)) |
1372 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); |
1373 | emitir(IRT(IR_USTORE, tref_type(val)), uref, val); |
1374 | if (needbarrier && tref_isgcv(val)) |
1375 | emitir(IRT(IR_OBAR, IRT_NIL), uref, val); |
1376 | J->needsnap = 1; |
1377 | return 0; |
1378 | } |
1379 | } |
1380 | |
1381 | /* -- Record calls to Lua functions --------------------------------------- */ |
1382 | |
1383 | /* Check unroll limits for calls. */ |
1384 | static void check_call_unroll(jit_State *J, TraceNo lnk) |
1385 | { |
1386 | cTValue *frame = J->L->base - 1; |
1387 | void *pc = mref(frame_func(frame)->l.pc, void); |
1388 | int32_t depth = J->framedepth; |
1389 | int32_t count = 0; |
1390 | if ((J->pt->flags & PROTO_VARARG)) depth--; /* Vararg frame still missing. */ |
1391 | for (; depth > 0; depth--) { /* Count frames with same prototype. */ |
1392 | if (frame_iscont(frame)) depth--; |
1393 | frame = frame_prev(frame); |
1394 | if (mref(frame_func(frame)->l.pc, void) == pc) |
1395 | count++; |
1396 | } |
1397 | if (J->pc == J->startpc) { |
1398 | if (count + J->tailcalled > J->param[JIT_P_recunroll]) { |
1399 | J->pc++; |
1400 | if (J->framedepth + J->retdepth == 0) |
1401 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ |
1402 | else |
1403 | rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ |
1404 | } |
1405 | } else { |
1406 | if (count > J->param[JIT_P_callunroll]) { |
1407 | if (lnk) { /* Possible tail- or up-recursion. */ |
1408 | lj_trace_flush(J, lnk); /* Flush trace that only returns. */ |
1409 | /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ |
1410 | hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); |
1411 | } |
1412 | lj_trace_err(J, LJ_TRERR_CUNROLL); |
1413 | } |
1414 | } |
1415 | } |
1416 | |
1417 | /* Record Lua function setup. */ |
1418 | static void rec_func_setup(jit_State *J) |
1419 | { |
1420 | GCproto *pt = J->pt; |
1421 | BCReg s, numparams = pt->numparams; |
1422 | if ((pt->flags & PROTO_NOJIT)) |
1423 | lj_trace_err(J, LJ_TRERR_CJITOFF); |
1424 | if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS) |
1425 | lj_trace_err(J, LJ_TRERR_STACKOV); |
1426 | /* Fill up missing parameters with nil. */ |
1427 | for (s = J->maxslot; s < numparams; s++) |
1428 | J->base[s] = TREF_NIL; |
1429 | /* The remaining slots should never be read before they are written. */ |
1430 | J->maxslot = numparams; |
1431 | } |
1432 | |
1433 | /* Record Lua vararg function setup. */ |
1434 | static void rec_func_vararg(jit_State *J) |
1435 | { |
1436 | GCproto *pt = J->pt; |
1437 | BCReg s, fixargs, vframe = J->maxslot+1; |
1438 | lua_assert((pt->flags & PROTO_VARARG)); |
1439 | if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) |
1440 | lj_trace_err(J, LJ_TRERR_STACKOV); |
1441 | J->base[vframe-1] = J->base[-1]; /* Copy function up. */ |
1442 | /* Copy fixarg slots up and set their original slots to nil. */ |
1443 | fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; |
1444 | for (s = 0; s < fixargs; s++) { |
1445 | J->base[vframe+s] = J->base[s]; |
1446 | J->base[s] = TREF_NIL; |
1447 | } |
1448 | J->maxslot = fixargs; |
1449 | J->framedepth++; |
1450 | J->base += vframe; |
1451 | J->baseslot += vframe; |
1452 | } |
1453 | |
1454 | /* Record entry to a Lua function. */ |
1455 | static void rec_func_lua(jit_State *J) |
1456 | { |
1457 | rec_func_setup(J); |
1458 | check_call_unroll(J, 0); |
1459 | } |
1460 | |
1461 | /* Record entry to an already compiled function. */ |
1462 | static void rec_func_jit(jit_State *J, TraceNo lnk) |
1463 | { |
1464 | GCtrace *T; |
1465 | rec_func_setup(J); |
1466 | T = traceref(J, lnk); |
1467 | if (T->linktype == LJ_TRLINK_RETURN) { /* Trace returns to interpreter? */ |
1468 | check_call_unroll(J, lnk); |
/* Temporarily unpatch JFUNC* to continue recording across the function. */
1470 | J->patchins = *J->pc; |
1471 | J->patchpc = (BCIns *)J->pc; |
1472 | *J->patchpc = T->startins; |
1473 | return; |
1474 | } |
1475 | J->instunroll = 0; /* Cannot continue across a compiled function. */ |
1476 | if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) |
1477 | rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ |
1478 | else |
1479 | rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ |
1480 | } |
1481 | |
1482 | /* -- Vararg handling ----------------------------------------------------- */ |
1483 | |
1484 | /* Detect y = select(x, ...) idiom. */ |
1485 | static int select_detect(jit_State *J) |
1486 | { |
1487 | BCIns ins = J->pc[1]; |
1488 | if (bc_op(ins) == BC_CALLM && bc_b(ins) == 2 && bc_c(ins) == 1) { |
1489 | cTValue *func = &J->L->base[bc_a(ins)]; |
1490 | if (tvisfunc(func) && funcV(func)->c.ffid == FF_select) |
1491 | return 1; |
1492 | } |
1493 | return 0; |
1494 | } |
1495 | |
1496 | /* Record vararg instruction. */ |
1497 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) |
1498 | { |
1499 | int32_t numparams = J->pt->numparams; |
1500 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; |
1501 | lua_assert(frame_isvarg(J->L->base-1)); |
1502 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ |
1503 | ptrdiff_t i; |
1504 | if (nvararg < 0) nvararg = 0; |
1505 | if (nresults == -1) { |
1506 | nresults = nvararg; |
1507 | J->maxslot = dst + (BCReg)nvararg; |
1508 | } else if (dst + nresults > J->maxslot) { |
1509 | J->maxslot = dst + (BCReg)nresults; |
1510 | } |
1511 | for (i = 0; i < nresults; i++) |
1512 | J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; |
1513 | } else { /* Unknown number of varargs passed to trace. */ |
1514 | TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); |
1515 | int32_t frofs = 8*(1+numparams)+FRAME_VARG; |
1516 | if (nresults >= 0) { /* Known fixed number of results. */ |
1517 | ptrdiff_t i; |
1518 | if (nvararg > 0) { |
1519 | ptrdiff_t nload = nvararg >= nresults ? nresults : nvararg; |
1520 | TRef vbase; |
1521 | if (nvararg >= nresults) |
1522 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); |
1523 | else |
1524 | emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); |
1525 | vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); |
1526 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); |
1527 | for (i = 0; i < nload; i++) { |
1528 | IRType t = itype2irt(&J->L->base[i-1-nvararg]); |
1529 | TRef aref = emitir(IRT(IR_AREF, IRT_P32), |
1530 | vbase, lj_ir_kint(J, (int32_t)i)); |
1531 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); |
1532 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ |
1533 | J->base[dst+i] = tr; |
1534 | } |
1535 | } else { |
1536 | emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); |
1537 | nvararg = 0; |
1538 | } |
1539 | for (i = nvararg; i < nresults; i++) |
1540 | J->base[dst+i] = TREF_NIL; |
1541 | if (dst + (BCReg)nresults > J->maxslot) |
1542 | J->maxslot = dst + (BCReg)nresults; |
1543 | } else if (select_detect(J)) { /* y = select(x, ...) */ |
1544 | TRef tridx = J->base[dst-1]; |
1545 | TRef tr = TREF_NIL; |
1546 | ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]); |
1547 | if (idx < 0) goto nyivarg; |
1548 | if (idx != 0 && !tref_isinteger(tridx)) |
1549 | tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX); |
1550 | if (idx != 0 && tref_isk(tridx)) { |
1551 | emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT), |
1552 | fr, lj_ir_kint(J, frofs+8*(int32_t)idx)); |
1553 | frofs -= 8; /* Bias for 1-based index. */ |
1554 | } else if (idx <= nvararg) { /* Compute size. */ |
1555 | TRef tmp = emitir(IRTI(IR_ADD), fr, lj_ir_kint(J, -frofs)); |
1556 | if (numparams) |
1557 | emitir(IRTGI(IR_GE), tmp, lj_ir_kint(J, 0)); |
1558 | tr = emitir(IRTI(IR_BSHR), tmp, lj_ir_kint(J, 3)); |
1559 | if (idx != 0) { |
1560 | tridx = emitir(IRTI(IR_ADD), tridx, lj_ir_kint(J, -1)); |
1561 | rec_idx_abc(J, tr, tridx, (uint32_t)nvararg); |
1562 | } |
1563 | } else { |
1564 | TRef tmp = lj_ir_kint(J, frofs); |
1565 | if (idx != 0) { |
1566 | TRef tmp2 = emitir(IRTI(IR_BSHL), tridx, lj_ir_kint(J, 3)); |
1567 | tmp = emitir(IRTI(IR_ADD), tmp2, tmp); |
1568 | } else { |
1569 | tr = lj_ir_kint(J, 0); |
1570 | } |
1571 | emitir(IRTGI(IR_LT), fr, tmp); |
1572 | } |
1573 | if (idx != 0 && idx <= nvararg) { |
1574 | IRType t; |
1575 | TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); |
1576 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); |
1577 | t = itype2irt(&J->L->base[idx-2-nvararg]); |
1578 | aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); |
1579 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); |
1580 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ |
1581 | } |
1582 | J->base[dst-2] = tr; |
1583 | J->maxslot = dst-1; |
1584 | J->bcskip = 2; /* Skip CALLM + select. */ |
1585 | } else { |
1586 | nyivarg: |
1587 | setintV(&J->errinfo, BC_VARG); |
1588 | lj_trace_err_info(J, LJ_TRERR_NYIBC); |
1589 | } |
1590 | } |
1591 | } |
1592 | |
1593 | /* -- Record allocations -------------------------------------------------- */ |
1594 | |
1595 | static TRef rec_tnew(jit_State *J, uint32_t ah) |
1596 | { |
1597 | uint32_t asize = ah & 0x7ff; |
1598 | uint32_t hbits = ah >> 11; |
1599 | if (asize == 0x7ff) asize = 0x801; |
1600 | return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); |
1601 | } |
1602 | |
1603 | /* -- Record bytecode ops ------------------------------------------------- */ |
1604 | |
1605 | /* Prepare for comparison. */ |
1606 | static void rec_comp_prep(jit_State *J) |
1607 | { |
/* Prevent merging with snapshot #0 (GC exit) since we fix up the PC. */
1609 | if (J->cur.nsnap == 1 && J->cur.snap[0].ref == J->cur.nins) |
1610 | emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); |
1611 | lj_snap_add(J); |
1612 | } |
1613 | |
1614 | /* Fixup comparison. */ |
1615 | static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) |
1616 | { |
1617 | BCIns jmpins = pc[1]; |
1618 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); |
1619 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
/* Set the snapshot PC to the opposite target to avoid re-recording
** the comparison in a side trace. */
1621 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
1622 | J->needsnap = 1; |
1623 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); |
1624 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ |
1625 | } |
1626 | |
1627 | /* Record the next bytecode instruction (_before_ it's executed). */ |
1628 | void lj_record_ins(jit_State *J) |
1629 | { |
1630 | cTValue *lbase; |
1631 | RecordIndex ix; |
1632 | const BCIns *pc; |
1633 | BCIns ins; |
1634 | BCOp op; |
1635 | TRef ra, rb, rc; |
1636 | |
1637 | /* Perform post-processing action before recording the next instruction. */ |
1638 | if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { |
1639 | switch (J->postproc) { |
1640 | case LJ_POST_FIXCOMP: /* Fixup comparison. */ |
1641 | pc = frame_pc(&J2G(J)->tmptv); |
1642 | rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); |
1643 | /* fallthrough */ |
1644 | case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ |
1645 | case LJ_POST_FIXGUARDSNAP: /* Fixup and emit pending guard and snapshot. */ |
1646 | if (!tvistruecond(&J2G(J)->tmptv2)) { |
1647 | J->fold.ins.o ^= 1; /* Flip guard to opposite. */ |
1648 | if (J->postproc == LJ_POST_FIXGUARDSNAP) { |
1649 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
1650 | J->cur.snapmap[snap->mapofs+snap->nent-1]--; /* False -> true. */ |
1651 | } |
1652 | } |
1653 | lj_opt_fold(J); /* Emit pending guard. */ |
1654 | /* fallthrough */ |
1655 | case LJ_POST_FIXBOOL: |
1656 | if (!tvistruecond(&J2G(J)->tmptv2)) { |
1657 | BCReg s; |
1658 | TValue *tv = J->L->base; |
1659 | for (s = 0; s < J->maxslot; s++) /* Fixup stack slot (if any). */ |
1660 | if (J->base[s] == TREF_TRUE && tvisfalse(&tv[s])) { |
1661 | J->base[s] = TREF_FALSE; |
1662 | break; |
1663 | } |
1664 | } |
1665 | break; |
1666 | case LJ_POST_FIXCONST: |
1667 | { |
1668 | BCReg s; |
1669 | TValue *tv = J->L->base; |
1670 | for (s = 0; s < J->maxslot; s++) /* Constify stack slots (if any). */ |
1671 | if (J->base[s] == TREF_NIL && !tvisnil(&tv[s])) |
1672 | J->base[s] = lj_record_constify(J, &tv[s]); |
1673 | } |
1674 | break; |
1675 | case LJ_POST_FFRETRY: /* Suppress recording of retried fast function. */ |
1676 | if (bc_op(*J->pc) >= BC__MAX) |
1677 | return; |
1678 | break; |
1679 | default: lua_assert(0); break; |
1680 | } |
1681 | J->postproc = LJ_POST_NONE; |
1682 | } |
1683 | |
1684 | /* Need snapshot before recording next bytecode (e.g. after a store). */ |
1685 | if (J->needsnap) { |
1686 | J->needsnap = 0; |
1687 | lj_snap_purge(J); |
1688 | lj_snap_add(J); |
1689 | J->mergesnap = 1; |
1690 | } |
1691 | |
1692 | /* Skip some bytecodes. */ |
1693 | if (LJ_UNLIKELY(J->bcskip > 0)) { |
1694 | J->bcskip--; |
1695 | return; |
1696 | } |
1697 | |
1698 | /* Record only closed loops for root traces. */ |
1699 | pc = J->pc; |
1700 | if (J->framedepth == 0 && |
1701 | (MSize)((char *)pc - (char *)J->bc_min) >= J->bc_extent) |
1702 | lj_trace_err(J, LJ_TRERR_LLEAVE); |
1703 | |
1704 | #ifdef LUA_USE_ASSERT |
1705 | rec_check_slots(J); |
1706 | rec_check_ir(J); |
1707 | #endif |
1708 | |
1709 | /* Keep a copy of the runtime values of var/num/str operands. */ |
1710 | #define rav (&ix.valv) |
1711 | #define rbv (&ix.tabv) |
1712 | #define rcv (&ix.keyv) |
1713 | |
1714 | lbase = J->L->base; |
1715 | ins = *pc; |
1716 | op = bc_op(ins); |
1717 | ra = bc_a(ins); |
1718 | ix.val = 0; |
1719 | switch (bcmode_a(op)) { |
1720 | case BCMvar: |
1721 | copyTV(J->L, rav, &lbase[ra]); ix.val = ra = getslot(J, ra); break; |
1722 | default: break; /* Handled later. */ |
1723 | } |
1724 | rb = bc_b(ins); |
1725 | rc = bc_c(ins); |
1726 | switch (bcmode_b(op)) { |
1727 | case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */ |
1728 | case BCMvar: |
1729 | copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break; |
1730 | default: break; /* Handled later. */ |
1731 | } |
1732 | switch (bcmode_c(op)) { |
1733 | case BCMvar: |
1734 | copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; |
1735 | case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; |
1736 | case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); |
1737 | copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : |
1738 | lj_ir_knumint(J, numV(tv)); } break; |
1739 | case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); |
1740 | setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; |
1741 | default: break; /* Handled later. */ |
1742 | } |
1743 | |
1744 | switch (op) { |
1745 | |
1746 | /* -- Comparison ops ---------------------------------------------------- */ |
1747 | |
1748 | case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: |
1749 | #if LJ_HASFFI |
1750 | if (tref_iscdata(ra) || tref_iscdata(rc)) { |
1751 | rec_mm_comp_cdata(J, &ix, op, ((int)op & 2) ? MM_le : MM_lt); |
1752 | break; |
1753 | } |
1754 | #endif |
1755 | /* Emit nothing for two numeric or string consts. */ |
1756 | if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) { |
1757 | IRType ta = tref_isinteger(ra) ? IRT_INT : tref_type(ra); |
1758 | IRType tc = tref_isinteger(rc) ? IRT_INT : tref_type(rc); |
1759 | int irop; |
1760 | if (ta != tc) { |
1761 | /* Widen mixed number/int comparisons to number/number comparison. */ |
1762 | if (ta == IRT_INT && tc == IRT_NUM) { |
1763 | ra = emitir(IRTN(IR_CONV), ra, IRCONV_NUM_INT); |
1764 | ta = IRT_NUM; |
1765 | } else if (ta == IRT_NUM && tc == IRT_INT) { |
1766 | rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); |
1767 | } else if (LJ_52) { |
1768 | ta = IRT_NIL; /* Force metamethod for different types. */ |
1769 | } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) && |
1770 | (tc == IRT_FALSE || tc == IRT_TRUE))) { |
1771 | break; /* Interpreter will throw for two different types. */ |
1772 | } |
1773 | } |
1774 | rec_comp_prep(J); |
1775 | irop = (int)op - (int)BC_ISLT + (int)IR_LT; |
1776 | if (ta == IRT_NUM) { |
1777 | if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */ |
1778 | if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) |
1779 | irop ^= 5; |
1780 | } else if (ta == IRT_INT) { |
1781 | if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop)) |
1782 | irop ^= 1; |
1783 | } else if (ta == IRT_STR) { |
1784 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; |
1785 | ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); |
1786 | rc = lj_ir_kint(J, 0); |
1787 | ta = IRT_INT; |
1788 | } else { |
1789 | rec_mm_comp(J, &ix, (int)op); |
1790 | break; |
1791 | } |
1792 | emitir(IRTG(irop, ta), ra, rc); |
1793 | rec_comp_fixup(J, J->pc, ((int)op ^ irop) & 1); |
1794 | } |
1795 | break; |
1796 | |
1797 | case BC_ISEQV: case BC_ISNEV: |
1798 | case BC_ISEQS: case BC_ISNES: |
1799 | case BC_ISEQN: case BC_ISNEN: |
1800 | case BC_ISEQP: case BC_ISNEP: |
1801 | #if LJ_HASFFI |
1802 | if (tref_iscdata(ra) || tref_iscdata(rc)) { |
1803 | rec_mm_comp_cdata(J, &ix, op, MM_eq); |
1804 | break; |
1805 | } |
1806 | #endif |
1807 | /* Emit nothing for two non-table, non-udata consts. */ |
1808 | if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) { |
1809 | int diff; |
1810 | rec_comp_prep(J); |
1811 | diff = lj_record_objcmp(J, ra, rc, rav, rcv); |
1812 | if (diff == 2 || !(tref_istab(ra) || tref_isudata(ra))) |
1813 | rec_comp_fixup(J, J->pc, ((int)op & 1) == !diff); |
1814 | else if (diff == 1) /* Only check __eq if different, but same type. */ |
1815 | rec_mm_equal(J, &ix, (int)op); |
1816 | } |
1817 | break; |
1818 | |
1819 | /* -- Unary test and copy ops ------------------------------------------- */ |
1820 | |
1821 | case BC_ISTC: case BC_ISFC: |
1822 | if ((op & 1) == tref_istruecond(rc)) |
1823 | rc = 0; /* Don't store if condition is not true. */ |
1824 | /* fallthrough */ |
1825 | case BC_IST: case BC_ISF: /* Type specialization suffices. */ |
1826 | if (bc_a(pc[1]) < J->maxslot) |
1827 | J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ |
1828 | break; |
1829 | |
1830 | /* -- Unary ops --------------------------------------------------------- */ |
1831 | |
1832 | case BC_NOT: |
/* Type specialization already forces a constant result. */
1834 | rc = tref_istruecond(rc) ? TREF_FALSE : TREF_TRUE; |
1835 | break; |
1836 | |
1837 | case BC_LEN: |
1838 | if (tref_isstr(rc)) |
1839 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); |
1840 | else if (!LJ_52 && tref_istab(rc)) |
1841 | rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); |
1842 | else |
1843 | rc = rec_mm_len(J, rc, rcv); |
1844 | break; |
1845 | |
1846 | /* -- Arithmetic ops ---------------------------------------------------- */ |
1847 | |
1848 | case BC_UNM: |
1849 | if (tref_isnumber_str(rc)) { |
1850 | rc = lj_opt_narrow_unm(J, rc, rcv); |
1851 | } else { |
1852 | ix.tab = rc; |
1853 | copyTV(J->L, &ix.tabv, rcv); |
1854 | rc = rec_mm_arith(J, &ix, MM_unm); |
1855 | } |
1856 | break; |
1857 | |
1858 | case BC_ADDNV: case BC_SUBNV: case BC_MULNV: case BC_DIVNV: case BC_MODNV: |
1859 | /* Swap rb/rc and rbv/rcv. rav is temp. */ |
1860 | ix.tab = rc; ix.key = rc = rb; rb = ix.tab; |
1861 | copyTV(J->L, rav, rbv); |
1862 | copyTV(J->L, rbv, rcv); |
1863 | copyTV(J->L, rcv, rav); |
1864 | if (op == BC_MODNV) |
1865 | goto recmod; |
1866 | /* fallthrough */ |
1867 | case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN: |
1868 | case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: { |
1869 | MMS mm = bcmode_mm(op); |
1870 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) |
1871 | rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, |
1872 | (int)mm - (int)MM_add + (int)IR_ADD); |
1873 | else |
1874 | rc = rec_mm_arith(J, &ix, mm); |
1875 | break; |
1876 | } |
1877 | |
1878 | case BC_MODVN: case BC_MODVV: |
1879 | recmod: |
1880 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) |
1881 | rc = lj_opt_narrow_mod(J, rb, rc, rcv); |
1882 | else |
1883 | rc = rec_mm_arith(J, &ix, MM_mod); |
1884 | break; |
1885 | |
1886 | case BC_POW: |
1887 | if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) |
1888 | rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv); |
1889 | else |
1890 | rc = rec_mm_arith(J, &ix, MM_pow); |
1891 | break; |
1892 | |
1893 | /* -- Constant and move ops --------------------------------------------- */ |
1894 | |
1895 | case BC_MOV: |
/* Clear the gap of a method call to avoid resurrecting previous refs. */
1897 | if (ra > J->maxslot) J->base[ra-1] = 0; |
1898 | break; |
1899 | case BC_KSTR: case BC_KNUM: case BC_KPRI: |
1900 | break; |
1901 | case BC_KSHORT: |
1902 | rc = lj_ir_kint(J, (int32_t)(int16_t)rc); |
1903 | break; |
1904 | case BC_KNIL: |
1905 | while (ra <= rc) |
1906 | J->base[ra++] = TREF_NIL; |
1907 | if (rc >= J->maxslot) J->maxslot = rc+1; |
1908 | break; |
1909 | #if LJ_HASFFI |
1910 | case BC_KCDATA: |
1911 | rc = lj_ir_kgc(J, proto_kgc(J->pt, ~(ptrdiff_t)rc), IRT_CDATA); |
1912 | break; |
1913 | #endif |
1914 | |
1915 | /* -- Upvalue and function ops ------------------------------------------ */ |
1916 | |
1917 | case BC_UGET: |
1918 | rc = rec_upvalue(J, rc, 0); |
1919 | break; |
1920 | case BC_USETV: case BC_USETS: case BC_USETN: case BC_USETP: |
1921 | rec_upvalue(J, ra, rc); |
1922 | break; |
1923 | |
1924 | /* -- Table ops --------------------------------------------------------- */ |
1925 | |
1926 | case BC_GGET: case BC_GSET: |
1927 | settabV(J->L, &ix.tabv, tabref(J->fn->l.env)); |
1928 | ix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), getcurrf(J), IRFL_FUNC_ENV); |
1929 | ix.idxchain = LJ_MAX_IDXCHAIN; |
1930 | rc = lj_record_idx(J, &ix); |
1931 | break; |
1932 | |
1933 | case BC_TGETB: case BC_TSETB: |
1934 | setintV(&ix.keyv, (int32_t)rc); |
1935 | ix.key = lj_ir_kint(J, (int32_t)rc); |
1936 | /* fallthrough */ |
1937 | case BC_TGETV: case BC_TGETS: case BC_TSETV: case BC_TSETS: |
1938 | ix.idxchain = LJ_MAX_IDXCHAIN; |
1939 | rc = lj_record_idx(J, &ix); |
1940 | break; |
1941 | |
1942 | case BC_TNEW: |
1943 | rc = rec_tnew(J, rc); |
1944 | break; |
1945 | case BC_TDUP: |
1946 | rc = emitir(IRTG(IR_TDUP, IRT_TAB), |
1947 | lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); |
1948 | break; |
1949 | |
1950 | /* -- Calls and vararg handling ----------------------------------------- */ |
1951 | |
1952 | case BC_ITERC: |
1953 | J->base[ra] = getslot(J, ra-3); |
1954 | J->base[ra+1] = getslot(J, ra-2); |
1955 | J->base[ra+2] = getslot(J, ra-1); |
1956 | { /* Do the actual copy now because lj_record_call needs the values. */ |
1957 | TValue *b = &J->L->base[ra]; |
1958 | copyTV(J->L, b, b-3); |
1959 | copyTV(J->L, b+1, b-2); |
1960 | copyTV(J->L, b+2, b-1); |
1961 | } |
1962 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1963 | break; |
1964 | |
1965 | /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ |
1966 | case BC_CALLM: |
1967 | rc = (BCReg)(J->L->top - J->L->base) - ra; |
1968 | /* fallthrough */ |
1969 | case BC_CALL: |
1970 | lj_record_call(J, ra, (ptrdiff_t)rc-1); |
1971 | break; |
1972 | |
1973 | case BC_CALLMT: |
1974 | rc = (BCReg)(J->L->top - J->L->base) - ra; |
1975 | /* fallthrough */ |
1976 | case BC_CALLT: |
1977 | lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); |
1978 | break; |
1979 | |
1980 | case BC_VARG: |
1981 | rec_varg(J, ra, (ptrdiff_t)rb-1); |
1982 | break; |
1983 | |
1984 | /* -- Returns ----------------------------------------------------------- */ |
1985 | |
1986 | case BC_RETM: |
1987 | /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */ |
1988 | rc = (BCReg)(J->L->top - J->L->base) - ra + 1; |
1989 | /* fallthrough */ |
1990 | case BC_RET: case BC_RET0: case BC_RET1: |
1991 | lj_record_ret(J, ra, (ptrdiff_t)rc-1); |
1992 | break; |
1993 | |
1994 | /* -- Loops and branches ------------------------------------------------ */ |
1995 | |
1996 | case BC_FORI: |
1997 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) |
1998 | J->loopref = J->cur.nins; |
1999 | break; |
2000 | case BC_JFORI: |
2001 | lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); |
2002 | if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ |
2003 | rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); |
2004 | /* Continue tracing if the loop is not entered. */ |
2005 | break; |
2006 | |
2007 | case BC_FORL: |
2008 | rec_loop_interp(J, pc, rec_for(J, pc+((ptrdiff_t)rc-BCBIAS_J), 1)); |
2009 | break; |
2010 | case BC_ITERL: |
2011 | rec_loop_interp(J, pc, rec_iterl(J, *pc)); |
2012 | break; |
2013 | case BC_LOOP: |
2014 | rec_loop_interp(J, pc, rec_loop(J, ra)); |
2015 | break; |
2016 | |
2017 | case BC_JFORL: |
2018 | rec_loop_jit(J, rc, rec_for(J, pc+bc_j(traceref(J, rc)->startins), 1)); |
2019 | break; |
2020 | case BC_JITERL: |
2021 | rec_loop_jit(J, rc, rec_iterl(J, traceref(J, rc)->startins)); |
2022 | break; |
2023 | case BC_JLOOP: |
2024 | rec_loop_jit(J, rc, rec_loop(J, ra)); |
2025 | break; |
2026 | |
2027 | case BC_IFORL: |
2028 | case BC_IITERL: |
2029 | case BC_ILOOP: |
2030 | case BC_IFUNCF: |
2031 | case BC_IFUNCV: |
2032 | lj_trace_err(J, LJ_TRERR_BLACKL); |
2033 | break; |
2034 | |
2035 | case BC_JMP: |
2036 | if (ra < J->maxslot) |
2037 | J->maxslot = ra; /* Shrink used slots. */ |
2038 | break; |
2039 | |
2040 | /* -- Function headers -------------------------------------------------- */ |
2041 | |
2042 | case BC_FUNCF: |
2043 | rec_func_lua(J); |
2044 | break; |
2045 | case BC_JFUNCF: |
2046 | rec_func_jit(J, rc); |
2047 | break; |
2048 | |
2049 | case BC_FUNCV: |
2050 | rec_func_vararg(J); |
2051 | rec_func_lua(J); |
2052 | break; |
2053 | case BC_JFUNCV: |
lua_assert(0);  /* Cannot happen. No hotcall counting for vararg funcs. */
2055 | break; |
2056 | |
2057 | case BC_FUNCC: |
2058 | case BC_FUNCCW: |
2059 | lj_ffrecord_func(J); |
2060 | break; |
2061 | |
2062 | default: |
2063 | if (op >= BC__MAX) { |
2064 | lj_ffrecord_func(J); |
2065 | break; |
2066 | } |
2067 | /* fallthrough */ |
2068 | case BC_ITERN: |
2069 | case BC_ISNEXT: |
2070 | case BC_CAT: |
2071 | case BC_UCLO: |
2072 | case BC_FNEW: |
2073 | case BC_TSETM: |
2074 | setintV(&J->errinfo, (int32_t)op); |
2075 | lj_trace_err_info(J, LJ_TRERR_NYIBC); |
2076 | break; |
2077 | } |
2078 | |
2079 | /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ |
2080 | if (bcmode_a(op) == BCMdst && rc) { |
2081 | J->base[ra] = rc; |
2082 | if (ra >= J->maxslot) J->maxslot = ra+1; |
2083 | } |
2084 | |
2085 | #undef rav |
2086 | #undef rbv |
2087 | #undef rcv |
2088 | |
2089 | /* Limit the number of recorded IR instructions. */ |
2090 | if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) |
2091 | lj_trace_err(J, LJ_TRERR_TRACEOV); |
2092 | } |
2093 | |
2094 | /* -- Recording setup ----------------------------------------------------- */ |
2095 | |
2096 | /* Setup recording for a root trace started by a hot loop. */ |
2097 | static const BCIns *rec_setup_root(jit_State *J) |
2098 | { |
2099 | /* Determine the next PC and the bytecode range for the loop. */ |
2100 | const BCIns *pcj, *pc = J->pc; |
2101 | BCIns ins = *pc; |
2102 | BCReg ra = bc_a(ins); |
2103 | switch (bc_op(ins)) { |
2104 | case BC_FORL: |
2105 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); |
2106 | pc += 1+bc_j(ins); |
2107 | J->bc_min = pc; |
2108 | break; |
2109 | case BC_ITERL: |
2110 | lua_assert(bc_op(pc[-1]) == BC_ITERC); |
2111 | J->maxslot = ra + bc_b(pc[-1]) - 1; |
2112 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); |
2113 | pc += 1+bc_j(ins); |
2114 | lua_assert(bc_op(pc[-1]) == BC_JMP); |
2115 | J->bc_min = pc; |
2116 | break; |
2117 | case BC_LOOP: |
/* Only check the BC range for real loops, not for "repeat until true". */
2119 | pcj = pc + bc_j(ins); |
2120 | ins = *pcj; |
2121 | if (bc_op(ins) == BC_JMP && bc_j(ins) < 0) { |
2122 | J->bc_min = pcj+1 + bc_j(ins); |
2123 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); |
2124 | } |
2125 | J->maxslot = ra; |
2126 | pc++; |
2127 | break; |
2128 | case BC_RET: |
2129 | case BC_RET0: |
2130 | case BC_RET1: |
2131 | /* No bytecode range check for down-recursive root traces. */ |
2132 | J->maxslot = ra + bc_d(ins) - 1; |
2133 | break; |
2134 | case BC_FUNCF: |
2135 | /* No bytecode range check for root traces started by a hot call. */ |
2136 | J->maxslot = J->pt->numparams; |
2137 | pc++; |
2138 | break; |
2139 | default: |
2140 | lua_assert(0); |
2141 | break; |
2142 | } |
2143 | return pc; |
2144 | } |
2145 | |
2146 | /* Setup for recording a new trace. */ |
2147 | void lj_record_setup(jit_State *J) |
2148 | { |
2149 | uint32_t i; |
2150 | |
2151 | /* Initialize state related to current trace. */ |
2152 | memset(J->slot, 0, sizeof(J->slot)); |
2153 | memset(J->chain, 0, sizeof(J->chain)); |
2154 | memset(J->bpropcache, 0, sizeof(J->bpropcache)); |
2155 | J->scev.idx = REF_NIL; |
2156 | |
2157 | J->baseslot = 1; /* Invoking function is at base[-1]. */ |
2158 | J->base = J->slot + J->baseslot; |
2159 | J->maxslot = 0; |
2160 | J->framedepth = 0; |
2161 | J->retdepth = 0; |
2162 | |
2163 | J->instunroll = J->param[JIT_P_instunroll]; |
2164 | J->loopunroll = J->param[JIT_P_loopunroll]; |
2165 | J->tailcalled = 0; |
2166 | J->loopref = 0; |
2167 | |
2168 | J->bc_min = NULL; /* Means no limit. */ |
2169 | J->bc_extent = ~(MSize)0; |
2170 | |
2171 | /* Emit instructions for fixed references. Also triggers initial IR alloc. */ |
2172 | emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); |
2173 | for (i = 0; i <= 2; i++) { |
2174 | IRIns *ir = IR(REF_NIL-i); |
2175 | ir->i = 0; |
2176 | ir->t.irt = (uint8_t)(IRT_NIL+i); |
2177 | ir->o = IR_KPRI; |
2178 | ir->prev = 0; |
2179 | } |
2180 | J->cur.nk = REF_TRUE; |
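/* Constants grow downwards from REF_BIAS, so the three slots initialized
** above are REF_NIL, REF_FALSE and REF_TRUE (see lj_ir.h); nk = REF_TRUE
** marks them as the initial constant section of the IR.
*/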
2181 | |
2182 | J->startpc = J->pc; |
2183 | setmref(J->cur.startpc, J->pc); |
2184 | if (J->parent) { /* Side trace. */ |
2185 | GCtrace *T = traceref(J, J->parent); |
2186 | TraceNo root = T->root ? T->root : J->parent; |
2187 | J->cur.root = (uint16_t)root; |
2188 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); |
2189 | /* Check whether we could at least potentially form an extra loop. */ |
2190 | if (J->exitno == 0 && T->snap[0].nent == 0) { |
2191 | /* We can narrow a FORL for some side traces, too. */ |
2192 | if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI && |
2193 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { |
2194 | lj_snap_add(J); |
2195 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
2196 | goto sidecheck; |
2197 | } |
2198 | } else { |
2199 | J->startpc = NULL; /* Prevent forming an extra loop. */ |
2200 | } |
2201 | lj_snap_replay(J, T); |
2202 | sidecheck: |
2203 | if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || |
2204 | T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + |
2205 | J->param[JIT_P_tryside]) { |
2206 | rec_stop(J, LJ_TRLINK_INTERP, 0); |
2207 | } |
2208 | } else { /* Root trace. */ |
2209 | J->cur.root = 0; |
2210 | J->cur.startins = *J->pc; |
2211 | J->pc = rec_setup_root(J); |
2212 | /* Note: the loop instruction itself is recorded at the end and not |
2213 | ** at the start! So snapshot #0 needs to point to the *next* instruction. |
2214 | */ |
2215 | lj_snap_add(J); |
2216 | if (bc_op(J->cur.startins) == BC_FORL) |
2217 | rec_for_loop(J, J->pc-1, &J->scev, 1); |
2218 | if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) |
2219 | lj_trace_err(J, LJ_TRERR_STACKOV); |
2220 | } |
2221 | #ifdef LUAJIT_ENABLE_CHECKHOOK |
2222 | /* Regularly check for instruction/line hooks from compiled code and |
2223 | ** exit to the interpreter if the hooks are set. |
2224 | ** |
2225 | ** This is a compile-time option and disabled by default, since the |
2226 | ** hook checks may be quite expensive in tight loops. |
2227 | ** |
2228 | ** Note this is only useful if hooks are *not* set most of the time. |
2229 | ** Use this only if you want to *asynchronously* interrupt the execution. |
2230 | ** |
2231 | ** You can set the instruction hook via lua_sethook() with a count of 1 |
2232 | ** from a signal handler or another native thread. Please have a look |
2233 | ** at the first few functions in luajit.c for an example (Ctrl-C handler). |
2234 | */ |
2235 | { |
2236 | TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), |
2237 | lj_ir_kptr(J, &J2G(J)->hookmask), IRXLOAD_VOLATILE); |
2238 | tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (LUA_MASKLINE|LUA_MASKCOUNT))); |
2239 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0)); |
2240 | } |
2241 | #endif |
2242 | } |
2243 | |
2244 | #undef IR |
2245 | #undef emitir_raw |
2246 | #undef emitir |
2247 | |
2248 | #endif |
2249 | |