/*
** Common definitions for the JIT compiler.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_JIT_H
#define _LJ_JIT_H

#include "lj_obj.h"
#include "lj_ir.h"

/* -- JIT engine flags ---------------------------------------------------- */

/* General JIT engine flags. 4 bits. */
#define JIT_F_ON 0x00000001

/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
#define JIT_F_CPU 0x00000010

#if LJ_TARGET_X86ORX64

#define JIT_F_SSE3 (JIT_F_CPU << 0)
#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
#define JIT_F_BMI2 (JIT_F_CPU << 2)

#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"

#elif LJ_TARGET_ARM

#define JIT_F_ARMV6_ (JIT_F_CPU << 0)
#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1)
#define JIT_F_ARMV7 (JIT_F_CPU << 2)
#define JIT_F_ARMV8 (JIT_F_CPU << 3)
#define JIT_F_VFPV2 (JIT_F_CPU << 4)
#define JIT_F_VFPV3 (JIT_F_CPU << 5)

#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3)

#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"

#elif LJ_TARGET_PPC

#define JIT_F_SQRT (JIT_F_CPU << 0)
#define JIT_F_ROUND (JIT_F_CPU << 1)

#define JIT_F_CPUSTRING "\4SQRT\5ROUND"

#elif LJ_TARGET_MIPS

#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0)

#if LJ_TARGET_MIPS32
#if LJ_TARGET_MIPSR6
#define JIT_F_CPUSTRING "\010MIPS32R6"
#else
#define JIT_F_CPUSTRING "\010MIPS32R2"
#endif
#else
#if LJ_TARGET_MIPSR6
#define JIT_F_CPUSTRING "\010MIPS64R6"
#else
#define JIT_F_CPUSTRING "\010MIPS64R2"
#endif
#endif

#else

#define JIT_F_CPUSTRING ""

#endif
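
/* Illustrative sketch: each name in a JIT_F_CPUSTRING list is preceded by
** one byte holding its length (written as an octal escape), in the same
** order as the flags. A hypothetical decoder, where jitflags stands in for
** the engine's flag word:
**
**   const char *p = JIT_F_CPUSTRING;
**   uint32_t flag = JIT_F_CPU;
**   while (*p) {
**     int len = *(const uint8_t *)p++;
**     printf("%.*s %s\n", len, p, (jitflags & flag) ? "+" : "-");
**     p += len;
**     flag <<= 1;
**   }
*/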

/* Optimization flags. 12 bits. */
#define JIT_F_OPT 0x00010000
#define JIT_F_OPT_MASK 0x0fff0000

#define JIT_F_OPT_FOLD (JIT_F_OPT << 0)
#define JIT_F_OPT_CSE (JIT_F_OPT << 1)
#define JIT_F_OPT_DCE (JIT_F_OPT << 2)
#define JIT_F_OPT_FWD (JIT_F_OPT << 3)
#define JIT_F_OPT_DSE (JIT_F_OPT << 4)
#define JIT_F_OPT_NARROW (JIT_F_OPT << 5)
#define JIT_F_OPT_LOOP (JIT_F_OPT << 6)
#define JIT_F_OPT_ABC (JIT_F_OPT << 7)
#define JIT_F_OPT_SINK (JIT_F_OPT << 8)
#define JIT_F_OPT_FUSE (JIT_F_OPT << 9)

/* Optimization names for -O. Must match the order above. */
#define JIT_F_OPTSTRING \
  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"

/* Optimization levels set a fixed combination of flags. */
#define JIT_F_OPT_0 0
#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
#define JIT_F_OPT_3 (JIT_F_OPT_2|\
  JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
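
/* Illustrative sketch: the -O levels are plain bitmask unions, so testing
** whether a pass is enabled is a single AND against the flag word:
**
**   uint32_t flags = JIT_F_ON | JIT_F_OPT_2;
**   if (flags & JIT_F_OPT_FOLD) { ... }   enabled under -O1 and up
**   if (flags & JIT_F_OPT_SINK) { ... }   only enabled under -O3
*/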

/* -- JIT engine parameters ----------------------------------------------- */

#if LJ_TARGET_WINDOWS || LJ_64
/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
#define JIT_P_sizemcode_DEFAULT 64
#else
/* Could go as low as 4K, but the mmap() overhead would be rather high. */
#define JIT_P_sizemcode_DEFAULT 32
#endif

/* Optimization parameters and their defaults. Length is a char in octal! */
#define JIT_PARAMDEF(_) \
  _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
  _(\011, maxrecord, 4000) /* Max. # of recorded IR instructions. */ \
  _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
  _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
  _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
  _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
  \
  _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
  _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
  _(\007, tryside, 4) /* # of attempts to compile a side trace. */ \
  \
  _(\012, instunroll, 4) /* Max. unroll for unstable loops. */ \
  _(\012, loopunroll, 15) /* Max. unroll for loop ops in side traces. */ \
  _(\012, callunroll, 3) /* Max. unroll for recursive calls. */ \
  _(\011, recunroll, 2) /* Min. unroll for true recursion. */ \
  \
  /* Size of each machine code area (in KBytes). */ \
  _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
  /* Max. total size of all machine code areas (in KBytes). */ \
  _(\010, maxmcode, 512) \
  /* End of list. */

enum {
#define JIT_PARAMENUM(len, name, value) JIT_P_##name,
JIT_PARAMDEF(JIT_PARAMENUM)
#undef JIT_PARAMENUM
  JIT_P__MAX
};

#define JIT_PARAMSTR(len, name, value) #len #name
#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
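
/* Illustrative sketch: JIT_PARAMDEF is an X-macro. Instantiating it with
** JIT_PARAMENUM (as above) yields the enumerators JIT_P_maxtrace,
** JIT_P_maxrecord, ..., so a parameter is read by indexing the runtime
** array with its enumerator, e.g.:
**
**   int32_t hotloop = J->param[JIT_P_hotloop];   (default 56)
**
** JIT_PARAMSTR instead stringifies each entry, concatenating the octal
** length prefix and the name into the same length-prefixed list format
** used by JIT_F_OPTSTRING.
*/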

/* -- JIT engine data structures ------------------------------------------ */

/* Trace compiler state. */
typedef enum {
  LJ_TRACE_IDLE, /* Trace compiler idle. */
  LJ_TRACE_ACTIVE = 0x10, /* Marker for the active states below. */
  LJ_TRACE_RECORD, /* Bytecode recording active. */
  LJ_TRACE_START, /* New trace started. */
  LJ_TRACE_END, /* End of trace. */
  LJ_TRACE_ASM, /* Assemble trace. */
  LJ_TRACE_ERR /* Trace aborted with error. */
} TraceState;

/* Post-processing action. */
typedef enum {
  LJ_POST_NONE, /* No action. */
  LJ_POST_FIXCOMP, /* Fixup comparison and emit pending guard. */
  LJ_POST_FIXGUARD, /* Fixup and emit pending guard. */
  LJ_POST_FIXGUARDSNAP, /* Fixup and emit pending guard and snapshot. */
  LJ_POST_FIXBOOL, /* Fixup boolean result. */
  LJ_POST_FIXCONST, /* Fixup constant results. */
  LJ_POST_FFRETRY /* Suppress recording of retried fast functions. */
} PostProc;

/* Machine code type. */
#if LJ_TARGET_X86ORX64
typedef uint8_t MCode;
#else
typedef uint32_t MCode;
#endif

/* Linked list of MCode areas. */
typedef struct MCLink {
  MCode *next; /* Next area. */
  size_t size; /* Size of current area. */
} MCLink;
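
/* Illustrative sketch, assuming the mcode allocator places an MCLink at the
** base of every area (as the linked-list comment above implies): the total
** allocated size can be recomputed by walking the chain from the current
** area:
**
**   size_t total = 0;
**   MCode *area = J->mcarea;
**   while (area != NULL) {
**     MCLink *link = (MCLink *)area;
**     total += link->size;
**     area = link->next;
**   }
*/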

/* Stack snapshot header. */
typedef struct SnapShot {
  uint32_t mapofs; /* Offset into snapshot map. */
  IRRef1 ref; /* First IR ref for this snapshot. */
  uint16_t mcofs; /* Offset into machine code in MCode units. */
  uint8_t nslots; /* Number of valid slots. */
  uint8_t topslot; /* Maximum frame extent. */
  uint8_t nent; /* Number of compressed entries. */
  uint8_t count; /* Count of taken exits for this snapshot. */
} SnapShot;

#define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */

/* Compressed snapshot entry. */
typedef uint32_t SnapEntry;

#define SNAP_FRAME 0x010000 /* Frame slot. */
#define SNAP_CONT 0x020000 /* Continuation slot. */
#define SNAP_NORESTORE 0x040000 /* No need to restore slot. */
#define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */
LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);
LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);

#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
#define SNAP_TR(slot, tr) \
  (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
#if !LJ_FR2
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
#endif
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
#define snap_ref(sn) ((sn) & 0xffff)
#define snap_slot(sn) ((BCReg)((sn) >> 24))
#define snap_isframe(sn) ((sn) & SNAP_FRAME)
#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
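
/* Illustrative sketch: a SnapEntry packs the stack slot into the top byte,
** the flags into bits 16-19 and the IR reference into the low 16 bits.
** Packing and unpacking therefore round-trip:
**
**   SnapEntry sn = SNAP(5, SNAP_FRAME, REF_BASE);
**   BCReg slot = snap_slot(sn);        (5)
**   IRRef1 ref = snap_ref(sn);         (REF_BASE)
**   int isframe = snap_isframe(sn);    (nonzero)
*/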

static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
{
#if LJ_FR2
  uint64_t pcbase;
  memcpy(&pcbase, sn, sizeof(uint64_t));
  return (const BCIns *)(pcbase >> 8);
#else
  return (const BCIns *)(uintptr_t)*sn;
#endif
}

/* Snapshot and exit numbers. */
typedef uint32_t SnapNo;
typedef uint32_t ExitNo;

/* Trace number. */
typedef uint32_t TraceNo; /* Used to pass around trace numbers. */
typedef uint16_t TraceNo1; /* Stored trace number. */

/* Type of link. ORDER LJ_TRLINK */
typedef enum {
  LJ_TRLINK_NONE, /* Incomplete trace. No link, yet. */
  LJ_TRLINK_ROOT, /* Link to other root trace. */
  LJ_TRLINK_LOOP, /* Loop to same trace. */
  LJ_TRLINK_TAILREC, /* Tail-recursion. */
  LJ_TRLINK_UPREC, /* Up-recursion. */
  LJ_TRLINK_DOWNREC, /* Down-recursion. */
  LJ_TRLINK_INTERP, /* Fallback to interpreter. */
  LJ_TRLINK_RETURN, /* Return to interpreter. */
  LJ_TRLINK_STITCH /* Trace stitching. */
} TraceLink;

/* Trace object. */
typedef struct GCtrace {
  GCHeader;
  uint16_t nsnap; /* Number of snapshots. */
  IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
#if LJ_GC64
  uint32_t unused_gc64;
#endif
  GCRef gclist;
  IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
  IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
  uint32_t nsnapmap; /* Number of snapshot map elements. */
  SnapShot *snap; /* Snapshot array. */
  SnapEntry *snapmap; /* Snapshot map. */
  GCRef startpt; /* Starting prototype. */
  MRef startpc; /* Bytecode PC of starting instruction. */
  BCIns startins; /* Original bytecode of starting instruction. */
  MSize szmcode; /* Size of machine code. */
  MCode *mcode; /* Start of machine code. */
  MSize mcloop; /* Offset of loop start in machine code. */
  uint16_t nchild; /* Number of child traces (root trace only). */
  uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */
  TraceNo1 traceno; /* Trace number. */
  TraceNo1 link; /* Linked trace (or self for loops). */
  TraceNo1 root; /* Root trace of side trace (or 0 for root traces). */
  TraceNo1 nextroot; /* Next root trace for same prototype. */
  TraceNo1 nextside; /* Next side trace of same root trace. */
  uint8_t sinktags; /* Trace has SINK tags. */
  uint8_t topslot; /* Top stack slot already checked to be allocated. */
  uint8_t linktype; /* Type of link. */
  uint8_t unused1;
#ifdef LUAJIT_USE_GDBJIT
  void *gdbjit_entry; /* GDB JIT entry. */
#endif
} GCtrace;

#define gco2trace(o) check_exp((o)->gch.gct == ~LJ_TTRACE, (GCtrace *)(o))
#define traceref(J, n) \
  check_exp((n)>0 && (MSize)(n)<J->sizetrace, (GCtrace *)gcref(J->trace[(n)]))

LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtrace, gclist));
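
/* Illustrative sketch: traces are looked up by number via the trace array,
** e.g. mapping a side trace back to its root:
**
**   GCtrace *T = traceref(J, traceno);
**   GCtrace *root = T->root ? traceref(J, T->root) : T;
*/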

static LJ_AINLINE MSize snap_nextofs(GCtrace *T, SnapShot *snap)
{
  if (snap+1 == &T->snap[T->nsnap])
    return T->nsnapmap;
  else
    return (snap+1)->mapofs;
}
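
/* Illustrative sketch: the total number of snapshot map elements owned by a
** snapshot (its nent compressed slot entries plus the trailing frame/PC
** words) is the distance to the next snapshot's offset:
**
**   SnapShot *snap = &T->snap[snapno];
**   MSize nelems = snap_nextofs(T, snap) - snap->mapofs;
*/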

/* Round-robin penalty cache for bytecodes leading to aborted traces. */
typedef struct HotPenalty {
  MRef pc; /* Starting bytecode PC. */
  uint16_t val; /* Penalty value, i.e. hotcount start. */
  uint16_t reason; /* Abort reason (really TraceErr). */
} HotPenalty;

#define PENALTY_SLOTS 64 /* Penalty cache slots. Must be a power of 2. */
#define PENALTY_MIN (36*2) /* Minimum penalty value. */
#define PENALTY_MAX 60000 /* Maximum penalty value. */
#define PENALTY_RNDBITS 4 /* # of random bits to add to penalty value. */
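
/* Illustrative sketch of the round-robin replacement implied by the fields
** above (the real policy lives in the trace compiler, not in this header):
** a new entry overwrites the slot at the current index, and the index
** wraps with a cheap mask because PENALTY_SLOTS is a power of 2:
**
**   uint32_t i = J->penaltyslot;
**   J->penaltyslot = (i + 1) & (PENALTY_SLOTS - 1);
**   setmref(J->penalty[i].pc, pc);
**   J->penalty[i].val = PENALTY_MIN;
*/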

/* Round-robin backpropagation cache for narrowing conversions. */
typedef struct BPropEntry {
  IRRef1 key; /* Key: original reference. */
  IRRef1 val; /* Value: reference after conversion. */
  IRRef mode; /* Mode for this entry (currently IRCONV_*). */
} BPropEntry;

/* Number of slots for the backpropagation cache. Must be a power of 2. */
#define BPROP_SLOTS 16

/* Scalar evolution analysis cache. */
typedef struct ScEvEntry {
  MRef pc; /* Bytecode PC of FORI. */
  IRRef1 idx; /* Index reference. */
  IRRef1 start; /* Constant start reference. */
  IRRef1 stop; /* Constant stop reference. */
  IRRef1 step; /* Constant step reference. */
  IRType1 t; /* Scalar type. */
  uint8_t dir; /* Direction. 1: +, 0: -. */
} ScEvEntry;

/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
typedef struct RBCHashEntry {
  MRef pc; /* Bytecode PC. */
  GCRef pt; /* Prototype. */
  IRRef ref; /* IR reference. */
} RBCHashEntry;

/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
#define RBCHASH_SLOTS 8

/* 128 bit SIMD constants. */
enum {
  LJ_KSIMD_ABS,
  LJ_KSIMD_NEG,
  LJ_KSIMD__MAX
};

enum {
#if LJ_TARGET_X86ORX64
  LJ_K64_TOBIT, /* 2^52 + 2^51 */
  LJ_K64_2P64, /* 2^64 */
  LJ_K64_M2P64, /* -2^64 */
#if LJ_32
  LJ_K64_M2P64_31, /* -2^64 or -2^31 */
#else
  LJ_K64_M2P64_31 = LJ_K64_M2P64,
#endif
#endif
#if LJ_TARGET_MIPS
  LJ_K64_2P31, /* 2^31 */
#if LJ_64
  LJ_K64_2P63, /* 2^63 */
  LJ_K64_M2P64, /* -2^64 */
#endif
#endif
  LJ_K64__MAX
};

enum {
#if LJ_TARGET_X86ORX64
  LJ_K32_M2P64_31, /* -2^64 or -2^31 */
#endif
#if LJ_TARGET_PPC
  LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
  LJ_K32_2P52, /* 2^52 */
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
  LJ_K32_2P31, /* 2^31 */
#endif
#if LJ_TARGET_MIPS64
  LJ_K32_2P63, /* 2^63 */
  LJ_K32_M2P64, /* -2^64 */
#endif
  LJ_K32__MAX
};

/* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \
  ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
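
/* Worked example for the alignment arithmetic: adding 15 and clearing the
** low 4 bits rounds an address up to the next multiple of 16, e.g.
** 0x1009 + 15 = 0x1018, and 0x1018 & ~15 = 0x1010. Reserving two TValues
** (16 bytes) per constant guarantees that a fully aligned 16 byte slot
** exists inside each pair, which is why ksimd below holds
** LJ_KSIMD__MAX*2+1 elements.
*/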

/* Set/reset flag to activate the SPLIT pass for the current trace. */
#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
#define lj_needsplit(J) (J->needsplit = 1)
#define lj_resetsplit(J) (J->needsplit = 0)
#else
#define lj_needsplit(J) UNUSED(J)
#define lj_resetsplit(J) UNUSED(J)
#endif

/* Fold state is used to fold instructions on-the-fly. */
typedef struct FoldState {
  IRIns ins; /* Currently emitted instruction. */
  IRIns left[2]; /* Instruction referenced by left operand. */
  IRIns right[2]; /* Instruction referenced by right operand. */
} FoldState;

/* JIT compiler state. */
typedef struct jit_State {
  GCtrace cur; /* Current trace. */
  GCtrace *curfinal; /* Final address of current trace (set during asm). */

  lua_State *L; /* Current Lua state. */
  const BCIns *pc; /* Current PC. */
  GCfunc *fn; /* Current function. */
  GCproto *pt; /* Current prototype. */
  TRef *base; /* Current frame base, points into J->slots. */

  uint32_t flags; /* JIT engine flags. */
  BCReg maxslot; /* Relative to baseslot. */
  BCReg baseslot; /* Current frame base, offset into J->slots. */

  uint8_t mergesnap; /* Allowed to merge with next snapshot. */
  uint8_t needsnap; /* Need snapshot before recording next bytecode. */
  IRType1 guardemit; /* Accumulated IRT_GUARD for emitted instructions. */
  uint8_t bcskip; /* Number of bytecode instructions to skip. */

  FoldState fold; /* Fold state. */

  const BCIns *bc_min; /* Start of allowed bytecode range for root trace. */
  MSize bc_extent; /* Extent of the range. */

  TraceState state; /* Trace compiler state. */

  int32_t instunroll; /* Unroll counter for unstable loops. */
  int32_t loopunroll; /* Unroll counter for loop ops in side traces. */
  int32_t tailcalled; /* Number of successive tailcalls. */
  int32_t framedepth; /* Current frame depth. */
  int32_t retdepth; /* Return frame depth (count of RETF). */

  uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
  TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
  TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */

  IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
  IRRef irtoplim; /* Upper limit of instruction buffer (biased). */
  IRRef irbotlim; /* Lower limit of instruction buffer (biased). */
  IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */

  MSize sizesnap; /* Size of temp. snapshot buffer. */
  SnapShot *snapbuf; /* Temp. snapshot buffer. */
  SnapEntry *snapmapbuf; /* Temp. snapshot map buffer. */
  MSize sizesnapmap; /* Size of temp. snapshot map buffer. */

  PostProc postproc; /* Required post-processing after execution. */
#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
  uint8_t needsplit; /* Need SPLIT pass. */
#endif
  uint8_t retryrec; /* Retry recording. */

  GCRef *trace; /* Array of traces. */
  TraceNo freetrace; /* Start of scan for next free trace. */
  MSize sizetrace; /* Size of trace array. */
  IRRef1 ktrace; /* Reference to KGC with GCtrace. */

  IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
  TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */

  int32_t param[JIT_P__MAX]; /* JIT engine parameters. */

  MCode *exitstubgroup[LJ_MAX_EXITSTUBGR]; /* Exit stub group addresses. */

  HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
  uint32_t penaltyslot; /* Round-robin index into penalty slots. */

#ifdef LUAJIT_ENABLE_TABLE_BUMP
  RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
#endif

  BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
  uint32_t bpropslot; /* Round-robin index into bpropcache slots. */

  ScEvEntry scev; /* Scalar evolution analysis cache (single entry). */

  const BCIns *startpc; /* Bytecode PC of starting instruction. */
  TraceNo parent; /* Parent of current side trace (0 for root traces). */
  ExitNo exitno; /* Exit number in parent of current side trace. */
  int exitcode; /* Exit code from unwound trace. */

  BCIns *patchpc; /* PC for pending re-patch. */
  BCIns patchins; /* Instruction for pending re-patch. */

  int mcprot; /* Protection of current mcode area. */
  MCode *mcarea; /* Base of current mcode area. */
  MCode *mctop; /* Top of current mcode area. */
  MCode *mcbot; /* Bottom of current mcode area. */
  size_t szmcarea; /* Size of current mcode area. */
  size_t szallmcarea; /* Total size of all allocated mcode areas. */

  TValue errinfo; /* Additional info element for trace errors. */

#if LJ_HASPROFILE
  GCproto *prev_pt; /* Previous prototype. */
  BCLine prev_line; /* Previous line. */
  int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
#endif
} jit_State;

#ifdef LUA_USE_ASSERT
#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__)
#else
#define lj_assertJ(c, ...) ((void)J)
#endif

#endif