1 | /* |
2 | ** Definitions for x86 and x64 CPUs. |
3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h |
4 | */ |
5 | |
6 | #ifndef _LJ_TARGET_X86_H |
7 | #define _LJ_TARGET_X86_H |
8 | |
/* -- Register IDs -------------------------------------------------------- */
10 | |
/*
** Register lists, written as X-macros: each list applies its macro
** argument `_` to every register name. The position of a name in the list
** determines its numeric id in the enum below. Builds with LJ_64 set get
** the additional R8D..R15D and XMM8..XMM15 registers.
*/

/* General-purpose register names. */
#if LJ_64
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \
  _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D)
#else
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI)
#endif

/* Floating-point (XMM) register names. */
#if LJ_64
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
  _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
#else
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif

/* Names of the pseudo register ids (see RID_MRM and RID_RIP below). */
#define VRIDDEF(_) \
  _(MRM) _(RIP)

/* Turns a register name into the enumerator `RID_<name>,`. */
#define RIDENUM(name) RID_##name,

enum {
  /* Real registers: GPR ids start at 0, FPR ids follow directly. */
  GPRDEF(RIDENUM)
  FPRDEF(RIDENUM)
  RID_MAX,                      /* One past the last real register id. */

  /* Pseudo-ids, numbered from RID_MAX upwards. */
  RID_MRM = RID_MAX,            /* Pseudo-id for ModRM operand. */
  RID_RIP = RID_MAX+5,          /* Pseudo-id for RIP (x64 only), rm bits = 5. */

  /* Register ranges [min, max) and number of registers. */
  RID_MIN_GPR = RID_EAX,
  RID_MIN_FPR = RID_XMM0,
  RID_MAX_GPR = RID_MIN_FPR,
  RID_MAX_FPR = RID_MAX,
  RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,

  /* Calling conventions. */
  RID_SP = RID_ESP,
  RID_RET = RID_EAX,
#if LJ_64
  RID_FPRET = RID_XMM0,
#else
  RID_RETLO = RID_EAX,
  RID_RETHI = RID_EDX,
#endif

  /* These definitions must match with the *.dasc file(s): */
  RID_BASE = RID_EDX,           /* Interpreter BASE. */
#if LJ_64 && !LJ_ABI_WIN
  RID_LPC = RID_EBX,            /* Interpreter PC. */
  RID_DISPATCH = RID_R14D,      /* Interpreter DISPATCH table. */
#else
  RID_LPC = RID_ESI,            /* Interpreter PC. */
  RID_DISPATCH = RID_EBX,       /* Interpreter DISPATCH table. */
#endif
};
64 | |
65 | /* -- Register sets ------------------------------------------------------- */ |
66 | |
/*
** Make use of all registers, except the stack pointer (and maybe DISPATCH):
** the DISPATCH register is only taken out of the set when LJ_GC64 is
** non-zero.
*/
#define RSET_GPR \
  (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
   - RID2RSET(RID_ESP) \
   - LJ_GC64*RID2RSET(RID_DISPATCH))

/* Every register in the FPR id range. */
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))

/* Both register classes combined; this is also the initial set. */
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL

/* Register set for 8 bit operands (going by the GPR8 name). */
#if !LJ_64
/* x86: only the ids EAX..EBX are included. */
#define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1))
#else
/* Note: this requires the use of FORCE_REX! */
#define RSET_GPR8 RSET_GPR
#endif
81 | |
/*
** ABI-specific register sets.
**
** RSET_SCRATCH     Register set built per ABI below (scratch registers,
**                  going by the name).
** REGARG_GPRS      Argument GPR ids packed 5 bits apiece; the first listed
**                  register sits in the lowest 5 bits.
** REGARG_NUMGPR    Number of register ids packed into REGARG_GPRS.
** REGARG_NUMFPR    Number of FPRs from REGARG_FIRSTFPR to REGARG_LASTFPR
**                  (0 on x86, where neither bound is defined).
** STACKARG_OFS     Per-ABI stack argument offset.
*/

/* The set made up of EAX, ECX and EDX; part of every RSET_SCRATCH. */
#define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))

#if LJ_64 && LJ_ABI_WIN

/* Windows x64 ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
#define REGARG_GPRS \
  (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
#define REGARG_NUMGPR 4
#define REGARG_NUMFPR 4
#define REGARG_FIRSTFPR RID_XMM0
#define REGARG_LASTFPR RID_XMM3
#define STACKARG_OFS (4*8)

#elif LJ_64

/* The rest of the civilized x64 world has a common ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
#define REGARG_GPRS \
  (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
   <<5))<<5))<<5))<<5))<<5))
#define REGARG_NUMGPR 6
#define REGARG_NUMFPR 8
#define REGARG_FIRSTFPR RID_XMM0
#define REGARG_LASTFPR RID_XMM7
#define STACKARG_OFS 0

#else

/* Common x86 ABI. */
#define RSET_SCRATCH (RSET_ACD|RSET_FPR)
#define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */
#define REGARG_NUMGPR 2 /* Fastcall only. */
#define REGARG_NUMFPR 0
#define STACKARG_OFS 0

#endif
117 | |
#if LJ_64
/*
** Prefer the low 8 regs of each type to reduce REX prefixes.
** Drops any earlier rset_picktop definition and installs one that
** byte-swaps the set before handing it to lj_fls() and then XORs the
** result with 0x18.
** NOTE(review): presumably lj_fls() yields the index of the highest set bit
** and lj_bswap() reverses the four bytes of a 32 bit value, so that the XOR
** maps the index back into the unswapped set -- confirm against their
** definitions.
*/
#undef rset_picktop
#define rset_picktop(rs) (0x18 ^ lj_fls(lj_bswap(rs)))
#endif
123 | |
124 | /* -- Spill slots --------------------------------------------------------- */ |
125 | |
/*
** Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
**
** SPS_FIXED: Available fixed spill slots in interpreter frame.
** This definition must match with the *.dasc file(s).
**
** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
*/
#if LJ_64 && LJ_ABI_WIN
#define SPS_FIXED (4*2)
#define SPS_FIRST (4*2) /* Don't use callee register save area. */
#elif LJ_64
#if LJ_GC64
#define SPS_FIXED 2
#else
#define SPS_FIXED 4
#endif
#define SPS_FIRST 2
#else
#define SPS_FIXED 6
#define SPS_FIRST 2
#endif

/* Offset of the temporary area: zero. */
#define SPOFS_TMP 0

/* Converts a spill slot number into a byte offset (4 bytes per slot). */
#define sps_scale(slot) (4 * (int32_t)(slot))
/*
** Takes the number of slots beyond the SPS_FIXED ones and rounds it up to
** the next multiple of 4.
*/
#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
154 | |
155 | /* -- Exit state ---------------------------------------------------------- */ |
156 | |
/*
** Holds one value per FPR, one value per GPR and 256 spill slots of 32 bits.
** NOTE(review): presumably filled in when a trace exits (going by the name
** and the section title) -- confirm against the code that writes it.
**
** This definition must match with the *.dasc file(s), so the order and
** the sizes of the fields must not change.
*/
typedef struct {
  lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
  intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
  int32_t spill[256]; /* Spill slots. */
} ExitState;
163 | |
/*
** Exit stub layout. The group size is limited by the range of a short
** fwd jump (127): (2+2)*(32-1)-2 = 122.
*/
#define EXITSTUB_SPACING (2+2)
#define EXITSTUBS_PER_GROUP 32

/* g->vmstate has traceno on exit. */
#define EXITTRACE_VMSTATE 1
169 | |
170 | /* -- x86 ModRM operand encoding ------------------------------------------ */ |
171 | |
/*
** Values for the top two bits (XM_MASK) of an encoding byte. The XM_OFS*
** and XM_REG names and the XM_SCALE* names share the same four bit
** patterns.
*/
typedef enum {
  /* Operand mode: offset size (none, 8 bit, 32 bit) or register. */
  XM_OFS0 = 0x00,
  XM_OFS8 = 0x40,
  XM_OFS32 = 0x80,
  XM_REG = 0xc0,

  /* Index scale factor: 1, 2, 4 or 8. */
  XM_SCALE1 = 0x00,
  XM_SCALE2 = 0x40,
  XM_SCALE4 = 0x80,
  XM_SCALE8 = 0xc0,

  /* Mask covering the two bits used by all values above. */
  XM_MASK = 0xc0
} x86Mode;
177 | |
/*
** Structure to hold variable ModRM operand: a signed 32 bit offset plus
** the base register, the index register and the index scale.
*/
typedef struct {
  int32_t ofs;    /* Offset. */
  uint8_t base;   /* Base register or RID_NONE. */
  uint8_t idx;    /* Index register or RID_NONE. */
  uint8_t scale;  /* Index scale (XM_SCALE1 .. XM_SCALE8). */
} x86ModRM;
185 | |
186 | /* -- Opcodes ------------------------------------------------------------- */ |
187 | |
/*
** Macros to construct variable-length x86 opcodes. -(len+1) is in LSB.
**
** The argument is pasted onto `0x`, so it must be written as bare hex
** digits, e.g. XO_(8b). The opcode byte ends up in the most significant
** byte of the 32 bit value; the constant part of each macro supplies the
** bytes below it (0xfe, 0xfd and 0xfc in the LSB are -2, -3 and -4).
**
** The opcode byte is converted to uint32_t before it is shifted: shifting
** the plain int constant left by 24 overflows int for every opcode byte
** >= 0x80, e.g. XO_(ff), which is undefined behavior in C.
*/
#define XO_(o) ((uint32_t)(0x0000fe + ((uint32_t)0x##o<<24)))
#define XO_FPU(a,b) \
  ((uint32_t)(0x00fd + ((uint32_t)0x##a<<16)+((uint32_t)0x##b<<24)))
#define XO_0f(o) ((uint32_t)(0x0f00fd + ((uint32_t)0x##o<<24)))
#define XO_66(o) ((uint32_t)(0x6600fd + ((uint32_t)0x##o<<24)))
#define XO_660f(o) ((uint32_t)(0x0f66fc + ((uint32_t)0x##o<<24)))
#define XO_f20f(o) ((uint32_t)(0x0ff2fc + ((uint32_t)0x##o<<24)))
#define XO_f30f(o) ((uint32_t)(0x0ff3fc + ((uint32_t)0x##o<<24)))

/*
** Same idea for the VEX-encoded opcodes (XV_* prefix): three constant low
** bytes, starting with 0xc4, and the opcode byte on top.
*/
#define XV_660f38(o) ((uint32_t)(0x79e2c4 + ((uint32_t)0x##o<<24)))
#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + ((uint32_t)0x##o<<24)))
#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + ((uint32_t)0x##o<<24)))
#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + ((uint32_t)0x##o<<24)))

/* This list of x86 opcodes is not intended to be complete. Opcodes are only
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
** whole mess.
**
** Three kinds of constants live in this enum: XI_* are plain opcode bytes,
** XV_* and XO_* are built with the macros above, and each XOg_* next to an
** XO_* FPU opcode is a small number in the range 0..7 that goes with it.
*/
typedef enum {
  /* Fixed length opcodes. XI_* prefix. */
  XI_O16 = 0x66,
  XI_NOP = 0x90,
  XI_XCHGa = 0x90,
  XI_CALL = 0xe8,
  XI_JMP = 0xe9,
  XI_JMPs = 0xeb,
  XI_PUSH = 0x50, /* Really 50+r. */
  XI_JCCs = 0x70, /* Really 7x. */
  XI_JCCn = 0x80, /* Really 0f8x. */
  XI_LEA = 0x8d,
  XI_MOVrib = 0xb0, /* Really b0+r. */
  XI_MOVri = 0xb8, /* Really b8+r. */
  XI_ARITHib = 0x80,
  XI_ARITHi = 0x81,
  XI_ARITHi8 = 0x83,
  XI_PUSHi8 = 0x6a,
  XI_TESTb = 0x84,
  XI_TEST = 0x85,
  XI_INT3 = 0xcc,
  XI_MOVmi = 0xc7,
  XI_GROUP5 = 0xff,

  /* Note: little-endian byte-order! */
  XI_FLDZ = 0xeed9,
  XI_FLD1 = 0xe8d9,
  XI_FDUP = 0xc0d9, /* Really fld st0. */
  XI_FPOP = 0xd8dd, /* Really fstp st0. */
  XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
  XI_FRNDINT = 0xfcd9,
  XI_FSCALE = 0xfdd9,
  XI_FYL2X = 0xf1d9,

  /* VEX-encoded instructions. XV_* prefix. */
  XV_RORX = XV_f20f3a(f0),
  XV_SARX = XV_f30f38(f7),
  XV_SHLX = XV_660f38(f7),
  XV_SHRX = XV_f20f38(f7),

  /* Variable-length opcodes. XO_* prefix. */
  XO_OR = XO_(0b),
  XO_MOV = XO_(8b),
  XO_MOVto = XO_(89),
  XO_MOVtow = XO_66(89),
  XO_MOVtob = XO_(88),
  XO_MOVmi = XO_(c7),
  XO_MOVmib = XO_(c6),
  XO_LEA = XO_(8d),
  XO_ARITHib = XO_(80),
  XO_ARITHi = XO_(81),
  XO_ARITHi8 = XO_(83),
  XO_ARITHiw8 = XO_66(83),
  XO_SHIFTi = XO_(c1),
  XO_SHIFT1 = XO_(d1),
  XO_SHIFTcl = XO_(d3),
  XO_IMUL = XO_0f(af),
  XO_IMULi = XO_(69),
  XO_IMULi8 = XO_(6b),
  XO_CMP = XO_(3b),
  XO_TESTb = XO_(84),
  XO_TEST = XO_(85),
  XO_GROUP3b = XO_(f6),
  XO_GROUP3 = XO_(f7),
  XO_GROUP5b = XO_(fe),
  XO_GROUP5 = XO_(ff),
  XO_MOVZXb = XO_0f(b6),
  XO_MOVZXw = XO_0f(b7),
  XO_MOVSXb = XO_0f(be),
  XO_MOVSXw = XO_0f(bf),
  XO_MOVSXd = XO_(63),
  XO_BSWAP = XO_0f(c8),
  XO_CMOV = XO_0f(40),

  /* SSE/SSE2 opcodes. */
  XO_MOVSD = XO_f20f(10),
  XO_MOVSDto = XO_f20f(11),
  XO_MOVSS = XO_f30f(10),
  XO_MOVSSto = XO_f30f(11),
  XO_MOVLPD = XO_660f(12),
  XO_MOVAPS = XO_0f(28),
  XO_XORPS = XO_0f(57),
  XO_ANDPS = XO_0f(54),
  XO_ADDSD = XO_f20f(58),
  XO_SUBSD = XO_f20f(5c),
  XO_MULSD = XO_f20f(59),
  XO_DIVSD = XO_f20f(5e),
  XO_SQRTSD = XO_f20f(51),
  XO_MINSD = XO_f20f(5d),
  XO_MAXSD = XO_f20f(5f),
  XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
  XO_UCOMISD = XO_660f(2e),
  XO_CVTSI2SD = XO_f20f(2a),
  XO_CVTTSD2SI= XO_f20f(2c),
  XO_CVTSI2SS = XO_f30f(2a),
  XO_CVTTSS2SI= XO_f30f(2c),
  XO_CVTSS2SD = XO_f30f(5a),
  XO_CVTSD2SS = XO_f20f(5a),
  XO_ADDSS = XO_f30f(58),
  XO_MOVD = XO_660f(6e),
  XO_MOVDto = XO_660f(7e),

  /* x87 FPU opcodes, each followed by its XOg_* companion value. */
  XO_FLDd = XO_(d9), XOg_FLDd = 0,
  XO_FLDq = XO_(dd), XOg_FLDq = 0,
  XO_FILDd = XO_(db), XOg_FILDd = 0,
  XO_FILDq = XO_(df), XOg_FILDq = 5,
  XO_FSTPd = XO_(d9), XOg_FSTPd = 3,
  XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
  XO_FISTPq = XO_(df), XOg_FISTPq = 7,
  XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1,
  XO_FADDq = XO_(dc), XOg_FADDq = 0,
  XO_FLDCW = XO_(d9), XOg_FLDCW = 5,
  XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7
} x86Op;
319 | |
/*
** x86 opcode groups. An x86Group packs three small values into one word:
** `i8` in bits 16 and up, `i` in bits 8..15 and `g` in the low bits.
*/
typedef uint32_t x86Group;

/* Packs the three parts into an x86Group. */
#define XG_(i8, i, g) ((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
/* Group made of the XI_ARITHi8 and XI_ARITHi opcodes plus `g`. */
#define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g)

/*
** Turn a group back into an x86Op with 0xfe in the LSB: XG_TOXOi() moves
** the `i` part into the top byte, XG_TOXOi8() moves the `i8` part there.
*/
#define XG_TOXOi(xg) ((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
#define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))

/*
** Build an x86Op from the number `a`: `a` is shifted to bit 27 and added
** to a top byte of 0x03. XO_ARITHw() does the same with an extra 0x66
** byte and 0xfd in the LSB.
*/
#define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27)))
#define XO_ARITHw(a) ((x86Op)(0x036600fd + ((a)<<27)))
330 | |
/*
** Sub-operation numbers for the arithmetic opcodes.
** XOg_X_IMUL is one past the eight regular entries.
*/
typedef enum {
  XOg_ADD = 0,
  XOg_OR = 1,
  XOg_ADC = 2,
  XOg_SBB = 3,
  XOg_AND = 4,
  XOg_SUB = 5,
  XOg_XOR = 6,
  XOg_CMP = 7,
  XOg_X_IMUL = 8
} x86Arith;

/* Sub-operation numbers for the shift and rotate opcodes. */
typedef enum {
  XOg_ROL = 0,
  XOg_ROR = 1,
  XOg_RCL = 2,
  XOg_RCR = 3,
  XOg_SHL = 4,
  XOg_SHR = 5,
  XOg_SAL = 6,
  XOg_SAR = 7
} x86Shift;

/* Sub-operation numbers for opcode group 3. */
typedef enum {
  XOg_TEST = 0,
  XOg_TEST_ = 1,
  XOg_NOT = 2,
  XOg_NEG = 3,
  XOg_MUL = 4,
  XOg_IMUL = 5,
  XOg_DIV = 6,
  XOg_IDIV = 7
} x86Group3;

/* Sub-operation numbers for opcode group 5. */
typedef enum {
  XOg_INC = 0,
  XOg_DEC = 1,
  XOg_CALL = 2,
  XOg_CALLfar = 3,
  XOg_JMP = 4,
  XOg_JMPfar = 5,
  XOg_PUSH = 6
} x86Group5;
347 | |
/*
** x86 condition codes. The first sixteen names carry the values 0..15;
** every name after them is an alias for one of those sixteen.
*/
typedef enum {
  CC_O = 0,
  CC_NO = 1,
  CC_B = 2,
  CC_NB = 3,
  CC_E = 4,
  CC_NE = 5,
  CC_BE = 6,
  CC_NBE = 7,
  CC_S = 8,
  CC_NS = 9,
  CC_P = 10,
  CC_NP = 11,
  CC_L = 12,
  CC_NL = 13,
  CC_LE = 14,
  CC_NLE = 15,

  /* Aliases. */
  CC_C = CC_B,
  CC_NAE = CC_C,
  CC_NC = CC_NB,
  CC_AE = CC_NB,
  CC_Z = CC_E,
  CC_NZ = CC_NE,
  CC_NA = CC_BE,
  CC_A = CC_NBE,
  CC_PE = CC_P,
  CC_PO = CC_NP,
  CC_NGE = CC_L,
  CC_GE = CC_NL,
  CC_NG = CC_LE,
  CC_G = CC_NLE
} x86CC;
357 | |
358 | #endif |
359 | |