| 1 | /* | 
| 2 | ** Definitions for x86 and x64 CPUs. | 
| 3 | ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h | 
| 4 | */ | 
| 5 |  | 
| 6 | #ifndef _LJ_TARGET_X86_H | 
| 7 | #define _LJ_TARGET_X86_H | 
| 8 |  | 
/* -- Register IDs -------------------------------------------------------- */
| 10 |  | 
/*
** Register name lists in X-macro form: the macro passed as `_` is applied
** to every register name, in the order given here. The x64 lists hold 16
** names per register class, the x86 lists hold 8.
*/
#if !LJ_64
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) \
  _(ESP) _(EBP) _(ESI) _(EDI)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) \
  _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#else
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) \
  _(ESP) _(EBP) _(ESI) _(EDI) \
  _(R8D) _(R9D) _(R10D) _(R11D) \
  _(R12D) _(R13D) _(R14D) _(R15D)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) \
  _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
  _(XMM8) _(XMM9) _(XMM10) _(XMM11) \
  _(XMM12) _(XMM13) _(XMM14) _(XMM15)
#endif

/* Names of the virtual register IDs, same X-macro form. */
#define VRIDDEF(_) \
  _(MRM) _(RIP)
| 26 |  | 
| 27 | #define RIDENUM(name)	RID_##name, | 
| 28 |  | 
| 29 | enum { | 
| 30 |   GPRDEF(RIDENUM)		/* General-purpose registers (GPRs). */ | 
| 31 |   FPRDEF(RIDENUM)		/* Floating-point registers (FPRs). */ | 
| 32 |   RID_MAX, | 
| 33 |   RID_MRM = RID_MAX,		/* Pseudo-id for ModRM operand. */ | 
| 34 |   RID_RIP = RID_MAX+5,		/* Pseudo-id for RIP (x64 only), rm bits = 5. */ | 
| 35 |  | 
| 36 |   /* Calling conventions. */ | 
| 37 |   RID_SP = RID_ESP, | 
| 38 |   RID_RET = RID_EAX, | 
| 39 | #if LJ_64 | 
| 40 |   RID_FPRET = RID_XMM0, | 
| 41 | #else | 
| 42 |   RID_RETLO = RID_EAX, | 
| 43 |   RID_RETHI = RID_EDX, | 
| 44 | #endif | 
| 45 |  | 
| 46 |   /* These definitions must match with the *.dasc file(s): */ | 
| 47 |   RID_BASE = RID_EDX,		/* Interpreter BASE. */ | 
| 48 | #if LJ_64 && !LJ_ABI_WIN | 
| 49 |   RID_LPC = RID_EBX,		/* Interpreter PC. */ | 
| 50 |   RID_DISPATCH = RID_R14D,	/* Interpreter DISPATCH table. */ | 
| 51 | #else | 
| 52 |   RID_LPC = RID_ESI,		/* Interpreter PC. */ | 
| 53 |   RID_DISPATCH = RID_EBX,	/* Interpreter DISPATCH table. */ | 
| 54 | #endif | 
| 55 |  | 
| 56 |   /* Register ranges [min, max) and number of registers. */ | 
| 57 |   RID_MIN_GPR = RID_EAX, | 
| 58 |   RID_MIN_FPR = RID_XMM0, | 
| 59 |   RID_MAX_GPR = RID_MIN_FPR, | 
| 60 |   RID_MAX_FPR = RID_MAX, | 
| 61 |   RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | 
| 62 |   RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR, | 
| 63 | }; | 
| 64 |  | 
| 65 | /* -- Register sets ------------------------------------------------------- */ | 
| 66 |  | 
/*
** Register sets handed out by default: every register of each class,
** except the stack pointer. DISPATCH is taken out of the GPR set as well
** when LJ_GC64 is non-zero.
*/
#define RSET_GPR \
  (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
   - RID2RSET(RID_ESP) \
   - LJ_GC64*RID2RSET(RID_DISPATCH))
#define RSET_FPR \
  (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))

/* Union of both classes, which is also the initial register set. */
#define RSET_ALL \
  (RSET_GPR|RSET_FPR)
#define RSET_INIT \
  RSET_ALL
| 74 |  | 
/* RSET_GPR8: GPR subset for 8 bit operands (judging by the name). */
#if !LJ_64
/* x86: only the registers in the range EAX..EBX (inclusive). */
#define RSET_GPR8 \
  (RSET_RANGE(RID_EAX, RID_EBX+1))
#else
/* x64: same as RSET_GPR. Note: this requires the use of FORCE_REX! */
#define RSET_GPR8 \
  RSET_GPR
#endif
| 81 |  | 
/*
** ABI-specific register sets and argument passing parameters.
**
** RSET_ACD         The set {EAX, ECX, EDX}.
** RSET_SCRATCH     Per-ABI register set (presumably the registers a call
**                  may clobber -- confirm against the users).
** REGARG_GPRS      Argument GPRs, packed 5 bits per register ID, starting
**                  at bit 0 in the order listed.
** REGARG_NUMGPR    Number of register IDs packed into REGARG_GPRS.
** REGARG_NUMFPR    Number of argument FPRs (REGARG_FIRSTFPR..REGARG_LASTFPR).
** STACKARG_OFS     Per-ABI stack argument offset.
*/
#define RSET_ACD \
  (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))

#if LJ_64 && LJ_ABI_WIN

/* Windows x64 ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
#define REGARG_GPRS \
  (RID_ECX|(RID_EDX<<5)|(RID_R8D<<10)|(RID_R9D<<15))
#define REGARG_NUMGPR 4
#define REGARG_NUMFPR 4
#define REGARG_FIRSTFPR RID_XMM0
#define REGARG_LASTFPR RID_XMM3
#define STACKARG_OFS (4*8)

#elif LJ_64

/* The rest of the civilized x64 world has a common ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
#define REGARG_GPRS \
  (RID_EDI|(RID_ESI<<5)|(RID_EDX<<10)|(RID_ECX<<15)|(RID_R8D<<20)| \
   (RID_R9D<<25))
#define REGARG_NUMGPR 6
#define REGARG_NUMFPR 8
#define REGARG_FIRSTFPR RID_XMM0
#define REGARG_LASTFPR RID_XMM7
#define STACKARG_OFS 0

#else

/* Common x86 ABI. Register arguments are for fastcall only. */
#define RSET_SCRATCH (RSET_ACD|RSET_FPR)
#define REGARG_GPRS (RID_ECX|(RID_EDX<<5))
#define REGARG_NUMGPR 2
#define REGARG_NUMFPR 0
#define STACKARG_OFS 0

#endif
| 117 |  | 
#if LJ_64
/*
** x64 only: replace any earlier rset_picktop. The aim is to prefer the low
** 8 regs of each type to reduce REX prefixes.
** NOTE(review): this reads as "byte-swap the set, find the highest set bit,
** then flip bits 3 and 4 of the index (^ 0x18) to undo the byte swap".
** That assumes lj_bswap() reverses the four bytes of a 32 bit value and
** lj_fls() returns the index of the highest set bit -- confirm against
** their definitions.
*/
#undef rset_picktop
#define rset_picktop(rs) (0x18 ^ (lj_fls(lj_bswap(rs))))
#endif
| 123 |  | 
| 124 | /* -- Spill slots --------------------------------------------------------- */ | 
| 125 |  | 
/*
** Spill slot layout. Every slot is 32 bit wide; FPRs occupy an even/odd
** pair of slots.
**
** SPS_FIXED  Number of fixed spill slots available in the interpreter
**            frame. This definition must match with the *.dasc file(s).
** SPS_FIRST  First spill slot for general use. At least two 32 bit slots
**            are reserved below it.
*/
#if LJ_64 && LJ_ABI_WIN
#define SPS_FIXED (4*2)
#define SPS_FIRST (4*2)  /* Don't use callee register save area. */
#elif LJ_64 && LJ_GC64
#define SPS_FIXED 2
#define SPS_FIRST 2
#elif LJ_64
#define SPS_FIXED 4
#define SPS_FIRST 2
#else
#define SPS_FIXED 6
#define SPS_FIRST 2
#endif

#define SPOFS_TMP 0

/* Slot number -> offset: four bytes per slot, computed as int32_t. */
#define sps_scale(slot) ((int32_t)(slot) * 4)
/* Slots in excess of SPS_FIXED, rounded up to a multiple of four. */
#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
| 154 |  | 
| 155 | /* -- Exit state ---------------------------------------------------------- */ | 
| 156 |  | 
| 157 | /* This definition must match with the *.dasc file(s). */ | 
| 158 | typedef struct { | 
| 159 |   lua_Number fpr[RID_NUM_FPR];	/* Floating-point registers. */ | 
| 160 |   intptr_t gpr[RID_NUM_GPR];	/* General-purpose registers. */ | 
| 161 |   int32_t spill[256];		/* Spill slots. */ | 
| 162 | } ExitState; | 
| 163 |  | 
/*
** Exit stub spacing and group size. The group size is limited by the range
** of a short fwd jump (127): (2+2)*(32-1)-2 = 122.
*/
#define EXITSTUB_SPACING (2+2)
#define EXITSTUBS_PER_GROUP 32

/* g->vmstate has traceno on exit. */
#define EXITTRACE_VMSTATE 1
| 169 |  | 
| 170 | /* -- x86 ModRM operand encoding ------------------------------------------ */ | 
| 171 |  | 
/*
** Mode values (XM_OFS*, XM_REG) and index scale values (XM_SCALE*) for
** ModRM operands. Both groups use the same two bits, covered by XM_MASK.
*/
typedef enum {
  /* Mode. */
  XM_OFS0   = 0x00,
  XM_OFS8   = 0x40,
  XM_OFS32  = 0x80,
  XM_REG    = 0xc0,

  /* Index scale. */
  XM_SCALE1 = 0x00,
  XM_SCALE2 = 0x40,
  XM_SCALE4 = 0x80,
  XM_SCALE8 = 0xc0,

  /* Mask for either group. */
  XM_MASK   = 0xc0
} x86Mode;
| 177 |  | 
/* Holds the parts of a variable ModRM operand. */
typedef struct {
  /* Offset. */
  int32_t ofs;
  /* Base register or RID_NONE. */
  uint8_t base;
  /* Index register or RID_NONE. */
  uint8_t idx;
  /* Index scale (XM_SCALE1 .. XM_SCALE8). */
  uint8_t scale;
} x86ModRM;
| 185 |  | 
| 186 | /* -- Opcodes ------------------------------------------------------------- */ | 
| 187 |  | 
/*
** Macros to construct variable-length x86 opcodes. -(len+1) is in LSB.
**
** Arguments are bare hex bytes without the 0x prefix, e.g. XO_(8b) or
** XO_FPU(d9, e8). The last opcode byte ends up in bits 24..31, preceding
** bytes below it.
**
** Every byte is converted to uint32_t before it is shifted. Shifting the
** plain int constant would overflow int for any byte >= 0x80 moved into
** bits 24..31, which is undefined behavior in C.
*/
#define XO_(o)		((uint32_t)(0x0000fe + ((uint32_t)0x##o<<24)))
#define XO_FPU(a,b)	((uint32_t)(0x00fd + ((uint32_t)0x##a<<16) + \
				    ((uint32_t)0x##b<<24)))
#define XO_0f(o)	((uint32_t)(0x0f00fd + ((uint32_t)0x##o<<24)))
#define XO_66(o)	((uint32_t)(0x6600fd + ((uint32_t)0x##o<<24)))
#define XO_660f(o)	((uint32_t)(0x0f66fc + ((uint32_t)0x##o<<24)))
#define XO_f20f(o)	((uint32_t)(0x0ff2fc + ((uint32_t)0x##o<<24)))
#define XO_f30f(o)	((uint32_t)(0x0ff3fc + ((uint32_t)0x##o<<24)))

/*
** Same idea for the XV_* opcodes: the low three bytes are a fixed constant
** per macro, the opcode byte goes into bits 24..31.
*/
#define XV_660f38(o)	((uint32_t)(0x79e2c4 + ((uint32_t)0x##o<<24)))
#define XV_f20f38(o)	((uint32_t)(0x7be2c4 + ((uint32_t)0x##o<<24)))
#define XV_f20f3a(o)	((uint32_t)(0x7be3c4 + ((uint32_t)0x##o<<24)))
#define XV_f30f38(o)	((uint32_t)(0x7ae2c4 + ((uint32_t)0x##o<<24)))
| 201 |  | 
| 202 | /* This list of x86 opcodes is not intended to be complete. Opcodes are only | 
| 203 | ** included when needed. Take a look at DynASM or jit.dis_x86 to see the | 
| 204 | ** whole mess. | 
| 205 | */ | 
| 206 | typedef enum { | 
| 207 |   /* Fixed length opcodes. XI_* prefix. */ | 
| 208 |   XI_O16 =	0x66, | 
| 209 |   XI_NOP =	0x90, | 
| 210 |   XI_XCHGa =	0x90, | 
| 211 |   XI_CALL =	0xe8, | 
| 212 |   XI_JMP =	0xe9, | 
| 213 |   XI_JMPs =	0xeb, | 
| 214 |   XI_PUSH =	0x50, /* Really 50+r. */ | 
| 215 |   XI_JCCs =	0x70, /* Really 7x. */ | 
| 216 |   XI_JCCn =	0x80, /* Really 0f8x. */ | 
| 217 |   XI_LEA =	0x8d, | 
| 218 |   XI_MOVrib =	0xb0, /* Really b0+r. */ | 
| 219 |   XI_MOVri =	0xb8, /* Really b8+r. */ | 
| 220 |   XI_ARITHib =	0x80, | 
| 221 |   XI_ARITHi =	0x81, | 
| 222 |   XI_ARITHi8 =	0x83, | 
| 223 |   XI_PUSHi8 =	0x6a, | 
| 224 |   XI_TESTb =	0x84, | 
| 225 |   XI_TEST =	0x85, | 
| 226 |   XI_INT3 =	0xcc, | 
| 227 |   XI_MOVmi =	0xc7, | 
| 228 |   XI_GROUP5 =	0xff, | 
| 229 |  | 
| 230 |   /* Note: little-endian byte-order! */ | 
| 231 |   XI_FLDZ =	0xeed9, | 
| 232 |   XI_FLD1 =	0xe8d9, | 
| 233 |   XI_FDUP =	0xc0d9,  /* Really fld st0. */ | 
| 234 |   XI_FPOP =	0xd8dd,  /* Really fstp st0. */ | 
| 235 |   XI_FPOP1 =	0xd9dd,  /* Really fstp st1. */ | 
| 236 |   XI_FRNDINT =	0xfcd9, | 
| 237 |   XI_FSCALE =	0xfdd9, | 
| 238 |   XI_FYL2X =	0xf1d9, | 
| 239 |  | 
| 240 |   /* VEX-encoded instructions. XV_* prefix. */ | 
| 241 |   XV_RORX =	XV_f20f3a(f0), | 
| 242 |   XV_SARX =	XV_f30f38(f7), | 
| 243 |   XV_SHLX =	XV_660f38(f7), | 
| 244 |   XV_SHRX =	XV_f20f38(f7), | 
| 245 |  | 
| 246 |   /* Variable-length opcodes. XO_* prefix. */ | 
| 247 |   XO_OR =	XO_(0b), | 
| 248 |   XO_MOV =	XO_(8b), | 
| 249 |   XO_MOVto =	XO_(89), | 
| 250 |   XO_MOVtow =	XO_66(89), | 
| 251 |   XO_MOVtob =	XO_(88), | 
| 252 |   XO_MOVmi =	XO_(c7), | 
| 253 |   XO_MOVmib =	XO_(c6), | 
| 254 |   XO_LEA =	XO_(8d), | 
| 255 |   XO_ARITHib =	XO_(80), | 
| 256 |   XO_ARITHi =	XO_(81), | 
| 257 |   XO_ARITHi8 =	XO_(83), | 
| 258 |   XO_ARITHiw8 =	XO_66(83), | 
| 259 |   XO_SHIFTi =	XO_(c1), | 
| 260 |   XO_SHIFT1 =	XO_(d1), | 
| 261 |   XO_SHIFTcl =	XO_(d3), | 
| 262 |   XO_IMUL =	XO_0f(af), | 
| 263 |   XO_IMULi =	XO_(69), | 
| 264 |   XO_IMULi8 =	XO_(6b), | 
| 265 |   XO_CMP =	XO_(3b), | 
| 266 |   XO_TESTb =	XO_(84), | 
| 267 |   XO_TEST =	XO_(85), | 
| 268 |   XO_GROUP3b =	XO_(f6), | 
| 269 |   XO_GROUP3 =	XO_(f7), | 
| 270 |   XO_GROUP5b =	XO_(fe), | 
| 271 |   XO_GROUP5 =	XO_(ff), | 
| 272 |   XO_MOVZXb =	XO_0f(b6), | 
| 273 |   XO_MOVZXw =	XO_0f(b7), | 
| 274 |   XO_MOVSXb =	XO_0f(be), | 
| 275 |   XO_MOVSXw =	XO_0f(bf), | 
| 276 |   XO_MOVSXd =	XO_(63), | 
| 277 |   XO_BSWAP =	XO_0f(c8), | 
| 278 |   XO_CMOV =	XO_0f(40), | 
| 279 |  | 
| 280 |   XO_MOVSD =	XO_f20f(10), | 
| 281 |   XO_MOVSDto =	XO_f20f(11), | 
| 282 |   XO_MOVSS =	XO_f30f(10), | 
| 283 |   XO_MOVSSto =	XO_f30f(11), | 
| 284 |   XO_MOVLPD =	XO_660f(12), | 
| 285 |   XO_MOVAPS =	XO_0f(28), | 
| 286 |   XO_XORPS =	XO_0f(57), | 
| 287 |   XO_ANDPS =	XO_0f(54), | 
| 288 |   XO_ADDSD =	XO_f20f(58), | 
| 289 |   XO_SUBSD =	XO_f20f(5c), | 
| 290 |   XO_MULSD =	XO_f20f(59), | 
| 291 |   XO_DIVSD =	XO_f20f(5e), | 
| 292 |   XO_SQRTSD =	XO_f20f(51), | 
| 293 |   XO_MINSD =	XO_f20f(5d), | 
| 294 |   XO_MAXSD =	XO_f20f(5f), | 
| 295 |   XO_ROUNDSD =	0x0b3a0ffc,  /* Really 66 0f 3a 0b. See asm_fpmath. */ | 
| 296 |   XO_UCOMISD =	XO_660f(2e), | 
| 297 |   XO_CVTSI2SD =	XO_f20f(2a), | 
| 298 |   XO_CVTTSD2SI=	XO_f20f(2c), | 
| 299 |   XO_CVTSI2SS =	XO_f30f(2a), | 
| 300 |   XO_CVTTSS2SI=	XO_f30f(2c), | 
| 301 |   XO_CVTSS2SD =	XO_f30f(5a), | 
| 302 |   XO_CVTSD2SS =	XO_f20f(5a), | 
| 303 |   XO_ADDSS =	XO_f30f(58), | 
| 304 |   XO_MOVD =	XO_660f(6e), | 
| 305 |   XO_MOVDto =	XO_660f(7e), | 
| 306 |  | 
| 307 |   XO_FLDd =	XO_(d9), XOg_FLDd = 0, | 
| 308 |   XO_FLDq =	XO_(dd), XOg_FLDq = 0, | 
| 309 |   XO_FILDd =	XO_(db), XOg_FILDd = 0, | 
| 310 |   XO_FILDq =	XO_(df), XOg_FILDq = 5, | 
| 311 |   XO_FSTPd =	XO_(d9), XOg_FSTPd = 3, | 
| 312 |   XO_FSTPq =	XO_(dd), XOg_FSTPq = 3, | 
| 313 |   XO_FISTPq =	XO_(df), XOg_FISTPq = 7, | 
| 314 |   XO_FISTTPq =	XO_(dd), XOg_FISTTPq = 1, | 
| 315 |   XO_FADDq =	XO_(dc), XOg_FADDq = 0, | 
| 316 |   XO_FLDCW =	XO_(d9), XOg_FLDCW = 5, | 
| 317 |   XO_FNSTCW =	XO_(d9), XOg_FNSTCW = 7 | 
| 318 | } x86Op; | 
| 319 |  | 
/*
** x86 opcode groups. XG_() packs three values into one integer:
**   i8 << 16   opcode of the i8 form
**   i  << 8    opcode of the i form
**   g          group number
** The fields only stay separate if every argument fits into one byte.
*/
typedef uint32_t x86Group;

#define XG_(i8, i, g) \
  ((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
#define XG_ARITHi(g) \
  XG_(XI_ARITHi8, XI_ARITHi, g)

/*
** Convert a group into an x86Op: the i opcode (XG_TOXOi) or the i8 opcode
** (XG_TOXOi8) is moved to bits 24..31 and 0xfe is put in the low byte,
** i.e. the same layout XO_() produces.
*/
#define XG_TOXOi(xg) \
  ((x86Op)(0xfe + (((xg)<<16) & 0xff000000)))
#define XG_TOXOi8(xg) \
  ((x86Op)(0xfe + (((xg)<<8) & 0xff000000)))

/*
** x86Op for arithmetic operation number a: opcode byte 0x03 + 8*a.
** XO_ARITHw additionally carries a 0x66 byte in front of the opcode.
*/
#define XO_ARITH(a) \
  ((x86Op)(0x030000fe + ((a)<<27)))
#define XO_ARITHw(a) \
  ((x86Op)(0x036600fd + ((a)<<27)))
| 330 |  | 
/* Arithmetic operation numbers, consecutive from 0. */
typedef enum {
  XOg_ADD    = 0,
  XOg_OR     = 1,
  XOg_ADC    = 2,
  XOg_SBB    = 3,
  XOg_AND    = 4,
  XOg_SUB    = 5,
  XOg_XOR    = 6,
  XOg_CMP    = 7,
  XOg_X_IMUL = 8
} x86Arith;
| 335 |  | 
/* Shift and rotate operation numbers, consecutive from 0. */
typedef enum {
  XOg_ROL = 0,
  XOg_ROR = 1,
  XOg_RCL = 2,
  XOg_RCR = 3,
  XOg_SHL = 4,
  XOg_SHR = 5,
  XOg_SAL = 6,
  XOg_SAR = 7
} x86Shift;
| 339 |  | 
/* Operation numbers for opcode group 3, consecutive from 0. */
typedef enum {
  XOg_TEST  = 0,
  XOg_TEST_ = 1,
  XOg_NOT   = 2,
  XOg_NEG   = 3,
  XOg_MUL   = 4,
  XOg_IMUL  = 5,
  XOg_DIV   = 6,
  XOg_IDIV  = 7
} x86Group3;
| 343 |  | 
/* Operation numbers for opcode group 5, consecutive from 0. */
typedef enum {
  XOg_INC     = 0,
  XOg_DEC     = 1,
  XOg_CALL    = 2,
  XOg_CALLfar = 3,
  XOg_JMP     = 4,
  XOg_JMPfar  = 5,
  XOg_PUSH    = 6
} x86Group5;
| 347 |  | 
/*
** x86 condition codes. The sixteen primary codes are numbered 0..15; each
** odd code is the negation of the even code before it. The remaining names
** are aliases of the primary codes.
*/
typedef enum {
  /* Primary codes. */
  CC_O   = 0,
  CC_NO  = 1,
  CC_B   = 2,
  CC_NB  = 3,
  CC_E   = 4,
  CC_NE  = 5,
  CC_BE  = 6,
  CC_NBE = 7,
  CC_S   = 8,
  CC_NS  = 9,
  CC_P   = 10,
  CC_NP  = 11,
  CC_L   = 12,
  CC_NL  = 13,
  CC_LE  = 14,
  CC_NLE = 15,

  /* Aliases. */
  CC_C   = CC_B,
  CC_NAE = CC_C,
  CC_NC  = CC_NB,
  CC_AE  = CC_NB,
  CC_Z   = CC_E,
  CC_NZ  = CC_NE,
  CC_NA  = CC_BE,
  CC_A   = CC_NBE,
  CC_PE  = CC_P,
  CC_PO  = CC_NP,
  CC_NGE = CC_L,
  CC_GE  = CC_NL,
  CC_NG  = CC_LE,
  CC_G   = CC_NLE
} x86CC;
| 357 |  | 
| 358 | #endif | 
| 359 |  |