/*
** x86/x64 instruction emitter.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Emit basic instructions --------------------------------------------- */
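/* Note: the assembler generates machine code backwards. as->mcp points at the
** lowest byte of the already-emitted code and every emit_* helper writes its
** bytes in front of it, decrementing the pointer as it goes.
*/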

#define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))
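/* Example (illustrative): MODRM packs the mode into bits 7-6, the reg field
** into bits 5-3 and the r/m field into bits 2-0. Assuming the usual values
** from lj_target_x86.h (XM_REG = 0xc0, RID_EAX = 0, RID_ECX = 1),
** MODRM(XM_REG, RID_EAX, RID_ECX) yields the ModRM byte 0xC1 for
** register-direct "eax, ecx" operands.
*/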

#if LJ_64
#define REXRB(p, rr, rb) \
  { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
    if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX 0x200
#define REX_64 (FORCE_REX|0x080000)
#define VEX_64 0x800000
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
#define REX_64 0
#define VEX_64 0
#endif
#if LJ_GC64
#define REX_GC64 REX_64
#else
#define REX_GC64 0
#endif
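/* A REX prefix byte is 0x40 | W<<3 | R<<2 | X<<1 | B. FORCE_REX and REX_64
** are flag bits OR-ed into a Reg operand, not register numbers: FORCE_REX
** makes emit_op() emit a REX prefix even when no extended register is used,
** and REX_64 additionally contributes the REX.W bit (via rr >> 16) to select
** a 64 bit operand size. REX_GC64 applies REX.W only on GC64 builds.
*/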

#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
#define emit_u32(as, u) (*(uint32_t *)(as->mcp-4) = (u), as->mcp -= 4)

#define emit_x87op(as, xo) \
  (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2)

/* op */
static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
                                 MCode *p, int delta)
{
  int n = (int8_t)xo;
  if (n == -60) {  /* VEX-encoded instruction */
#if LJ_64
    xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
    return p+delta-5;
  }
#if defined(__GNUC__) || defined(__clang__)
  if (__builtin_constant_p(xo) && n == -2)
    p[delta-2] = (MCode)(xo >> 24);
  else if (__builtin_constant_p(xo) && n == -3)
    *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16);
  else
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
  p += n + delta;
#if LJ_64
  {
    uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
    if (rex != 0x40) {
      rex |= (rr >> 16);
      if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
      else if ((xo & 0xffffff) == 0x6600fd) { *p = (MCode)rex; rex = 0x66; }
      *--p = (MCode)rex;
    }
  }
#else
  UNUSED(rr); UNUSED(rb); UNUSED(rx);
#endif
  return p;
}
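/* Encoding sketch (assuming the XO_* packing from lj_target_x86.h): an x86Op
** stores the opcode bytes in its upper bytes and the negative opcode length
** in its low byte, e.g. XO_MOV (8B /r) is 0x8b0000fe with n = -2. emit_op()
** writes the packed word 5 bytes below p+delta, then steps p back by the
** real opcode length, so the unused low bytes are overwritten by whatever is
** emitted next. A REX prefix, if needed, is prepended last.
*/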

/* op + modrm */
#define emit_opm(xo, mode, rr, rb, p, delta) \
  (p[(delta)-1] = MODRM((mode), (rr), (rb)), \
   emit_op((xo), (rr), (rb), 0, (p), (delta)))

/* op + modrm + sib */
#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \
  (p[-1] = MODRM((scale), (rx), (rb)), \
   p[-2] = MODRM((mode), (rr), RID_ESP), \
   emit_op((xo), (rr), (rb), (rx), (p), -1))

/* op r1, r2 */
static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
{
  MCode *p = as->mcp;
  as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0);
}
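/* Example (illustrative): emit_rr(as, XO_MOV, RID_EAX, RID_ECX) emits the two
** bytes 8B C1, i.e. "mov eax, ecx". Passing RID_EAX|REX_64 instead would
** prepend 0x48 for the 64 bit form "mov rax, rcx".
*/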

#if LJ_64 && defined(LUA_USE_ASSERT)
/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
static int32_t ptr2addr(const void *p)
{
  lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range");
  return i32ptr(p);
}
#else
#define ptr2addr(p) (i32ptr((p)))
#endif

/* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ra_hasreg(rb)) {
    if (LJ_GC64 && rb == RID_RIP) {  /* RIP-relative. */
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = ofs;
    } else if (ofs == 0 && (rb&7) != RID_EBP) {
      mode = XM_OFS0;
    } else if (checki8(ofs)) {
      *--p = (MCode)ofs;
      mode = XM_OFS8;
    } else {
      p -= 4;
      *(int32_t *)p = ofs;
      mode = XM_OFS32;
    }
    if ((rb&7) == RID_ESP)
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
  } else {
    *(int32_t *)(p-4) = ofs;
#if LJ_64
    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
    p -= 5;
    rb = RID_ESP;
#else
    p -= 4;
    rb = RID_EBP;
#endif
    mode = XM_OFS0;
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}
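/* Example (illustrative): emit_rmro(as, XO_MOV, RID_EAX, RID_EBX, 8) emits
** 8B 43 08, i.e. "mov eax, [rbx+8]". A base of RID_ESP (or r12) gets an extra
** SIB byte, and a base without a register turns into an absolute [disp32]
** operand (encoded with a SIB byte on x64).
*/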

/* op r, [base+idx*scale+ofs] */
static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
                       x86Mode scale, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ofs == 0 && (rb&7) != RID_EBP) {
    mode = XM_OFS0;
  } else if (checki8(ofs)) {
    mode = XM_OFS8;
    *--p = (MCode)ofs;
  } else {
    mode = XM_OFS32;
    p -= 4;
    *(int32_t *)p = ofs;
  }
  as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p);
}
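/* Example (illustrative): emit_rmrxo(as, XO_MOV, RID_EAX, RID_EBX, RID_ECX,
** XM_SCALE4, 0) emits 8B 04 8B, i.e. "mov eax, [rbx+rcx*4]" (ModRM 04 selects
** a SIB byte, which holds scale, index and base).
*/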

/* op r, i */
static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  MCode *p = as->mcp;
  x86Op xo;
  if (checki8(i)) {
    *--p = (MCode)i;
    xo = XG_TOXOi8(xg);
  } else {
    p -= 4;
    *(int32_t *)p = i;
    xo = XG_TOXOi(xg);
  }
  as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
}
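/* Example (illustrative): emit_gri(as, XG_ARITHi(XOg_ADD), RID_EAX, 1) emits
** 83 C0 01, i.e. "add eax, 1" (the short imm8 form). An immediate outside the
** signed 8 bit range selects the imm32 form (81 /0) instead.
*/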

/* op [base+ofs], i */
static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
                       int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
}

#define emit_shifti(as, xg, r, i) \
  (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r)))
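/* Example (illustrative): emit_shifti(as, XOg_SHL, RID_EAX, 4) emits
** C1 E0 04, i.e. "shl eax, 4" (group opcode C1 with the count as imm8).
*/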

/* op r, rm/mrm */
static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
{
  MCode *p = as->mcp;
  x86Mode mode = XM_REG;
  if (rb == RID_MRM) {
    rb = as->mrm.base;
    if (rb == RID_NONE) {
      rb = RID_EBP;
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
      if (as->mrm.idx != RID_NONE)
        goto mrmidx;
#if LJ_64
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
      rb = RID_ESP;
#endif
    } else if (LJ_GC64 && rb == RID_RIP) {
      lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index");
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
    } else {
      if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
        mode = XM_OFS0;
      } else if (checki8(as->mrm.ofs)) {
        *--p = (MCode)as->mrm.ofs;
        mode = XM_OFS8;
      } else {
        p -= 4;
        *(int32_t *)p = as->mrm.ofs;
        mode = XM_OFS32;
      }
      if (as->mrm.idx != RID_NONE) {
      mrmidx:
        as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p);
        return;
      }
      if ((rb&7) == RID_ESP)
        *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
    }
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}

/* op rm/mrm, i */
static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
}

/* -- Emit loads/stores --------------------------------------------------- */

/* mov [base+ofs], i */
static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
{
  emit_i32(as, i);
  emit_rmro(as, XO_MOVmi, 0, base, ofs);
}

/* mov [base+ofs], r */
#define emit_movtomro(as, r, base, ofs) \
  emit_rmro(as, XO_MOVto, (r), (base), (ofs))

/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
  emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)

#define emit_setvmstate(as, i) \
  (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))

/* mov r, i / xor r, r */
static void emit_loadi(ASMState *as, Reg r, int32_t i)
{
  /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP/jcc. */
  if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
                            (as->curins+1 < as->T->nins &&
                             IR(as->curins+1)->o == IR_HIOP))) &&
      !((*as->mcp == 0x0f && (as->mcp[1] & 0xf0) == XI_JCCn) ||
        (*as->mcp & 0xf0) == XI_JCCs)) {
    emit_rr(as, XO_ARITH(XOg_XOR), r, r);
  } else {
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = i;
    p[-5] = (MCode)(XI_MOVri+(r&7));
    p -= 5;
    REXRB(p, 0, r);
    as->mcp = p;
  }
}
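/* Note: because code is emitted backwards, *as->mcp is the instruction that
** will execute right after this load. The check above therefore avoids
** clobbering the flags with XOR when that next instruction is a conditional
** jump (or part of a 32 bit HIOP pair) and falls back to "mov r, 0" instead.
*/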

#if LJ_GC64
#define dispofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
#define mcpofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define mctopofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
/* mov r, addr */
#define emit_loada(as, r, addr) \
  emit_loadu64(as, (r), (uintptr_t)(addr))
#else
/* mov r, addr */
#define emit_loada(as, r, addr) \
  emit_loadi(as, (r), ptr2addr((addr)))
#endif

#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */
static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
{
  if (checku32(u64)) {  /* 32 bit load clears upper 32 bits. */
    emit_loadi(as, r, (int32_t)u64);
  } else if (checki32((int64_t)u64)) {  /* Sign-extended 32 bit load. */
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = (int32_t)u64;
    as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
#if LJ_GC64
  } else if (checki32(dispofs(as, u64))) {
    emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
  } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
    /* Since as->realign assumes the code size doesn't change, check
    ** RIP-relative addressing reachability for both as->mcp and as->mctop.
    */
    emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
#endif
  } else {  /* Full-size 64 bit load. */
    MCode *p = as->mcp;
    *(uint64_t *)(p-8) = u64;
    p[-9] = (MCode)(XI_MOVri+(r&7));
    p[-10] = 0x48 + ((r>>3)&1);
    p -= 10;
    as->mcp = p;
  }
}
#endif
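/* Size trade-off made above (illustrative): values that fit in unsigned
** 32 bits use the short "mov r32, imm32" (which zero-extends), values that
** fit sign-extended use "mov r/m64, imm32" (REX.W C7 /0), GC64 builds try a
** RIP- or DISPATCH-relative LEA for nearby addresses, and only the remaining
** cases pay for the full 10 byte movabs.
*/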

/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
#if LJ_GC64
  if (checki32(dispofs(as, addr))) {
    emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
  } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
    emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
  } else if (!checki32((intptr_t)addr)) {
    Reg ra = (rr & 15);
    if (xo != XO_MOV) {
      /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
      uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
      uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
      ra = RID_DISPATCH;
      if (checku32(dispaddr)) {
        emit_loadi(as, ra, (int32_t)dispaddr);
      } else {  /* Full-size 64 bit load. */
        MCode *p = as->mcp;
        *(uint64_t *)(p-8) = dispaddr;
        p[-9] = (MCode)(XI_MOVri+(ra&7));
        p[-10] = 0x48 + ((ra>>3)&1);
        p -= 10;
        as->mcp = p;
      }
      if (xo == XO_GROUP3b) emit_i8(as, i8);
    }
    emit_rmro(as, xo, rr, ra, 0);
    emit_loadu64(as, ra, (uintptr_t)addr);
  } else
#endif
  {
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
  }
}

/* Load 64 bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
  Reg r64;
  x86Op xo;
  const uint64_t *k = &ir_k64(ir)->u64;
  if (rset_test(RSET_FPR, r)) {
    r64 = r;
    xo = XO_MOVSD;
  } else {
    r64 = r | REX_64;
    xo = XO_MOV;
  }
  if (*k == 0) {
    emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
#if LJ_GC64
  } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
             (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
    emit_rma(as, xo, r64, k);
  } else {
    if (ir->i) {
      lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
                 "bad interned 64 bit constant");
    } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
      emit_loadu64(as, r, *k);
      return;
    } else {
      /* If all else fails, add the FP constant at the MCode area bottom. */
      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
      *(uint64_t *)as->mcbot = *k;
      ir->i = (int32_t)(as->mctop - as->mcbot);
      as->mcbot += 8;
      as->mclim = as->mcbot + MCLIM_REDZONE;
      lj_mcode_commitbot(as->J, as->mcbot);
    }
    emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
#else
  } else {
    emit_rma(as, xo, r64, k);
#endif
  }
}

/* -- Emit control-flow instructions -------------------------------------- */

/* Label for short jumps. */
typedef MCode *MCLabel;

#if LJ_32 && LJ_HASFFI
/* jmp short target */
static void emit_sjmp(ASMState *as, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lj_assertA(delta == (int8_t)delta, "short jump target out of range");
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = XI_JMPs;
  as->mcp = p - 2;
}
#endif

/* jcc short target */
static void emit_sjcc(ASMState *as, int cc, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lj_assertA(delta == (int8_t)delta, "short jump target out of range");
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
}

/* jcc short (pending target) */
static MCLabel emit_sjcc_label(ASMState *as, int cc)
{
  MCode *p = as->mcp;
  p[-1] = 0;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
  return p;
}

/* Fixup jcc short target. */
static void emit_sfixup(ASMState *as, MCLabel source)
{
  source[-1] = (MCode)(as->mcp-source);
}
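/* Typical pairing (illustrative): l = emit_sjcc_label(as, CC_E) emits 74 00,
** a "je" with a placeholder displacement; once the branch target has been
** emitted, emit_sfixup(as, l) patches the displacement to as->mcp - l. Since
** code grows downwards, the resulting target is the most recently emitted
** instruction.
*/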

/* Return label pointing to current PC. */
#define emit_label(as) ((as)->mcp)

/* Compute relative 32 bit offset for jump and call instructions. */
static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
{
  ptrdiff_t delta = target - p;
  UNUSED(J);
  lj_assertJ(delta == (int32_t)delta, "jump target out of range");
  return (int32_t)delta;
}

/* jcc target */
static void emit_jcc(ASMState *as, int cc, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(as->J, p, target);
  p[-5] = (MCode)(XI_JCCn+(cc&15));
  p[-6] = 0x0f;
  as->mcp = p - 6;
}

/* jmp target */
static void emit_jmp(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(as->J, p, target);
  p[-5] = XI_JMP;
  as->mcp = p - 5;
}

/* call target */
static void emit_call_(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
#if LJ_64
  if (target-p != (int32_t)(target-p)) {
    /* Assumes RID_RET is never an argument to calls and always clobbered. */
    emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
    emit_loadu64(as, RID_RET, (uint64_t)target);
    return;
  }
#endif
  *(int32_t *)(p-4) = jmprel(as->J, p, target);
  p[-5] = XI_CALL;
  as->mcp = p - 5;
}
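/* Example (illustrative): a target within +-2GB of the call site emits the
** 5 byte near call E8 rel32. Otherwise (x64 only) the helper falls back to
** "mov rax, imm64" followed by "call rax" (FF D0), relying on RID_RET being
** clobberable at every call site.
*/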

#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))

/* -- Emit generic operations --------------------------------------------- */

/* Use 64 bit operations to handle 64 bit IR types. */
#if LJ_64
#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
#else
#define REX_64IR(ir, r) (r)
#define VEX_64IR(ir, r) (r)
#endif

/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  UNUSED(ir);
  if (dst < RID_MAX_GPR)
    emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
  else
    emit_rr(as, XO_MOVAPS, dst, src);
}

/* Generic load of register with base and (small) offset address. */
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
}

/* Generic store of register with base and (small) offset address. */
static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
}

/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (ofs) {
    emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
  }
}

#define emit_spsub(as, ofs) emit_addptr(as, RID_ESP|REX_64, -(ofs))
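/* Example (illustrative): emit_spsub(as, 16) goes through emit_addptr() and
** emit_gri(), producing 48 83 C4 F0 on x64, i.e. "add rsp, -16" (the imm8
** form with REX.W), which is how stack space is reserved here instead of a
** separate sub instruction.
*/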

/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
