1/*
2 * This file is part of the MicroPython project, http://micropython.org/
3 *
4 * The MIT License (MIT)
5 *
6 * Copyright (c) 2013, 2014 Damien P. George
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27#include <stdint.h>
28#include <stdio.h>
29#include <assert.h>
30#include <string.h>
31
32#include "py/mpconfig.h"
33
34// wrapper around everything in this file
35#if MICROPY_EMIT_X64
36
37#include "py/asmx64.h"
38
/* all offsets are measured in multiples of 8 bytes */
#define WORD_SIZE (8)

/*
 * Opcode bytes.  Annotation key used in the trailing comments:
 *   +rq / +rb  the low 3 bits of the register number are OR-ed into the opcode
 *   /r         a ModRM byte carrying a register operand follows
 *   /N         a ModRM byte follows whose reg field holds opcode extension N
 */
#define OPCODE_NOP (0x90)
#define OPCODE_PUSH_R64 (0x50) /* +rq */
#define OPCODE_PUSH_I64 (0x68)
#define OPCODE_PUSH_M64 (0xff) /* /6 */
#define OPCODE_POP_R64 (0x58) /* +rq */
#define OPCODE_RET (0xc3)
#define OPCODE_MOV_I8_TO_R8 (0xb0) /* +rb */
#define OPCODE_MOV_I64_TO_R64 (0xb8) /* +rq */
#define OPCODE_MOV_I32_TO_RM32 (0xc7)
#define OPCODE_MOV_R8_TO_RM8 (0x88) /* /r */
#define OPCODE_MOV_R64_TO_RM64 (0x89) /* /r */
#define OPCODE_MOV_RM64_TO_R64 (0x8b) /* /r */
#define OPCODE_MOVZX_RM8_TO_R64 (0xb6) /* 0x0f 0xb6/r */
#define OPCODE_MOVZX_RM16_TO_R64 (0xb7) /* 0x0f 0xb7/r */
#define OPCODE_LEA_MEM_TO_R64 (0x8d) /* /r */
#define OPCODE_AND_R64_TO_RM64 (0x21) /* /r */
#define OPCODE_OR_R64_TO_RM64 (0x09) /* /r */
#define OPCODE_XOR_R64_TO_RM64 (0x31) /* /r */
#define OPCODE_ADD_R64_TO_RM64 (0x01) /* /r */
#define OPCODE_ADD_I32_TO_RM32 (0x81) /* /0 */
#define OPCODE_ADD_I8_TO_RM32 (0x83) /* /0 */
#define OPCODE_SUB_R64_FROM_RM64 (0x29)
#define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */
#define OPCODE_SUB_I8_FROM_RM64 (0x83) /* /5 */
// #define OPCODE_SHL_RM32_BY_I8 (0xc1) /* /4 */
// #define OPCODE_SHR_RM32_BY_I8 (0xc1) /* /5 */
// #define OPCODE_SAR_RM32_BY_I8 (0xc1) /* /7 */
#define OPCODE_SHL_RM64_CL (0xd3) /* /4 */
#define OPCODE_SHR_RM64_CL (0xd3) /* /5 */
#define OPCODE_SAR_RM64_CL (0xd3) /* /7 */
// #define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
// #define OPCODE_CMP_I8_WITH_RM32 (0x83) /* /7 */
#define OPCODE_CMP_R64_WITH_RM64 (0x39) /* /r */
// #define OPCODE_CMP_RM32_WITH_R32 (0x3b)
#define OPCODE_TEST_R8_WITH_RM8 (0x84) /* /r */
#define OPCODE_TEST_R64_WITH_RM64 (0x85) /* /r */
#define OPCODE_JMP_REL8 (0xeb)
#define OPCODE_JMP_REL32 (0xe9)
#define OPCODE_JMP_RM64 (0xff) /* /4 */
#define OPCODE_JCC_REL8 (0x70) /* | jcc type */
#define OPCODE_JCC_REL32_A (0x0f)
#define OPCODE_JCC_REL32_B (0x80) /* | jcc type */
#define OPCODE_SETCC_RM8_A (0x0f)
#define OPCODE_SETCC_RM8_B (0x90) /* | jcc type, /0 */
#define OPCODE_CALL_REL32 (0xe8)
#define OPCODE_CALL_RM32 (0xff) /* /2 */
#define OPCODE_LEAVE (0xc9)

/* ModRM byte pieces: reg field (bits 5:3), mod field (bits 7:6), r/m field (bits 2:0) */
#define MODRM_R64(x) (((x) & 0x7) << 3)
#define MODRM_RM_DISP0 (0x00)
#define MODRM_RM_DISP8 (0x40)
#define MODRM_RM_DISP32 (0x80)
#define MODRM_RM_REG (0xc0)
#define MODRM_RM_R64(x) ((x) & 0x7)

#define OP_SIZE_PREFIX (0x66)

/* REX prefix bits; the *_FROM_R64 helpers move bit 3 of a register number
   into the position of the corresponding REX flag */
#define REX_PREFIX (0x40)
#define REX_W (0x08) // width
#define REX_R (0x04) // register
#define REX_X (0x02) // index
#define REX_B (0x01) // base
#define REX_W_FROM_R64(r64) ((r64) >> 0 & 0x08)
#define REX_R_FROM_R64(r64) ((r64) >> 1 & 0x04)
#define REX_X_FROM_R64(r64) ((r64) >> 2 & 0x02)
#define REX_B_FROM_R64(r64) ((r64) >> 3 & 0x01)

/* Byte k of an immediate, counted from the least significant byte */
#define IMM32_L0(x) ((x) & 0xff)
#define IMM32_L1(x) (((x) >> 8) & 0xff)
#define IMM32_L2(x) (((x) >> 16) & 0xff)
#define IMM32_L3(x) (((x) >> 24) & 0xff)
#define IMM64_L4(x) (((x) >> 32) & 0xff)
#define IMM64_L5(x) (((x) >> 40) & 0xff)
#define IMM64_L6(x) (((x) >> 48) & 0xff)
#define IMM64_L7(x) (((x) >> 56) & 0xff)

/* Range predicates.  Each evaluates its argument more than once, so do not
   pass expressions with side effects. */
#define UNSIGNED_FIT8(x) (((x) & 0xffffffffffffff00) == 0)
#define UNSIGNED_FIT32(x) (((x) & 0xffffffff00000000) == 0)
/* True when bits 7..31 of x are all clear or all set (bits above 31 are not
   examined).  The whole expansion is parenthesised so the macro can be negated
   or combined with other operators without precedence surprises. */
#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
121
122static inline byte *asm_x64_get_cur_to_write_bytes(asm_x64_t *as, int n) {
123 return mp_asm_base_get_cur_to_write_bytes(&as->base, n);
124}
125
126STATIC void asm_x64_write_byte_1(asm_x64_t *as, byte b1) {
127 byte *c = asm_x64_get_cur_to_write_bytes(as, 1);
128 if (c != NULL) {
129 c[0] = b1;
130 }
131}
132
133STATIC void asm_x64_write_byte_2(asm_x64_t *as, byte b1, byte b2) {
134 byte *c = asm_x64_get_cur_to_write_bytes(as, 2);
135 if (c != NULL) {
136 c[0] = b1;
137 c[1] = b2;
138 }
139}
140
141STATIC void asm_x64_write_byte_3(asm_x64_t *as, byte b1, byte b2, byte b3) {
142 byte *c = asm_x64_get_cur_to_write_bytes(as, 3);
143 if (c != NULL) {
144 c[0] = b1;
145 c[1] = b2;
146 c[2] = b3;
147 }
148}
149
150STATIC void asm_x64_write_word32(asm_x64_t *as, int w32) {
151 byte *c = asm_x64_get_cur_to_write_bytes(as, 4);
152 if (c != NULL) {
153 c[0] = IMM32_L0(w32);
154 c[1] = IMM32_L1(w32);
155 c[2] = IMM32_L2(w32);
156 c[3] = IMM32_L3(w32);
157 }
158}
159
160STATIC void asm_x64_write_word64(asm_x64_t *as, int64_t w64) {
161 byte *c = asm_x64_get_cur_to_write_bytes(as, 8);
162 if (c != NULL) {
163 c[0] = IMM32_L0(w64);
164 c[1] = IMM32_L1(w64);
165 c[2] = IMM32_L2(w64);
166 c[3] = IMM32_L3(w64);
167 c[4] = IMM64_L4(w64);
168 c[5] = IMM64_L5(w64);
169 c[6] = IMM64_L6(w64);
170 c[7] = IMM64_L7(w64);
171 }
172}
173
174/* unused
175STATIC void asm_x64_write_word32_to(asm_x64_t *as, int offset, int w32) {
176 byte* c;
177 assert(offset + 4 <= as->code_size);
178 c = as->code_base + offset;
179 c[0] = IMM32_L0(w32);
180 c[1] = IMM32_L1(w32);
181 c[2] = IMM32_L2(w32);
182 c[3] = IMM32_L3(w32);
183}
184*/
185
186STATIC void asm_x64_write_r64_disp(asm_x64_t *as, int r64, int disp_r64, int disp_offset) {
187 uint8_t rm_disp;
188 if (disp_offset == 0 && (disp_r64 & 7) != ASM_X64_REG_RBP) {
189 rm_disp = MODRM_RM_DISP0;
190 } else if (SIGNED_FIT8(disp_offset)) {
191 rm_disp = MODRM_RM_DISP8;
192 } else {
193 rm_disp = MODRM_RM_DISP32;
194 }
195 asm_x64_write_byte_1(as, MODRM_R64(r64) | rm_disp | MODRM_RM_R64(disp_r64));
196 if ((disp_r64 & 7) == ASM_X64_REG_RSP) {
197 // Special case for rsp and r12, they need a SIB byte
198 asm_x64_write_byte_1(as, 0x24);
199 }
200 if (rm_disp == MODRM_RM_DISP8) {
201 asm_x64_write_byte_1(as, IMM32_L0(disp_offset));
202 } else if (rm_disp == MODRM_RM_DISP32) {
203 asm_x64_write_word32(as, disp_offset);
204 }
205}
206
207STATIC void asm_x64_generic_r64_r64(asm_x64_t *as, int dest_r64, int src_r64, int op) {
208 asm_x64_write_byte_3(as, REX_PREFIX | REX_W | REX_R_FROM_R64(src_r64) | REX_B_FROM_R64(dest_r64), op, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
209}
210
211void asm_x64_nop(asm_x64_t *as) {
212 asm_x64_write_byte_1(as, OPCODE_NOP);
213}
214
215void asm_x64_push_r64(asm_x64_t *as, int src_r64) {
216 if (src_r64 < 8) {
217 asm_x64_write_byte_1(as, OPCODE_PUSH_R64 | src_r64);
218 } else {
219 asm_x64_write_byte_2(as, REX_PREFIX | REX_B, OPCODE_PUSH_R64 | (src_r64 & 7));
220 }
221}
222
223/*
224void asm_x64_push_i32(asm_x64_t *as, int src_i32) {
225 asm_x64_write_byte_1(as, OPCODE_PUSH_I64);
226 asm_x64_write_word32(as, src_i32); // will be sign extended to 64 bits
227}
228*/
229
230/*
231void asm_x64_push_disp(asm_x64_t *as, int src_r64, int src_offset) {
232 assert(src_r64 < 8);
233 asm_x64_write_byte_1(as, OPCODE_PUSH_M64);
234 asm_x64_write_r64_disp(as, 6, src_r64, src_offset);
235}
236*/
237
238void asm_x64_pop_r64(asm_x64_t *as, int dest_r64) {
239 if (dest_r64 < 8) {
240 asm_x64_write_byte_1(as, OPCODE_POP_R64 | dest_r64);
241 } else {
242 asm_x64_write_byte_2(as, REX_PREFIX | REX_B, OPCODE_POP_R64 | (dest_r64 & 7));
243 }
244}
245
246STATIC void asm_x64_ret(asm_x64_t *as) {
247 asm_x64_write_byte_1(as, OPCODE_RET);
248}
249
250void asm_x64_mov_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
251 asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_MOV_R64_TO_RM64);
252}
253
254void asm_x64_mov_r8_to_mem8(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
255 if (src_r64 < 8 && dest_r64 < 8) {
256 asm_x64_write_byte_1(as, OPCODE_MOV_R8_TO_RM8);
257 } else {
258 asm_x64_write_byte_2(as, REX_PREFIX | REX_R_FROM_R64(src_r64) | REX_B_FROM_R64(dest_r64), OPCODE_MOV_R8_TO_RM8);
259 }
260 asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
261}
262
263void asm_x64_mov_r16_to_mem16(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
264 if (src_r64 < 8 && dest_r64 < 8) {
265 asm_x64_write_byte_2(as, OP_SIZE_PREFIX, OPCODE_MOV_R64_TO_RM64);
266 } else {
267 asm_x64_write_byte_3(as, OP_SIZE_PREFIX, REX_PREFIX | REX_R_FROM_R64(src_r64) | REX_B_FROM_R64(dest_r64), OPCODE_MOV_R64_TO_RM64);
268 }
269 asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
270}
271
272void asm_x64_mov_r32_to_mem32(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
273 if (src_r64 < 8 && dest_r64 < 8) {
274 asm_x64_write_byte_1(as, OPCODE_MOV_R64_TO_RM64);
275 } else {
276 asm_x64_write_byte_2(as, REX_PREFIX | REX_R_FROM_R64(src_r64) | REX_B_FROM_R64(dest_r64), OPCODE_MOV_R64_TO_RM64);
277 }
278 asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
279}
280
281void asm_x64_mov_r64_to_mem64(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
282 // use REX prefix for 64 bit operation
283 asm_x64_write_byte_2(as, REX_PREFIX | REX_W | REX_R_FROM_R64(src_r64) | REX_B_FROM_R64(dest_r64), OPCODE_MOV_R64_TO_RM64);
284 asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
285}
286
287void asm_x64_mov_mem8_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
288 assert(src_r64 < 8);
289 if (dest_r64 < 8) {
290 asm_x64_write_byte_2(as, 0x0f, OPCODE_MOVZX_RM8_TO_R64);
291 } else {
292 asm_x64_write_byte_3(as, REX_PREFIX | REX_R, 0x0f, OPCODE_MOVZX_RM8_TO_R64);
293 }
294 asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
295}
296
297void asm_x64_mov_mem16_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
298 assert(src_r64 < 8);
299 if (dest_r64 < 8) {
300 asm_x64_write_byte_2(as, 0x0f, OPCODE_MOVZX_RM16_TO_R64);
301 } else {
302 asm_x64_write_byte_3(as, REX_PREFIX | REX_R, 0x0f, OPCODE_MOVZX_RM16_TO_R64);
303 }
304 asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
305}
306
307void asm_x64_mov_mem32_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
308 assert(src_r64 < 8);
309 if (dest_r64 < 8) {
310 asm_x64_write_byte_1(as, OPCODE_MOV_RM64_TO_R64);
311 } else {
312 asm_x64_write_byte_2(as, REX_PREFIX | REX_R, OPCODE_MOV_RM64_TO_R64);
313 }
314 asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
315}
316
317void asm_x64_mov_mem64_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
318 // use REX prefix for 64 bit operation
319 asm_x64_write_byte_2(as, REX_PREFIX | REX_W | REX_R_FROM_R64(dest_r64) | REX_B_FROM_R64(src_r64), OPCODE_MOV_RM64_TO_R64);
320 asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
321}
322
323STATIC void asm_x64_lea_disp_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
324 // use REX prefix for 64 bit operation
325 assert(src_r64 < 8);
326 assert(dest_r64 < 8);
327 asm_x64_write_byte_2(as, REX_PREFIX | REX_W, OPCODE_LEA_MEM_TO_R64);
328 asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
329}
330
331/*
332void asm_x64_mov_i8_to_r8(asm_x64_t *as, int src_i8, int dest_r64) {
333 assert(dest_r64 < 8);
334 asm_x64_write_byte_2(as, OPCODE_MOV_I8_TO_R8 | dest_r64, src_i8);
335}
336*/
337
338size_t asm_x64_mov_i32_to_r64(asm_x64_t *as, int src_i32, int dest_r64) {
339 // cpu defaults to i32 to r64, with zero extension
340 if (dest_r64 < 8) {
341 asm_x64_write_byte_1(as, OPCODE_MOV_I64_TO_R64 | dest_r64);
342 } else {
343 asm_x64_write_byte_2(as, REX_PREFIX | REX_B, OPCODE_MOV_I64_TO_R64 | (dest_r64 & 7));
344 }
345 size_t loc = mp_asm_base_get_code_pos(&as->base);
346 asm_x64_write_word32(as, src_i32);
347 return loc;
348}
349
350void asm_x64_mov_i64_to_r64(asm_x64_t *as, int64_t src_i64, int dest_r64) {
351 // cpu defaults to i32 to r64
352 // to mov i64 to r64 need to use REX prefix
353 asm_x64_write_byte_2(as,
354 REX_PREFIX | REX_W | (dest_r64 < 8 ? 0 : REX_B),
355 OPCODE_MOV_I64_TO_R64 | (dest_r64 & 7));
356 asm_x64_write_word64(as, src_i64);
357}
358
359void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64) {
360 // TODO use movzx, movsx if possible
361 if (UNSIGNED_FIT32(src_i64)) {
362 // 5 bytes
363 asm_x64_mov_i32_to_r64(as, src_i64 & 0xffffffff, dest_r64);
364 } else {
365 // 10 bytes
366 asm_x64_mov_i64_to_r64(as, src_i64, dest_r64);
367 }
368}
369
370void asm_x64_and_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
371 asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_AND_R64_TO_RM64);
372}
373
374void asm_x64_or_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
375 asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_OR_R64_TO_RM64);
376}
377
378void asm_x64_xor_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
379 asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_XOR_R64_TO_RM64);
380}
381
382void asm_x64_shl_r64_cl(asm_x64_t *as, int dest_r64) {
383 asm_x64_generic_r64_r64(as, dest_r64, 4, OPCODE_SHL_RM64_CL);
384}
385
386void asm_x64_shr_r64_cl(asm_x64_t *as, int dest_r64) {
387 asm_x64_generic_r64_r64(as, dest_r64, 5, OPCODE_SHR_RM64_CL);
388}
389
390void asm_x64_sar_r64_cl(asm_x64_t *as, int dest_r64) {
391 asm_x64_generic_r64_r64(as, dest_r64, 7, OPCODE_SAR_RM64_CL);
392}
393
394void asm_x64_add_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
395 asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_ADD_R64_TO_RM64);
396}
397
398void asm_x64_sub_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
399 asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_SUB_R64_FROM_RM64);
400}
401
402void asm_x64_mul_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
403 // imul reg64, reg/mem64 -- 0x0f 0xaf /r
404 asm_x64_write_byte_1(as, REX_PREFIX | REX_W | REX_R_FROM_R64(dest_r64) | REX_B_FROM_R64(src_r64));
405 asm_x64_write_byte_3(as, 0x0f, 0xaf, MODRM_R64(dest_r64) | MODRM_RM_REG | MODRM_RM_R64(src_r64));
406}
407
408/*
409void asm_x64_sub_i32_from_r32(asm_x64_t *as, int src_i32, int dest_r32) {
410 if (SIGNED_FIT8(src_i32)) {
411 // defaults to 32 bit operation
412 asm_x64_write_byte_2(as, OPCODE_SUB_I8_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32));
413 asm_x64_write_byte_1(as, src_i32 & 0xff);
414 } else {
415 // defaults to 32 bit operation
416 asm_x64_write_byte_2(as, OPCODE_SUB_I32_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r32));
417 asm_x64_write_word32(as, src_i32);
418 }
419}
420*/
421
422STATIC void asm_x64_sub_r64_i32(asm_x64_t *as, int dest_r64, int src_i32) {
423 assert(dest_r64 < 8);
424 if (SIGNED_FIT8(src_i32)) {
425 // use REX prefix for 64 bit operation
426 asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I8_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
427 asm_x64_write_byte_1(as, src_i32 & 0xff);
428 } else {
429 // use REX prefix for 64 bit operation
430 asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_I32_FROM_RM64, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
431 asm_x64_write_word32(as, src_i32);
432 }
433}
434
435/*
436void asm_x64_shl_r32_by_imm(asm_x64_t *as, int r32, int imm) {
437 asm_x64_write_byte_2(as, OPCODE_SHL_RM32_BY_I8, MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(r32));
438 asm_x64_write_byte_1(as, imm);
439}
440
441void asm_x64_shr_r32_by_imm(asm_x64_t *as, int r32, int imm) {
442 asm_x64_write_byte_2(as, OPCODE_SHR_RM32_BY_I8, MODRM_R64(5) | MODRM_RM_REG | MODRM_RM_R64(r32));
443 asm_x64_write_byte_1(as, imm);
444}
445
446void asm_x64_sar_r32_by_imm(asm_x64_t *as, int r32, int imm) {
447 asm_x64_write_byte_2(as, OPCODE_SAR_RM32_BY_I8, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(r32));
448 asm_x64_write_byte_1(as, imm);
449}
450*/
451
452void asm_x64_cmp_r64_with_r64(asm_x64_t *as, int src_r64_a, int src_r64_b) {
453 asm_x64_generic_r64_r64(as, src_r64_b, src_r64_a, OPCODE_CMP_R64_WITH_RM64);
454}
455
456/*
457void asm_x64_cmp_i32_with_r32(asm_x64_t *as, int src_i32, int src_r32) {
458 if (SIGNED_FIT8(src_i32)) {
459 asm_x64_write_byte_2(as, OPCODE_CMP_I8_WITH_RM32, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32));
460 asm_x64_write_byte_1(as, src_i32 & 0xff);
461 } else {
462 asm_x64_write_byte_2(as, OPCODE_CMP_I32_WITH_RM32, MODRM_R64(7) | MODRM_RM_REG | MODRM_RM_R64(src_r32));
463 asm_x64_write_word32(as, src_i32);
464 }
465}
466*/
467
468void asm_x64_test_r8_with_r8(asm_x64_t *as, int src_r64_a, int src_r64_b) {
469 assert(src_r64_a < 8);
470 assert(src_r64_b < 8);
471 asm_x64_write_byte_2(as, OPCODE_TEST_R8_WITH_RM8, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b));
472}
473
474void asm_x64_test_r64_with_r64(asm_x64_t *as, int src_r64_a, int src_r64_b) {
475 asm_x64_generic_r64_r64(as, src_r64_b, src_r64_a, OPCODE_TEST_R64_WITH_RM64);
476}
477
478void asm_x64_setcc_r8(asm_x64_t *as, int jcc_type, int dest_r8) {
479 assert(dest_r8 < 8);
480 asm_x64_write_byte_3(as, OPCODE_SETCC_RM8_A, OPCODE_SETCC_RM8_B | jcc_type, MODRM_R64(0) | MODRM_RM_REG | MODRM_RM_R64(dest_r8));
481}
482
483void asm_x64_jmp_reg(asm_x64_t *as, int src_r64) {
484 assert(src_r64 < 8);
485 asm_x64_write_byte_2(as, OPCODE_JMP_RM64, MODRM_R64(4) | MODRM_RM_REG | MODRM_RM_R64(src_r64));
486}
487
488STATIC mp_uint_t get_label_dest(asm_x64_t *as, mp_uint_t label) {
489 assert(label < as->base.max_num_labels);
490 return as->base.label_offsets[label];
491}
492
493void asm_x64_jmp_label(asm_x64_t *as, mp_uint_t label) {
494 mp_uint_t dest = get_label_dest(as, label);
495 mp_int_t rel = dest - as->base.code_offset;
496 if (dest != (mp_uint_t)-1 && rel < 0) {
497 // is a backwards jump, so we know the size of the jump on the first pass
498 // calculate rel assuming 8 bit relative jump
499 rel -= 2;
500 if (SIGNED_FIT8(rel)) {
501 asm_x64_write_byte_2(as, OPCODE_JMP_REL8, rel & 0xff);
502 } else {
503 rel += 2;
504 goto large_jump;
505 }
506 } else {
507 // is a forwards jump, so need to assume it's large
508 large_jump:
509 rel -= 5;
510 asm_x64_write_byte_1(as, OPCODE_JMP_REL32);
511 asm_x64_write_word32(as, rel);
512 }
513}
514
515void asm_x64_jcc_label(asm_x64_t *as, int jcc_type, mp_uint_t label) {
516 mp_uint_t dest = get_label_dest(as, label);
517 mp_int_t rel = dest - as->base.code_offset;
518 if (dest != (mp_uint_t)-1 && rel < 0) {
519 // is a backwards jump, so we know the size of the jump on the first pass
520 // calculate rel assuming 8 bit relative jump
521 rel -= 2;
522 if (SIGNED_FIT8(rel)) {
523 asm_x64_write_byte_2(as, OPCODE_JCC_REL8 | jcc_type, rel & 0xff);
524 } else {
525 rel += 2;
526 goto large_jump;
527 }
528 } else {
529 // is a forwards jump, so need to assume it's large
530 large_jump:
531 rel -= 6;
532 asm_x64_write_byte_2(as, OPCODE_JCC_REL32_A, OPCODE_JCC_REL32_B | jcc_type);
533 asm_x64_write_word32(as, rel);
534 }
535}
536
537void asm_x64_entry(asm_x64_t *as, int num_locals) {
538 assert(num_locals >= 0);
539 asm_x64_push_r64(as, ASM_X64_REG_RBP);
540 asm_x64_push_r64(as, ASM_X64_REG_RBX);
541 asm_x64_push_r64(as, ASM_X64_REG_R12);
542 asm_x64_push_r64(as, ASM_X64_REG_R13);
543 num_locals |= 1; // make it odd so stack is aligned on 16 byte boundary
544 asm_x64_sub_r64_i32(as, ASM_X64_REG_RSP, num_locals * WORD_SIZE);
545 as->num_locals = num_locals;
546}
547
548void asm_x64_exit(asm_x64_t *as) {
549 asm_x64_sub_r64_i32(as, ASM_X64_REG_RSP, -as->num_locals * WORD_SIZE);
550 asm_x64_pop_r64(as, ASM_X64_REG_R13);
551 asm_x64_pop_r64(as, ASM_X64_REG_R12);
552 asm_x64_pop_r64(as, ASM_X64_REG_RBX);
553 asm_x64_pop_r64(as, ASM_X64_REG_RBP);
554 asm_x64_ret(as);
555}
556
557// locals:
558// - stored on the stack in ascending order
559// - numbered 0 through as->num_locals-1
560// - RSP points to the first local
561//
562// | RSP
563// v
564// l0 l1 l2 ... l(n-1)
565// ^ ^
566// | low address | high address in RAM
567//
568STATIC int asm_x64_local_offset_from_rsp(asm_x64_t *as, int local_num) {
569 (void)as;
570 // Stack is full descending, RSP points to local0
571 return local_num * WORD_SIZE;
572}
573
574void asm_x64_mov_local_to_r64(asm_x64_t *as, int src_local_num, int dest_r64) {
575 asm_x64_mov_mem64_to_r64(as, ASM_X64_REG_RSP, asm_x64_local_offset_from_rsp(as, src_local_num), dest_r64);
576}
577
578void asm_x64_mov_r64_to_local(asm_x64_t *as, int src_r64, int dest_local_num) {
579 asm_x64_mov_r64_to_mem64(as, src_r64, ASM_X64_REG_RSP, asm_x64_local_offset_from_rsp(as, dest_local_num));
580}
581
582void asm_x64_mov_local_addr_to_r64(asm_x64_t *as, int local_num, int dest_r64) {
583 int offset = asm_x64_local_offset_from_rsp(as, local_num);
584 if (offset == 0) {
585 asm_x64_mov_r64_r64(as, dest_r64, ASM_X64_REG_RSP);
586 } else {
587 asm_x64_lea_disp_to_r64(as, ASM_X64_REG_RSP, offset, dest_r64);
588 }
589}
590
591void asm_x64_mov_reg_pcrel(asm_x64_t *as, int dest_r64, mp_uint_t label) {
592 mp_uint_t dest = get_label_dest(as, label);
593 mp_int_t rel = dest - (as->base.code_offset + 7);
594 asm_x64_write_byte_3(as, REX_PREFIX | REX_W | REX_R_FROM_R64(dest_r64), OPCODE_LEA_MEM_TO_R64, MODRM_R64(dest_r64) | MODRM_RM_R64(5));
595 asm_x64_write_word32(as, rel);
596}
597
598/*
599void asm_x64_push_local(asm_x64_t *as, int local_num) {
600 asm_x64_push_disp(as, ASM_X64_REG_RSP, asm_x64_local_offset_from_rsp(as, local_num));
601}
602
603void asm_x64_push_local_addr(asm_x64_t *as, int local_num, int temp_r64) {
604 asm_x64_mov_r64_r64(as, temp_r64, ASM_X64_REG_RSP);
605 asm_x64_add_i32_to_r32(as, asm_x64_local_offset_from_rsp(as, local_num), temp_r64);
606 asm_x64_push_r64(as, temp_r64);
607}
608*/
609
610/*
611 can't use these because code might be relocated when resized
612
613void asm_x64_call(asm_x64_t *as, void* func) {
614 asm_x64_sub_i32_from_r32(as, 8, ASM_X64_REG_RSP);
615 asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
616 asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
617 asm_x64_mov_r64_r64(as, ASM_X64_REG_RSP, ASM_X64_REG_RBP);
618}
619
620void asm_x64_call_i1(asm_x64_t *as, void* func, int i1) {
621 asm_x64_sub_i32_from_r32(as, 8, ASM_X64_REG_RSP);
622 asm_x64_sub_i32_from_r32(as, 12, ASM_X64_REG_RSP);
623 asm_x64_push_i32(as, i1);
624 asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
625 asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
626 asm_x64_add_i32_to_r32(as, 16, ASM_X64_REG_RSP);
627 asm_x64_mov_r64_r64(as, ASM_X64_REG_RSP, ASM_X64_REG_RBP);
628}
629*/
630
631void asm_x64_call_ind(asm_x64_t *as, size_t fun_id, int temp_r64) {
632 assert(temp_r64 < 8);
633 asm_x64_mov_mem64_to_r64(as, ASM_X64_REG_FUN_TABLE, fun_id * WORD_SIZE, temp_r64);
634 asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64));
635}
636
637#endif // MICROPY_EMIT_X64
638