1/*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/* x86 64-bit arch dependent functions. */
28
29/* --------------------------------------------------------------------- */
30/* Operators */
31/* --------------------------------------------------------------------- */
32
33static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
34{
35 sljit_u8 *inst;
36
37 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
38 FAIL_IF(!inst);
39 INC_SIZE(2 + sizeof(sljit_sw));
40 *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
41 *inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7));
42 sljit_unaligned_store_sw(inst, imm);
43 return SLJIT_SUCCESS;
44}
45
46static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
47{
48 sljit_u8 *inst;
49 sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);
50
51 inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
52 FAIL_IF(!inst);
53 INC_SIZE(length);
54 if (rex)
55 *inst++ = rex;
56 *inst++ = opcode;
57 sljit_unaligned_store_s32(inst, (sljit_s32)imm);
58 return SLJIT_SUCCESS;
59}
60
/* Reserves buffer space for one instruction and encodes everything except
   the opcode bytes: legacy prefixes, the REX byte, the mod r/m byte, the
   optional SIB byte, the displacement and the immediate.

   size:    the low four bits give the number of opcode bytes (the caller
            stores them through the returned pointer); the remaining bits
            are EX86_* flags.
   a, imma: register operand, or SLJIT_IMM with its value in imma.
   b, immb: register or memory operand. For a memory operand immb is the
            displacement, or the value shifted into the top two bits of the
            SIB byte when b also has an offset register.

   Returns a pointer to the first opcode byte. For EX86_SHIFT_INS the group
   opcode is stored here and the returned pointer is advanced by one.
   PTR_FAIL_IF leaves the function early when loading a large displacement
   or reserving the buffer fails; callers test the result with
   FAIL_IF(!inst).

   NOTE: the first half of the function only computes inst_size; the second
   half must emit exactly that many bytes, so the two halves have to be
   kept in sync. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_u8 reg_lmap_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	/* At most one of the F2 / F3 / 66 prefixes may be requested. */
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* From here on "size" is only the opcode byte count, while "flags"
	   keeps the complete original value. */
	size &= 0xf;
	inst_size = size;

	/* REX_W is set unless mode32 is active or the caller opted out;
	   otherwise a plain REX prefix is emitted only on request. */
	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		/* A displacement which does not fit into 32 bits is loaded into
		   TMP_REG2, which then becomes the offset register (when a base
		   register is present) or the base register itself. */
		if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
			PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
			immb = 0;
			if (b & REG_MASK)
				b |= TO_OFFS_REG(TMP_REG2);
			else
				b |= TMP_REG2;
		}

		if (!(b & REG_MASK))
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5) {
				/* A base register whose low encoding is 5 is always
				   emitted with a displacement byte, unless base and
				   offset can change places. */
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			/* A base register whose low encoding is 4 is emitted with a
			   SIB byte; SLJIT_SP in the offset position marks this case
			   (the encoder below handles it like a base-only operand). */
			if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	}
	else if (freg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			/* Binary group instructions choose between the 8 bit and
			   the 32 bit immediate form (GROUP_BINARY_83 / _81 below). */
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
			/* A shift by one uses GROUP_SHIFT_1, which has no immediate byte. */
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	/* inst now points to the first opcode byte; buf_ptr to the mod r/m
	   byte which follows the "size" opcode bytes. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		/* Bits 3-5 of the mod r/m byte hold operand a; they are left
		   zero for an immediate. */
		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_lmap[a] << 3);
		else
			*buf_ptr = U8(freg_lmap[a] << 3);
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register operand. */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_lmap_b = reg_lmap[b & REG_MASK];

		/* Base register only (SLJIT_SP as offset register is the marker
		   set above for a base which is emitted with a SIB byte). */
		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* 0x40 selects an 8 bit, 0x80 a 32 bit displacement. */
			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_lmap_b;
			else {
				/* r/m value 4: a SIB byte follows. */
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			/* Base + offset register: immb goes into the top two bits of
			   the SIB byte. A base with low encoding 5 gets a zero 8 bit
			   displacement. */
			if (reg_lmap_b == 5)
				*buf_ptr |= 0x40;

			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));

			if (reg_lmap_b == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		/* No base register: SIB byte 0x25 followed by a 32 bit
		   displacement (see the size calculation above). */
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		/* A shift by one has neither EX86_BYTE_ARG nor an immediate. */
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
283
284/* --------------------------------------------------------------------- */
285/* Enter / return */
286/* --------------------------------------------------------------------- */
287
288static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
289{
290 sljit_uw type = jump->flags >> TYPE_SHIFT;
291
292 int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
293
294 /* The relative jump below specialized for this case. */
295 SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);
296
297 if (type < SLJIT_JUMP) {
298 /* Invert type. */
299 *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10);
300 *code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
301 }
302
303 *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
304 *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
305 jump->addr = (sljit_uw)code_ptr;
306
307 if (jump->flags & JUMP_LABEL)
308 jump->flags |= PATCH_MD;
309 else if (short_addr)
310 sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
311 else
312 sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);
313
314 code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);
315
316 *code_ptr++ = REX_B;
317 *code_ptr++ = GROUP_FF;
318 *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);
319
320 return code_ptr;
321}
322
323static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
324{
325 if (max_label > HALFWORD_MAX) {
326 put_label->addr -= put_label->flags;
327 put_label->flags = PATCH_MD;
328 return code_ptr;
329 }
330
331 if (put_label->flags == 0) {
332 /* Destination is register. */
333 code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);
334
335 SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
336 SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);
337
338 if ((code_ptr[0] & 0x07) != 0) {
339 code_ptr[0] = U8(code_ptr[0] & ~0x08);
340 code_ptr += 2 + sizeof(sljit_s32);
341 }
342 else {
343 code_ptr[0] = code_ptr[1];
344 code_ptr += 1 + sizeof(sljit_s32);
345 }
346
347 put_label->addr = (sljit_uw)code_ptr;
348 return code_ptr;
349 }
350
351 code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
352 SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);
353
354 SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
355
356 if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
357 code_ptr += 2 + sizeof(sljit_uw);
358 SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
359 }
360
361 SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);
362
363 code_ptr[0] = U8(code_ptr[0] & ~0x4);
364 code_ptr[1] = MOV_rm_i32;
365 code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3));
366
367 code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
368 put_label->addr = (sljit_uw)code_ptr;
369 put_label->flags = 0;
370 return code_ptr;
371}
372
373SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
374 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
375 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
376{
377 sljit_uw size;
378 sljit_s32 word_arg_count = 0;
379 sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
380 sljit_s32 saved_regs_size, tmp, i;
381#ifdef _WIN64
382 sljit_s32 saved_float_regs_size;
383 sljit_s32 saved_float_regs_offset = 0;
384 sljit_s32 float_arg_count = 0;
385#endif /* _WIN64 */
386 sljit_u8 *inst;
387
388 CHECK_ERROR();
389 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
390 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
391
392 if (options & SLJIT_ENTER_REG_ARG)
393 arg_types = 0;
394
395 /* Emit ENDBR64 at function entry if needed. */
396 FAIL_IF(emit_endbranch(compiler));
397
398 compiler->mode32 = 0;
399
400 /* Including the return address saved by the call instruction. */
401 saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
402
403 tmp = SLJIT_S0 - saveds;
404 for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
405 size = reg_map[i] >= 8 ? 2 : 1;
406 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
407 FAIL_IF(!inst);
408 INC_SIZE(size);
409 if (reg_map[i] >= 8)
410 *inst++ = REX_B;
411 PUSH_REG(reg_lmap[i]);
412 }
413
414 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
415 size = reg_map[i] >= 8 ? 2 : 1;
416 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
417 FAIL_IF(!inst);
418 INC_SIZE(size);
419 if (reg_map[i] >= 8)
420 *inst++ = REX_B;
421 PUSH_REG(reg_lmap[i]);
422 }
423
424#ifdef _WIN64
425 local_size += SLJIT_LOCALS_OFFSET;
426 saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
427
428 if (saved_float_regs_size > 0) {
429 saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
430 local_size = saved_float_regs_offset + saved_float_regs_size;
431 }
432#else /* !_WIN64 */
433 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
434#endif /* _WIN64 */
435
436 arg_types >>= SLJIT_ARG_SHIFT;
437
438 while (arg_types > 0) {
439 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
440 tmp = 0;
441#ifndef _WIN64
442 switch (word_arg_count) {
443 case 0:
444 tmp = SLJIT_R2;
445 break;
446 case 1:
447 tmp = SLJIT_R1;
448 break;
449 case 2:
450 tmp = TMP_REG1;
451 break;
452 default:
453 tmp = SLJIT_R3;
454 break;
455 }
456#else /* !_WIN64 */
457 switch (word_arg_count + float_arg_count) {
458 case 0:
459 tmp = SLJIT_R3;
460 break;
461 case 1:
462 tmp = SLJIT_R1;
463 break;
464 case 2:
465 tmp = SLJIT_R2;
466 break;
467 default:
468 tmp = TMP_REG1;
469 break;
470 }
471#endif /* _WIN64 */
472 if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
473 if (tmp != SLJIT_R0 + word_arg_count)
474 EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
475 } else {
476 EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
477 saved_arg_count++;
478 }
479 word_arg_count++;
480 } else {
481#ifdef _WIN64
482 SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
483 float_arg_count++;
484 if (float_arg_count != float_arg_count + word_arg_count)
485 FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
486 float_arg_count, float_arg_count + word_arg_count, 0));
487#endif /* _WIN64 */
488 }
489 arg_types >>= SLJIT_ARG_SHIFT;
490 }
491
492 local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
493 compiler->local_size = local_size;
494
495#ifdef _WIN64
496 if (local_size > 0) {
497 if (local_size <= 4 * 4096) {
498 if (local_size > 4096)
499 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
500 if (local_size > 2 * 4096)
501 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
502 if (local_size > 3 * 4096)
503 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
504 }
505 else {
506 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);
507
508 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
509 BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
510 BINARY_IMM32(SUB, 1, TMP_REG1, 0);
511
512 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
513 FAIL_IF(!inst);
514
515 INC_SIZE(2);
516 inst[0] = JNE_i8;
517 inst[1] = (sljit_u8)-21;
518 local_size &= 0xfff;
519 }
520
521 if (local_size > 0)
522 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
523 }
524#endif /* _WIN64 */
525
526 if (local_size > 0)
527 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
528
529#ifdef _WIN64
530 if (saved_float_regs_size > 0) {
531 compiler->mode32 = 1;
532
533 tmp = SLJIT_FS0 - fsaveds;
534 for (i = SLJIT_FS0; i > tmp; i--) {
535 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
536 *inst++ = GROUP_0F;
537 *inst = MOVAPS_xm_x;
538 saved_float_regs_offset += 16;
539 }
540
541 for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
542 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
543 *inst++ = GROUP_0F;
544 *inst = MOVAPS_xm_x;
545 saved_float_regs_offset += 16;
546 }
547 }
548#endif /* _WIN64 */
549
550 return SLJIT_SUCCESS;
551}
552
553SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
554 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
555 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
556{
557 sljit_s32 saved_regs_size;
558#ifdef _WIN64
559 sljit_s32 saved_float_regs_size;
560#endif /* _WIN64 */
561
562 CHECK_ERROR();
563 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
564 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
565
566#ifdef _WIN64
567 local_size += SLJIT_LOCALS_OFFSET;
568 saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
569
570 if (saved_float_regs_size > 0)
571 local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
572#else /* !_WIN64 */
573 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
574#endif /* _WIN64 */
575
576 /* Including the return address saved by the call instruction. */
577 saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
578 compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
579 return SLJIT_SUCCESS;
580}
581
582static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
583{
584 sljit_uw size;
585 sljit_s32 local_size, i, tmp;
586 sljit_u8 *inst;
587#ifdef _WIN64
588 sljit_s32 saved_float_regs_offset;
589 sljit_s32 fscratches = compiler->fscratches;
590 sljit_s32 fsaveds = compiler->fsaveds;
591#endif /* _WIN64 */
592
593#ifdef _WIN64
594 saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
595
596 if (saved_float_regs_offset > 0) {
597 compiler->mode32 = 1;
598 saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;
599
600 tmp = SLJIT_FS0 - fsaveds;
601 for (i = SLJIT_FS0; i > tmp; i--) {
602 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
603 *inst++ = GROUP_0F;
604 *inst = MOVAPS_x_xm;
605 saved_float_regs_offset += 16;
606 }
607
608 for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
609 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
610 *inst++ = GROUP_0F;
611 *inst = MOVAPS_x_xm;
612 saved_float_regs_offset += 16;
613 }
614
615 compiler->mode32 = 0;
616 }
617#endif /* _WIN64 */
618
619 local_size = compiler->local_size;
620
621 if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
622 local_size += SSIZE_OF(sw);
623 is_return_to = 0;
624 }
625
626 if (local_size > 0)
627 BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);
628
629 tmp = compiler->scratches;
630 for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
631 size = reg_map[i] >= 8 ? 2 : 1;
632 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
633 FAIL_IF(!inst);
634 INC_SIZE(size);
635 if (reg_map[i] >= 8)
636 *inst++ = REX_B;
637 POP_REG(reg_lmap[i]);
638 }
639
640 tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
641 for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
642 size = reg_map[i] >= 8 ? 2 : 1;
643 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
644 FAIL_IF(!inst);
645 INC_SIZE(size);
646 if (reg_map[i] >= 8)
647 *inst++ = REX_B;
648 POP_REG(reg_lmap[i]);
649 }
650
651 if (is_return_to)
652 BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);
653
654 return SLJIT_SUCCESS;
655}
656
657SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
658{
659 sljit_u8 *inst;
660
661 CHECK_ERROR();
662 CHECK(check_sljit_emit_return_void(compiler));
663
664 compiler->mode32 = 0;
665
666 FAIL_IF(emit_stack_frame_release(compiler, 0));
667
668 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
669 FAIL_IF(!inst);
670 INC_SIZE(1);
671 RET();
672 return SLJIT_SUCCESS;
673}
674
675SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
676 sljit_s32 src, sljit_sw srcw)
677{
678 CHECK_ERROR();
679 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
680
681 compiler->mode32 = 0;
682
683 if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
684 ADJUST_LOCAL_OFFSET(src, srcw);
685
686 EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
687 src = TMP_REG2;
688 srcw = 0;
689 }
690
691 FAIL_IF(emit_stack_frame_release(compiler, 1));
692
693 SLJIT_SKIP_CHECKS(compiler);
694 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
695}
696
697/* --------------------------------------------------------------------- */
698/* Call / return instructions */
699/* --------------------------------------------------------------------- */
700
701#ifndef _WIN64
702
703static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
704{
705 sljit_s32 src = src_ptr ? (*src_ptr) : 0;
706 sljit_s32 word_arg_count = 0;
707
708 SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
709 SLJIT_ASSERT(!(src & SLJIT_MEM));
710
711 /* Remove return value. */
712 arg_types >>= SLJIT_ARG_SHIFT;
713
714 while (arg_types) {
715 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
716 word_arg_count++;
717 arg_types >>= SLJIT_ARG_SHIFT;
718 }
719
720 if (word_arg_count == 0)
721 return SLJIT_SUCCESS;
722
723 if (word_arg_count >= 3) {
724 if (src == SLJIT_R2)
725 *src_ptr = TMP_REG1;
726 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
727 }
728
729 return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
730}
731
732#else
733
734static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
735{
736 sljit_s32 src = src_ptr ? (*src_ptr) : 0;
737 sljit_s32 arg_count = 0;
738 sljit_s32 word_arg_count = 0;
739 sljit_s32 float_arg_count = 0;
740 sljit_s32 types = 0;
741 sljit_s32 data_trandfer = 0;
742 static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };
743
744 SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
745 SLJIT_ASSERT(!(src & SLJIT_MEM));
746
747 arg_types >>= SLJIT_ARG_SHIFT;
748
749 while (arg_types) {
750 types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
751
752 switch (arg_types & SLJIT_ARG_MASK) {
753 case SLJIT_ARG_TYPE_F64:
754 case SLJIT_ARG_TYPE_F32:
755 arg_count++;
756 float_arg_count++;
757
758 if (arg_count != float_arg_count)
759 data_trandfer = 1;
760 break;
761 default:
762 arg_count++;
763 word_arg_count++;
764
765 if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
766 data_trandfer = 1;
767
768 if (src == word_arg_regs[arg_count]) {
769 EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
770 *src_ptr = TMP_REG2;
771 }
772 }
773 break;
774 }
775
776 arg_types >>= SLJIT_ARG_SHIFT;
777 }
778
779 if (!data_trandfer)
780 return SLJIT_SUCCESS;
781
782 while (types) {
783 switch (types & SLJIT_ARG_MASK) {
784 case SLJIT_ARG_TYPE_F64:
785 if (arg_count != float_arg_count)
786 FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
787 arg_count--;
788 float_arg_count--;
789 break;
790 case SLJIT_ARG_TYPE_F32:
791 if (arg_count != float_arg_count)
792 FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
793 arg_count--;
794 float_arg_count--;
795 break;
796 default:
797 if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
798 EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
799 arg_count--;
800 word_arg_count--;
801 break;
802 }
803
804 types >>= SLJIT_ARG_SHIFT;
805 }
806
807 return SLJIT_SUCCESS;
808}
809
810#endif
811
812SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
813 sljit_s32 arg_types)
814{
815 CHECK_ERROR_PTR();
816 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
817
818 compiler->mode32 = 0;
819
820 if ((type & 0xff) != SLJIT_CALL_REG_ARG)
821 PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
822
823 if (type & SLJIT_CALL_RETURN) {
824 PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
825 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
826 }
827
828 SLJIT_SKIP_CHECKS(compiler);
829 return sljit_emit_jump(compiler, type);
830}
831
832SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
833 sljit_s32 arg_types,
834 sljit_s32 src, sljit_sw srcw)
835{
836 CHECK_ERROR();
837 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
838
839 compiler->mode32 = 0;
840
841 if (src & SLJIT_MEM) {
842 ADJUST_LOCAL_OFFSET(src, srcw);
843 EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
844 src = TMP_REG2;
845 }
846
847 if (type & SLJIT_CALL_RETURN) {
848 if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
849 EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
850 src = TMP_REG2;
851 }
852
853 FAIL_IF(emit_stack_frame_release(compiler, 0));
854 }
855
856 if ((type & 0xff) != SLJIT_CALL_REG_ARG)
857 FAIL_IF(call_with_args(compiler, arg_types, &src));
858
859 if (type & SLJIT_CALL_RETURN)
860 type = SLJIT_JUMP;
861
862 SLJIT_SKIP_CHECKS(compiler);
863 return sljit_emit_ijump(compiler, type, src, srcw);
864}
865
866SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
867{
868 sljit_u8 *inst;
869
870 CHECK_ERROR();
871 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
872 ADJUST_LOCAL_OFFSET(dst, dstw);
873
874 if (FAST_IS_REG(dst)) {
875 if (reg_map[dst] < 8) {
876 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
877 FAIL_IF(!inst);
878 INC_SIZE(1);
879 POP_REG(reg_lmap[dst]);
880 return SLJIT_SUCCESS;
881 }
882
883 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
884 FAIL_IF(!inst);
885 INC_SIZE(2);
886 *inst++ = REX_B;
887 POP_REG(reg_lmap[dst]);
888 return SLJIT_SUCCESS;
889 }
890
891 /* REX_W is not necessary (src is not immediate). */
892 compiler->mode32 = 1;
893 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
894 FAIL_IF(!inst);
895 *inst++ = POP_rm;
896 return SLJIT_SUCCESS;
897}
898
899static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
900{
901 sljit_u8 *inst;
902
903 if (FAST_IS_REG(src)) {
904 if (reg_map[src] < 8) {
905 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
906 FAIL_IF(!inst);
907
908 INC_SIZE(1 + 1);
909 PUSH_REG(reg_lmap[src]);
910 }
911 else {
912 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
913 FAIL_IF(!inst);
914
915 INC_SIZE(2 + 1);
916 *inst++ = REX_B;
917 PUSH_REG(reg_lmap[src]);
918 }
919 }
920 else {
921 /* REX_W is not necessary (src is not immediate). */
922 compiler->mode32 = 1;
923 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
924 FAIL_IF(!inst);
925 *inst++ = GROUP_FF;
926 *inst |= PUSH_rm;
927
928 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
929 FAIL_IF(!inst);
930 INC_SIZE(1);
931 }
932
933 RET();
934 return SLJIT_SUCCESS;
935}
936
937/* --------------------------------------------------------------------- */
938/* Other operations */
939/* --------------------------------------------------------------------- */
940
941SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
942 sljit_s32 reg,
943 sljit_s32 mem, sljit_sw memw)
944{
945 sljit_u8* inst;
946 sljit_s32 i, next, reg_idx;
947 sljit_u8 regs[2];
948
949 CHECK_ERROR();
950 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
951
952 if (!(reg & REG_PAIR_MASK))
953 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
954
955 ADJUST_LOCAL_OFFSET(mem, memw);
956
957 compiler->mode32 = 0;
958
959 if ((mem & REG_MASK) == 0) {
960 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);
961
962 mem = SLJIT_MEM1(TMP_REG1);
963 memw = 0;
964 } else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {
965 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);
966
967 mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);
968 memw = 0;
969 }
970
971 regs[0] = U8(REG_PAIR_FIRST(reg));
972 regs[1] = U8(REG_PAIR_SECOND(reg));
973
974 next = SSIZE_OF(sw);
975
976 if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
977 if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
978 /* Base and offset cannot be TMP_REG1. */
979 EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);
980
981 if (regs[1] == OFFS_REG(mem))
982 next = -SSIZE_OF(sw);
983
984 mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
985 } else {
986 next = -SSIZE_OF(sw);
987
988 if (!(mem & OFFS_REG_MASK))
989 memw += SSIZE_OF(sw);
990 }
991 }
992
993 for (i = 0; i < 2; i++) {
994 reg_idx = next > 0 ? i : (i ^ 0x1);
995 reg = regs[reg_idx];
996
997 if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
998 inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));
999 FAIL_IF(!inst);
1000
1001 INC_SIZE(5);
1002
1003 inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));
1004 inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
1005 inst[2] = 0x44 | U8(reg_lmap[reg] << 3);
1006 inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];
1007 inst[4] = sizeof(sljit_sw);
1008 } else if (type & SLJIT_MEM_STORE) {
1009 EMIT_MOV(compiler, mem, memw, reg, 0);
1010 } else {
1011 EMIT_MOV(compiler, reg, 0, mem, memw);
1012 }
1013
1014 if (!(mem & OFFS_REG_MASK))
1015 memw += next;
1016 }
1017
1018 return SLJIT_SUCCESS;
1019}
1020
1021static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
1022 sljit_s32 dst, sljit_sw dstw,
1023 sljit_s32 src, sljit_sw srcw)
1024{
1025 sljit_u8* inst;
1026 sljit_s32 dst_r;
1027
1028 compiler->mode32 = 0;
1029
1030 if (src & SLJIT_IMM) {
1031 if (FAST_IS_REG(dst)) {
1032 if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
1033 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
1034 FAIL_IF(!inst);
1035 *inst = MOV_rm_i32;
1036 return SLJIT_SUCCESS;
1037 }
1038 return emit_load_imm64(compiler, dst, srcw);
1039 }
1040 compiler->mode32 = 1;
1041 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
1042 FAIL_IF(!inst);
1043 *inst = MOV_rm_i32;
1044 compiler->mode32 = 0;
1045 return SLJIT_SUCCESS;
1046 }
1047
1048 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1049
1050 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1051 dst_r = src;
1052 else {
1053 if (sign) {
1054 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
1055 FAIL_IF(!inst);
1056 *inst++ = MOVSXD_r_rm;
1057 } else {
1058 compiler->mode32 = 1;
1059 FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
1060 compiler->mode32 = 0;
1061 }
1062 }
1063
1064 if (dst & SLJIT_MEM) {
1065 compiler->mode32 = 1;
1066 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1067 FAIL_IF(!inst);
1068 *inst = MOV_rm_r;
1069 compiler->mode32 = 0;
1070 }
1071
1072 return SLJIT_SUCCESS;
1073}
1074
1075static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1076{
1077 sljit_s32 tmp, size;
1078
1079 /* Don't adjust shadow stack if it isn't enabled. */
1080 if (!cpu_has_shadow_stack())
1081 return SLJIT_SUCCESS;
1082
1083 size = compiler->local_size;
1084 tmp = compiler->scratches;
1085 if (tmp >= SLJIT_FIRST_SAVED_REG)
1086 size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
1087 tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
1088 if (SLJIT_S0 >= tmp)
1089 size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);
1090
1091 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1092}
1093