1 | /* |
2 | * Stack-less Just-In-Time compiler |
3 | * |
4 | * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without modification, are |
7 | * permitted provided that the following conditions are met: |
8 | * |
9 | * 1. Redistributions of source code must retain the above copyright notice, this list of |
10 | * conditions and the following disclaimer. |
11 | * |
12 | * 2. Redistributions in binary form must reproduce the above copyright notice, this list |
13 | * of conditions and the following disclaimer in the documentation and/or other materials |
14 | * provided with the distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY |
17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT |
19 | * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
21 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
22 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
24 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | */ |
26 | |
27 | SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) |
28 | { |
29 | return "x86" SLJIT_CPUINFO; |
30 | } |
31 | |
32 | /* |
33 | 32b register indexes: |
34 | 0 - EAX |
35 | 1 - ECX |
36 | 2 - EDX |
37 | 3 - EBX |
38 | 4 - ESP |
39 | 5 - EBP |
40 | 6 - ESI |
41 | 7 - EDI |
42 | */ |
43 | |
44 | /* |
45 | 64b register indexes: |
46 | 0 - RAX |
47 | 1 - RCX |
48 | 2 - RDX |
49 | 3 - RBX |
50 | 4 - RSP |
51 | 5 - RBP |
52 | 6 - RSI |
53 | 7 - RDI |
54 | 8 - R8 - From now on REX prefix is required |
55 | 9 - R9 |
56 | 10 - R10 |
57 | 11 - R11 |
58 | 12 - R12 |
59 | 13 - R13 |
60 | 14 - R14 |
61 | 15 - R15 |
62 | */ |
63 | |
64 | #define TMP_FREG (0) |
65 | |
66 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
67 | |
68 | /* Last register + 1. */ |
69 | #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) |
70 | |
71 | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { |
72 | 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5 |
73 | }; |
74 | |
75 | #define CHECK_EXTRA_REGS(p, w, do) \ |
76 | if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ |
77 | w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ |
78 | p = SLJIT_MEM1(SLJIT_SP); \ |
79 | do; \ |
80 | } |
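/* Illustrative example: SLJIT_R3..SLJIT_S3 have no machine register on x86-32
   (their reg_map entries are 0), so the macro rewrites such an operand into a
   stack slot access, e.g. p == SLJIT_R3 becomes SLJIT_MEM1(SLJIT_SP) with
   w == 2 * SSIZE_OF(sw). */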
81 | |
82 | #else /* SLJIT_CONFIG_X86_32 */ |
83 | |
84 | /* Last register + 1. */ |
85 | #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) |
86 | #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) |
87 | |
/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better used as a higher saved register. */
91 | #ifndef _WIN64 |
92 | /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */ |
93 | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
94 | 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9 |
95 | }; |
96 | /* low-map. reg_map & 0x7. */ |
97 | static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
98 | 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 |
99 | }; |
100 | #else |
101 | /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */ |
102 | static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
103 | 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10 |
104 | }; |
105 | /* low-map. reg_map & 0x7. */ |
106 | static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { |
107 | 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2 |
108 | }; |
109 | #endif |
110 | |
111 | /* Args: xmm0-xmm3 */ |
112 | static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { |
113 | 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 |
114 | }; |
115 | /* low-map. freg_map & 0x7. */ |
116 | static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { |
117 | 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 |
118 | }; |
119 | |
120 | #define REX_W 0x48 |
121 | #define REX_R 0x44 |
122 | #define REX_X 0x42 |
123 | #define REX_B 0x41 |
124 | #define REX 0x40 |
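/* The REX prefix byte has the form 0100WRXB: REX_W selects a 64-bit operand
   size, while REX_R, REX_X and REX_B extend the ModRM reg field, the SIB index
   and the ModRM rm / SIB base field to registers 8-15
   (e.g. REX_W | REX_B == 0x49). */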
125 | |
126 | #ifndef _WIN64 |
127 | #define HALFWORD_MAX 0x7fffffffl |
128 | #define HALFWORD_MIN -0x80000000l |
129 | #else |
130 | #define HALFWORD_MAX 0x7fffffffll |
131 | #define HALFWORD_MIN -0x80000000ll |
132 | #endif |
133 | |
134 | #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) |
135 | #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) |
136 | |
#define CHECK_EXTRA_REGS(p, w, do)
138 | |
139 | #endif /* SLJIT_CONFIG_X86_32 */ |
140 | |
141 | #define U8(v) ((sljit_u8)(v)) |
142 | |
143 | |
144 | /* Size flags for emit_x86_instruction: */ |
145 | #define EX86_BIN_INS 0x0010 |
146 | #define EX86_SHIFT_INS 0x0020 |
147 | #define EX86_REX 0x0040 |
148 | #define EX86_NO_REXW 0x0080 |
149 | #define EX86_BYTE_ARG 0x0100 |
150 | #define EX86_HALF_ARG 0x0200 |
151 | #define EX86_PREF_66 0x0400 |
152 | #define EX86_PREF_F2 0x0800 |
153 | #define EX86_PREF_F3 0x1000 |
154 | #define EX86_SSE2_OP1 0x2000 |
155 | #define EX86_SSE2_OP2 0x4000 |
156 | #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) |
157 | |
158 | /* --------------------------------------------------------------------- */ |
/* Instruction forms */
160 | /* --------------------------------------------------------------------- */ |
161 | |
162 | #define ADD (/* BINARY */ 0 << 3) |
163 | #define ADD_EAX_i32 0x05 |
164 | #define ADD_r_rm 0x03 |
165 | #define ADD_rm_r 0x01 |
166 | #define ADDSD_x_xm 0x58 |
167 | #define ADC (/* BINARY */ 2 << 3) |
168 | #define ADC_EAX_i32 0x15 |
169 | #define ADC_r_rm 0x13 |
170 | #define ADC_rm_r 0x11 |
171 | #define AND (/* BINARY */ 4 << 3) |
172 | #define AND_EAX_i32 0x25 |
173 | #define AND_r_rm 0x23 |
174 | #define AND_rm_r 0x21 |
175 | #define ANDPD_x_xm 0x54 |
176 | #define BSR_r_rm (/* GROUP_0F */ 0xbd) |
177 | #define BSF_r_rm (/* GROUP_0F */ 0xbc) |
178 | #define CALL_i32 0xe8 |
179 | #define CALL_rm (/* GROUP_FF */ 2 << 3) |
180 | #define CDQ 0x99 |
181 | #define CMOVE_r_rm (/* GROUP_0F */ 0x44) |
182 | #define CMP (/* BINARY */ 7 << 3) |
183 | #define CMP_EAX_i32 0x3d |
184 | #define CMP_r_rm 0x3b |
185 | #define CMP_rm_r 0x39 |
186 | #define CVTPD2PS_x_xm 0x5a |
187 | #define CVTSI2SD_x_rm 0x2a |
188 | #define CVTTSD2SI_r_xm 0x2c |
189 | #define DIV (/* GROUP_F7 */ 6 << 3) |
190 | #define DIVSD_x_xm 0x5e |
191 | #define FLDS 0xd9 |
192 | #define FLDL 0xdd |
193 | #define FSTPS 0xd9 |
194 | #define FSTPD 0xdd |
195 | #define INT3 0xcc |
196 | #define IDIV (/* GROUP_F7 */ 7 << 3) |
197 | #define IMUL (/* GROUP_F7 */ 5 << 3) |
198 | #define IMUL_r_rm (/* GROUP_0F */ 0xaf) |
199 | #define IMUL_r_rm_i8 0x6b |
200 | #define IMUL_r_rm_i32 0x69 |
201 | #define JE_i8 0x74 |
202 | #define JNE_i8 0x75 |
203 | #define JMP_i8 0xeb |
204 | #define JMP_i32 0xe9 |
205 | #define JMP_rm (/* GROUP_FF */ 4 << 3) |
206 | #define LEA_r_m 0x8d |
207 | #define LOOP_i8 0xe2 |
208 | #define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd) |
209 | #define MOV_r_rm 0x8b |
210 | #define MOV_r_i32 0xb8 |
211 | #define MOV_rm_r 0x89 |
212 | #define MOV_rm_i32 0xc7 |
213 | #define MOV_rm8_i8 0xc6 |
214 | #define MOV_rm8_r8 0x88 |
215 | #define MOVAPS_x_xm 0x28 |
216 | #define MOVAPS_xm_x 0x29 |
217 | #define MOVSD_x_xm 0x10 |
218 | #define MOVSD_xm_x 0x11 |
219 | #define MOVSXD_r_rm 0x63 |
220 | #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) |
221 | #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) |
222 | #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) |
223 | #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) |
224 | #define MUL (/* GROUP_F7 */ 4 << 3) |
225 | #define MULSD_x_xm 0x59 |
226 | #define NEG_rm (/* GROUP_F7 */ 3 << 3) |
227 | #define NOP 0x90 |
228 | #define NOT_rm (/* GROUP_F7 */ 2 << 3) |
229 | #define OR (/* BINARY */ 1 << 3) |
230 | #define OR_r_rm 0x0b |
231 | #define OR_EAX_i32 0x0d |
232 | #define OR_rm_r 0x09 |
233 | #define OR_rm8_r8 0x08 |
234 | #define POP_r 0x58 |
235 | #define POP_rm 0x8f |
236 | #define POPF 0x9d |
237 | #define PREFETCH 0x18 |
238 | #define PUSH_i32 0x68 |
239 | #define PUSH_r 0x50 |
240 | #define PUSH_rm (/* GROUP_FF */ 6 << 3) |
241 | #define PUSHF 0x9c |
242 | #define ROL (/* SHIFT */ 0 << 3) |
243 | #define ROR (/* SHIFT */ 1 << 3) |
244 | #define RET_near 0xc3 |
245 | #define RET_i16 0xc2 |
246 | #define SBB (/* BINARY */ 3 << 3) |
247 | #define SBB_EAX_i32 0x1d |
248 | #define SBB_r_rm 0x1b |
249 | #define SBB_rm_r 0x19 |
250 | #define SAR (/* SHIFT */ 7 << 3) |
251 | #define SHL (/* SHIFT */ 4 << 3) |
252 | #define SHLD (/* GROUP_0F */ 0xa5) |
253 | #define SHRD (/* GROUP_0F */ 0xad) |
254 | #define SHR (/* SHIFT */ 5 << 3) |
255 | #define SUB (/* BINARY */ 5 << 3) |
256 | #define SUB_EAX_i32 0x2d |
257 | #define SUB_r_rm 0x2b |
258 | #define SUB_rm_r 0x29 |
259 | #define SUBSD_x_xm 0x5c |
260 | #define TEST_EAX_i32 0xa9 |
261 | #define TEST_rm_r 0x85 |
262 | #define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc) |
263 | #define UCOMISD_x_xm 0x2e |
264 | #define UNPCKLPD_x_xm 0x14 |
265 | #define XCHG_EAX_r 0x90 |
266 | #define XCHG_r_rm 0x87 |
267 | #define XOR (/* BINARY */ 6 << 3) |
268 | #define XOR_EAX_i32 0x35 |
269 | #define XOR_r_rm 0x33 |
270 | #define XOR_rm_r 0x31 |
271 | #define XORPD_x_xm 0x57 |
272 | |
273 | #define GROUP_0F 0x0f |
274 | #define GROUP_F3 0xf3 |
275 | #define GROUP_F7 0xf7 |
276 | #define GROUP_FF 0xff |
277 | #define GROUP_BINARY_81 0x81 |
278 | #define GROUP_BINARY_83 0x83 |
279 | #define GROUP_SHIFT_1 0xd1 |
280 | #define GROUP_SHIFT_N 0xc1 |
281 | #define GROUP_SHIFT_CL 0xd3 |
282 | |
283 | #define MOD_REG 0xc0 |
284 | #define MOD_DISP8 0x40 |
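/* Values for the ModRM "mod" field: MOD_REG (0b11 << 6) selects a register
   operand, MOD_DISP8 (0b01 << 6) selects a memory operand with an 8-bit
   displacement. */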
285 | |
286 | #define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) |
287 | |
288 | #define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) |
289 | #define POP_REG(r) (*inst++ = U8(POP_r + (r))) |
290 | #define RET() (*inst++ = RET_near) |
291 | #define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) |
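/* Note: RET_I16() emits "ret imm16" with the high byte of the immediate fixed
   to zero, so it only supports values up to 255. */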
292 | |
/* These static variables are not affected by multithreading, since they only
   store built-in CPU features. Several threads may detect the CPU features
   at the same time and overwrite them, but they always write the same value. */
296 | #define CPU_FEATURE_DETECTED 0x001 |
297 | #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) |
298 | #define CPU_FEATURE_SSE2 0x002 |
299 | #endif |
300 | #define CPU_FEATURE_LZCNT 0x004 |
301 | #define CPU_FEATURE_TZCNT 0x008 |
302 | #define CPU_FEATURE_CMOV 0x010 |
303 | |
304 | static sljit_u32 cpu_feature_list = 0; |
305 | |
306 | #ifdef _WIN32_WCE |
307 | #include <cmnintrin.h> |
308 | #elif defined(_MSC_VER) && _MSC_VER >= 1400 |
309 | #include <intrin.h> |
310 | #endif |
311 | |
312 | /******************************************************/ |
313 | /* Unaligned-store functions */ |
314 | /******************************************************/ |
315 | |
316 | static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) |
317 | { |
318 | SLJIT_MEMCPY(addr, &value, sizeof(value)); |
319 | } |
320 | |
321 | static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) |
322 | { |
323 | SLJIT_MEMCPY(addr, &value, sizeof(value)); |
324 | } |
325 | |
326 | static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) |
327 | { |
328 | SLJIT_MEMCPY(addr, &value, sizeof(value)); |
329 | } |
330 | |
331 | /******************************************************/ |
332 | /* Utility functions */ |
333 | /******************************************************/ |
334 | |
335 | static void get_cpu_features(void) |
336 | { |
337 | sljit_u32 feature_list = CPU_FEATURE_DETECTED; |
338 | sljit_u32 value; |
339 | |
340 | #if defined(_MSC_VER) && _MSC_VER >= 1400 |
341 | |
342 | int CPUInfo[4]; |
343 | |
344 | __cpuid(CPUInfo, 0); |
345 | if (CPUInfo[0] >= 7) { |
346 | __cpuidex(CPUInfo, 7, 0); |
347 | if (CPUInfo[1] & 0x8) |
348 | feature_list |= CPU_FEATURE_TZCNT; |
349 | } |
350 | |
351 | __cpuid(CPUInfo, (int)0x80000001); |
352 | if (CPUInfo[2] & 0x20) |
353 | feature_list |= CPU_FEATURE_LZCNT; |
354 | |
355 | __cpuid(CPUInfo, 1); |
356 | value = (sljit_u32)CPUInfo[3]; |
357 | |
358 | #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) |
359 | |
360 | /* AT&T syntax. */ |
361 | __asm__ ( |
362 | "movl $0x0, %%eax\n" |
363 | "lzcnt %%eax, %%eax\n" |
364 | "setnz %%al\n" |
365 | "movl %%eax, %0\n" |
366 | : "=g" (value) |
367 | : |
368 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
369 | : "eax" |
370 | #else |
371 | : "rax" |
372 | #endif |
373 | ); |
374 | |
375 | if (value & 0x1) |
376 | feature_list |= CPU_FEATURE_LZCNT; |
377 | |
378 | __asm__ ( |
379 | "movl $0x0, %%eax\n" |
380 | "tzcnt %%eax, %%eax\n" |
381 | "setnz %%al\n" |
382 | "movl %%eax, %0\n" |
383 | : "=g" (value) |
384 | : |
385 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
386 | : "eax" |
387 | #else |
388 | : "rax" |
389 | #endif |
390 | ); |
391 | |
392 | if (value & 0x1) |
393 | feature_list |= CPU_FEATURE_TZCNT; |
394 | |
395 | __asm__ ( |
396 | "movl $0x1, %%eax\n" |
397 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
398 | /* On x86-32, there is no red zone, so this |
399 | should work (no need for a local variable). */ |
400 | "push %%ebx\n" |
401 | #endif |
402 | "cpuid\n" |
403 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
404 | "pop %%ebx\n" |
405 | #endif |
406 | "movl %%edx, %0\n" |
407 | : "=g" (value) |
408 | : |
409 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
410 | : "%eax" , "%ecx" , "%edx" |
411 | #else |
412 | : "%rax" , "%rbx" , "%rcx" , "%rdx" |
413 | #endif |
414 | ); |
415 | |
416 | #else /* _MSC_VER && _MSC_VER >= 1400 */ |
417 | |
418 | /* Intel syntax. */ |
419 | __asm { |
420 | mov eax, 0 |
421 | lzcnt eax, eax |
422 | setnz al |
423 | mov value, eax |
424 | } |
425 | |
426 | if (value & 0x1) |
427 | feature_list |= CPU_FEATURE_LZCNT; |
428 | |
429 | __asm { |
430 | mov eax, 0 |
431 | tzcnt eax, eax |
432 | setnz al |
433 | mov value, eax |
434 | } |
435 | |
436 | if (value & 0x1) |
437 | feature_list |= CPU_FEATURE_TZCNT; |
438 | |
439 | __asm { |
440 | mov eax, 1 |
441 | cpuid |
442 | mov value, edx |
443 | } |
444 | |
445 | #endif /* _MSC_VER && _MSC_VER >= 1400 */ |
446 | |
447 | #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) |
448 | if (value & 0x4000000) |
449 | feature_list |= CPU_FEATURE_SSE2; |
450 | #endif |
451 | if (value & 0x8000) |
452 | feature_list |= CPU_FEATURE_CMOV; |
453 | |
454 | cpu_feature_list = feature_list; |
455 | } |
456 | |
457 | static sljit_u8 get_jump_code(sljit_uw type) |
458 | { |
459 | switch (type) { |
460 | case SLJIT_EQUAL: |
461 | case SLJIT_F_EQUAL: |
462 | case SLJIT_UNORDERED_OR_EQUAL: |
463 | case SLJIT_ORDERED_EQUAL: /* Not supported. */ |
464 | return 0x84 /* je */; |
465 | |
466 | case SLJIT_NOT_EQUAL: |
467 | case SLJIT_F_NOT_EQUAL: |
468 | case SLJIT_ORDERED_NOT_EQUAL: |
469 | case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */ |
470 | return 0x85 /* jne */; |
471 | |
472 | case SLJIT_LESS: |
473 | case SLJIT_CARRY: |
474 | case SLJIT_F_LESS: |
475 | case SLJIT_UNORDERED_OR_LESS: |
476 | case SLJIT_UNORDERED_OR_GREATER: |
477 | return 0x82 /* jc */; |
478 | |
479 | case SLJIT_GREATER_EQUAL: |
480 | case SLJIT_NOT_CARRY: |
481 | case SLJIT_F_GREATER_EQUAL: |
482 | case SLJIT_ORDERED_GREATER_EQUAL: |
483 | case SLJIT_ORDERED_LESS_EQUAL: |
484 | return 0x83 /* jae */; |
485 | |
486 | case SLJIT_GREATER: |
487 | case SLJIT_F_GREATER: |
488 | case SLJIT_ORDERED_LESS: |
489 | case SLJIT_ORDERED_GREATER: |
490 | return 0x87 /* jnbe */; |
491 | |
492 | case SLJIT_LESS_EQUAL: |
493 | case SLJIT_F_LESS_EQUAL: |
494 | case SLJIT_UNORDERED_OR_GREATER_EQUAL: |
495 | case SLJIT_UNORDERED_OR_LESS_EQUAL: |
496 | return 0x86 /* jbe */; |
497 | |
498 | case SLJIT_SIG_LESS: |
499 | return 0x8c /* jl */; |
500 | |
501 | case SLJIT_SIG_GREATER_EQUAL: |
502 | return 0x8d /* jnl */; |
503 | |
504 | case SLJIT_SIG_GREATER: |
505 | return 0x8f /* jnle */; |
506 | |
507 | case SLJIT_SIG_LESS_EQUAL: |
508 | return 0x8e /* jle */; |
509 | |
510 | case SLJIT_OVERFLOW: |
511 | return 0x80 /* jo */; |
512 | |
513 | case SLJIT_NOT_OVERFLOW: |
514 | return 0x81 /* jno */; |
515 | |
516 | case SLJIT_UNORDERED: |
517 | return 0x8a /* jp */; |
518 | |
519 | case SLJIT_ORDERED: |
520 | return 0x8b /* jpo */; |
521 | } |
522 | return 0; |
523 | } |
524 | |
525 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
526 | static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset); |
527 | #else |
528 | static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr); |
529 | static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label); |
530 | #endif |
531 | |
532 | static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) |
533 | { |
534 | sljit_uw type = jump->flags >> TYPE_SHIFT; |
535 | sljit_s32 short_jump; |
536 | sljit_uw label_addr; |
537 | |
538 | if (jump->flags & JUMP_LABEL) |
539 | label_addr = (sljit_uw)(code + jump->u.label->size); |
540 | else |
541 | label_addr = jump->u.target - (sljit_uw)executable_offset; |
542 | |
543 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
544 | if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) |
545 | return generate_far_jump_code(jump, code_ptr); |
546 | #endif |
547 | |
548 | short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; |
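/* A short jump is opcode + rel8 (2 bytes), so the displacement is measured from
   jump->addr + 2. The rel8 forms of Jcc (0x70..0x7f) are the 0x0f-prefixed
   rel32 forms (0x80..0x8f) minus 0x10, which is used below. */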
549 | |
550 | if (type == SLJIT_JUMP) { |
551 | if (short_jump) |
552 | *code_ptr++ = JMP_i8; |
553 | else |
554 | *code_ptr++ = JMP_i32; |
555 | jump->addr++; |
556 | } |
557 | else if (type >= SLJIT_FAST_CALL) { |
558 | short_jump = 0; |
559 | *code_ptr++ = CALL_i32; |
560 | jump->addr++; |
561 | } |
562 | else if (short_jump) { |
563 | *code_ptr++ = U8(get_jump_code(type) - 0x10); |
564 | jump->addr++; |
565 | } |
566 | else { |
567 | *code_ptr++ = GROUP_0F; |
568 | *code_ptr++ = get_jump_code(type); |
569 | jump->addr += 2; |
570 | } |
571 | |
572 | if (short_jump) { |
573 | jump->flags |= PATCH_MB; |
574 | code_ptr += sizeof(sljit_s8); |
575 | } else { |
576 | jump->flags |= PATCH_MW; |
577 | code_ptr += sizeof(sljit_s32); |
578 | } |
579 | |
580 | return code_ptr; |
581 | } |
582 | |
583 | SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) |
584 | { |
585 | struct sljit_memory_fragment *buf; |
586 | sljit_u8 *code; |
587 | sljit_u8 *code_ptr; |
588 | sljit_u8 *buf_ptr; |
589 | sljit_u8 *buf_end; |
590 | sljit_u8 len; |
591 | sljit_sw executable_offset; |
592 | sljit_uw jump_addr; |
593 | |
594 | struct sljit_label *label; |
595 | struct sljit_jump *jump; |
596 | struct sljit_const *const_; |
597 | struct sljit_put_label *put_label; |
598 | |
599 | CHECK_ERROR_PTR(); |
600 | CHECK_PTR(check_sljit_generate_code(compiler)); |
601 | reverse_buf(compiler); |
602 | |
603 | /* Second code generation pass. */ |
604 | code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data); |
605 | PTR_FAIL_WITH_EXEC_IF(code); |
606 | buf = compiler->buf; |
607 | |
608 | code_ptr = code; |
609 | label = compiler->labels; |
610 | jump = compiler->jumps; |
611 | const_ = compiler->consts; |
612 | put_label = compiler->put_labels; |
613 | executable_offset = SLJIT_EXEC_OFFSET(code); |
614 | |
615 | do { |
616 | buf_ptr = buf->memory; |
617 | buf_end = buf_ptr + buf->used_size; |
618 | do { |
619 | len = *buf_ptr++; |
620 | if (len > 0) { |
621 | /* The code is already generated. */ |
622 | SLJIT_MEMCPY(code_ptr, buf_ptr, len); |
623 | code_ptr += len; |
624 | buf_ptr += len; |
625 | } |
626 | else { |
627 | switch (*buf_ptr) { |
628 | case 0: |
629 | label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); |
630 | label->size = (sljit_uw)(code_ptr - code); |
631 | label = label->next; |
632 | break; |
633 | case 1: |
634 | jump->addr = (sljit_uw)code_ptr; |
635 | if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) |
636 | code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset); |
637 | else { |
638 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
639 | code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset); |
640 | #else |
641 | code_ptr = generate_far_jump_code(jump, code_ptr); |
642 | #endif |
643 | } |
644 | jump = jump->next; |
645 | break; |
646 | case 2: |
647 | const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); |
648 | const_ = const_->next; |
649 | break; |
650 | default: |
651 | SLJIT_ASSERT(*buf_ptr == 3); |
652 | SLJIT_ASSERT(put_label->label); |
653 | put_label->addr = (sljit_uw)code_ptr; |
654 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
655 | code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size); |
656 | #endif |
657 | put_label = put_label->next; |
658 | break; |
659 | } |
660 | buf_ptr++; |
661 | } |
662 | } while (buf_ptr < buf_end); |
663 | SLJIT_ASSERT(buf_ptr == buf_end); |
664 | buf = buf->next; |
665 | } while (buf); |
666 | |
667 | SLJIT_ASSERT(!label); |
668 | SLJIT_ASSERT(!jump); |
669 | SLJIT_ASSERT(!const_); |
670 | SLJIT_ASSERT(!put_label); |
671 | SLJIT_ASSERT(code_ptr <= code + compiler->size); |
672 | |
673 | jump = compiler->jumps; |
674 | while (jump) { |
675 | if (jump->flags & (PATCH_MB | PATCH_MW)) { |
676 | if (jump->flags & JUMP_LABEL) |
677 | jump_addr = jump->u.label->addr; |
678 | else |
679 | jump_addr = jump->u.target; |
680 | |
681 | jump_addr -= jump->addr + (sljit_uw)executable_offset; |
682 | |
683 | if (jump->flags & PATCH_MB) { |
684 | jump_addr -= sizeof(sljit_s8); |
685 | SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127); |
686 | *(sljit_u8*)jump->addr = U8(jump_addr); |
687 | } else { |
688 | jump_addr -= sizeof(sljit_s32); |
689 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
690 | sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr); |
691 | #else |
692 | SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX); |
693 | sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr); |
694 | #endif |
695 | } |
696 | } |
697 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
698 | else if (jump->flags & PATCH_MD) { |
699 | SLJIT_ASSERT(jump->flags & JUMP_LABEL); |
700 | sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr); |
701 | } |
702 | #endif |
703 | |
704 | jump = jump->next; |
705 | } |
706 | |
707 | put_label = compiler->put_labels; |
708 | while (put_label) { |
709 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
710 | sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); |
711 | #else |
712 | if (put_label->flags & PATCH_MD) { |
713 | SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX); |
714 | sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr); |
715 | } |
716 | else { |
717 | SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX); |
718 | sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr); |
719 | } |
720 | #endif |
721 | |
722 | put_label = put_label->next; |
723 | } |
724 | |
725 | compiler->error = SLJIT_ERR_COMPILED; |
726 | compiler->executable_offset = executable_offset; |
727 | compiler->executable_size = (sljit_uw)(code_ptr - code); |
728 | |
729 | code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); |
730 | |
731 | SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1); |
732 | return (void*)code; |
733 | } |
734 | |
735 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) |
736 | { |
737 | switch (feature_type) { |
738 | case SLJIT_HAS_FPU: |
739 | #ifdef SLJIT_IS_FPU_AVAILABLE |
740 | return SLJIT_IS_FPU_AVAILABLE; |
741 | #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) |
742 | if (cpu_feature_list == 0) |
743 | get_cpu_features(); |
744 | return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; |
745 | #else /* SLJIT_DETECT_SSE2 */ |
746 | return 1; |
747 | #endif /* SLJIT_DETECT_SSE2 */ |
748 | |
749 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
750 | case SLJIT_HAS_VIRTUAL_REGISTERS: |
751 | return 1; |
752 | #endif /* SLJIT_CONFIG_X86_32 */ |
753 | |
754 | case SLJIT_HAS_CLZ: |
755 | if (cpu_feature_list == 0) |
756 | get_cpu_features(); |
757 | |
758 | return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2; |
759 | |
760 | case SLJIT_HAS_CTZ: |
761 | if (cpu_feature_list == 0) |
762 | get_cpu_features(); |
763 | |
764 | return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2; |
765 | |
766 | case SLJIT_HAS_CMOV: |
767 | if (cpu_feature_list == 0) |
768 | get_cpu_features(); |
769 | return (cpu_feature_list & CPU_FEATURE_CMOV) != 0; |
770 | |
771 | case SLJIT_HAS_ROT: |
772 | case SLJIT_HAS_PREFETCH: |
773 | return 1; |
774 | |
775 | case SLJIT_HAS_SSE2: |
776 | #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) |
777 | if (cpu_feature_list == 0) |
778 | get_cpu_features(); |
779 | return (cpu_feature_list & CPU_FEATURE_SSE2) != 0; |
780 | #else /* !SLJIT_DETECT_SSE2 */ |
781 | return 1; |
782 | #endif /* SLJIT_DETECT_SSE2 */ |
783 | |
784 | default: |
785 | return 0; |
786 | } |
787 | } |
788 | |
789 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) |
790 | { |
791 | if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL) |
792 | return 0; |
793 | |
794 | switch (type) { |
795 | case SLJIT_ORDERED_EQUAL: |
796 | case SLJIT_UNORDERED_OR_NOT_EQUAL: |
797 | return 0; |
798 | } |
799 | |
800 | return 1; |
801 | } |
802 | |
803 | /* --------------------------------------------------------------------- */ |
804 | /* Operators */ |
805 | /* --------------------------------------------------------------------- */ |
806 | |
807 | #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) |
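/* For example, BINARY_OPCODE(ADD) packs ADD_EAX_i32, ADD_r_rm, ADD_rm_r and the
   group /digit value ADD into one 32-bit constant, which emit_cum_binary()
   splits back into op_eax_imm, op_rm, op_mr and op_imm. */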
808 | |
809 | #define BINARY_IMM32(op_imm, immw, arg, argw) \ |
810 | do { \ |
811 | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ |
812 | FAIL_IF(!inst); \ |
813 | *(inst + 1) |= (op_imm); \ |
814 | } while (0) |
815 | |
816 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
817 | |
818 | #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ |
819 | do { \ |
820 | if (IS_HALFWORD(immw) || compiler->mode32) { \ |
821 | BINARY_IMM32(op_imm, immw, arg, argw); \ |
822 | } \ |
823 | else { \ |
824 | FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ |
825 | inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ |
826 | FAIL_IF(!inst); \ |
827 | *inst = (op_mr); \ |
828 | } \ |
829 | } while (0) |
830 | |
831 | #define BINARY_EAX_IMM(op_eax_imm, immw) \ |
832 | FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) |
833 | |
834 | #else /* !SLJIT_CONFIG_X86_64 */ |
835 | |
836 | #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ |
837 | BINARY_IMM32(op_imm, immw, arg, argw) |
838 | |
839 | #define BINARY_EAX_IMM(op_eax_imm, immw) \ |
840 | FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) |
841 | |
842 | #endif /* SLJIT_CONFIG_X86_64 */ |
843 | |
844 | static sljit_s32 emit_mov(struct sljit_compiler *compiler, |
845 | sljit_s32 dst, sljit_sw dstw, |
846 | sljit_s32 src, sljit_sw srcw); |
847 | |
848 | #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ |
849 | FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); |
850 | |
851 | static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, |
852 | sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src); |
853 | |
854 | static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, |
855 | sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); |
856 | |
857 | static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, |
858 | sljit_s32 src1, sljit_sw src1w, |
859 | sljit_s32 src2, sljit_sw src2w); |
860 | |
861 | static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler) |
862 | { |
863 | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) |
864 | /* Emit endbr32/endbr64 when CET is enabled. */ |
865 | sljit_u8 *inst; |
866 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
867 | FAIL_IF(!inst); |
868 | INC_SIZE(4); |
869 | *inst++ = 0xf3; |
870 | *inst++ = 0x0f; |
871 | *inst++ = 0x1e; |
872 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
873 | *inst = 0xfb; |
874 | #else |
875 | *inst = 0xfa; |
876 | #endif |
877 | #else /* !SLJIT_CONFIG_X86_CET */ |
878 | SLJIT_UNUSED_ARG(compiler); |
879 | #endif /* SLJIT_CONFIG_X86_CET */ |
880 | return SLJIT_SUCCESS; |
881 | } |
882 | |
883 | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) |
884 | |
885 | static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg) |
886 | { |
887 | sljit_u8 *inst; |
888 | sljit_s32 size; |
889 | |
890 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
891 | size = 5; |
892 | #else |
893 | size = 4; |
894 | #endif |
895 | |
896 | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
897 | FAIL_IF(!inst); |
898 | INC_SIZE(size); |
899 | *inst++ = 0xf3; |
900 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
901 | *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); |
902 | #endif |
903 | *inst++ = 0x0f; |
904 | *inst++ = 0x1e; |
905 | *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7); |
906 | return SLJIT_SUCCESS; |
907 | } |
908 | |
909 | static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg) |
910 | { |
911 | sljit_u8 *inst; |
912 | sljit_s32 size; |
913 | |
914 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
915 | size = 5; |
916 | #else |
917 | size = 4; |
918 | #endif |
919 | |
920 | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
921 | FAIL_IF(!inst); |
922 | INC_SIZE(size); |
923 | *inst++ = 0xf3; |
924 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
925 | *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B); |
926 | #endif |
927 | *inst++ = 0x0f; |
928 | *inst++ = 0xae; |
929 | *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7); |
930 | return SLJIT_SUCCESS; |
931 | } |
932 | |
933 | #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ |
934 | |
935 | static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void) |
936 | { |
937 | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) |
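/* _get_ssp() returns the current shadow stack pointer, or 0 when shadow stacks
   are not enabled, so a non-zero value means a shadow stack is active. */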
938 | return _get_ssp() != 0; |
939 | #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ |
940 | return 0; |
941 | #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ |
942 | } |
943 | |
944 | static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler, |
945 | sljit_s32 src, sljit_sw srcw) |
946 | { |
947 | #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) |
948 | sljit_u8 *inst, *jz_after_cmp_inst; |
949 | sljit_uw size_jz_after_cmp_inst; |
950 | |
951 | sljit_uw size_before_rdssp_inst = compiler->size; |
952 | |
953 | /* Generate "RDSSP TMP_REG1". */ |
954 | FAIL_IF(emit_rdssp(compiler, TMP_REG1)); |
955 | |
956 | /* Load return address on shadow stack into TMP_REG1. */ |
957 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
958 | SLJIT_ASSERT(reg_map[TMP_REG1] == 5); |
959 | |
960 | /* Hand code unsupported "mov 0x0(%ebp),%ebp". */ |
961 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); |
962 | FAIL_IF(!inst); |
963 | INC_SIZE(3); |
964 | *inst++ = 0x8b; |
965 | *inst++ = 0x6d; |
966 | *inst = 0; |
967 | #else /* !SLJIT_CONFIG_X86_32 */ |
968 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0); |
969 | #endif /* SLJIT_CONFIG_X86_32 */ |
970 | |
971 | /* Compare return address against TMP_REG1. */ |
972 | FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw)); |
973 | |
/* Generate JZ to skip the shadow stack adjustment when the shadow
   stack matches the normal stack. */
976 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
977 | FAIL_IF(!inst); |
978 | INC_SIZE(2); |
979 | *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10; |
980 | size_jz_after_cmp_inst = compiler->size; |
981 | jz_after_cmp_inst = inst; |
982 | |
983 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
984 | /* REX_W is not necessary. */ |
985 | compiler->mode32 = 1; |
986 | #endif |
987 | /* Load 1 into TMP_REG1. */ |
988 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); |
989 | |
990 | /* Generate "INCSSP TMP_REG1". */ |
991 | FAIL_IF(emit_incssp(compiler, TMP_REG1)); |
992 | |
993 | /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */ |
994 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
995 | FAIL_IF(!inst); |
996 | INC_SIZE(2); |
997 | *inst++ = JMP_i8; |
998 | *inst = size_before_rdssp_inst - compiler->size; |
999 | |
1000 | *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst; |
1001 | #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */ |
1002 | SLJIT_UNUSED_ARG(compiler); |
1003 | SLJIT_UNUSED_ARG(src); |
1004 | SLJIT_UNUSED_ARG(srcw); |
1005 | #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ |
1006 | return SLJIT_SUCCESS; |
1007 | } |
1008 | |
1009 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1010 | #include "sljitNativeX86_32.c" |
1011 | #else |
1012 | #include "sljitNativeX86_64.c" |
1013 | #endif |
1014 | |
1015 | static sljit_s32 emit_mov(struct sljit_compiler *compiler, |
1016 | sljit_s32 dst, sljit_sw dstw, |
1017 | sljit_s32 src, sljit_sw srcw) |
1018 | { |
1019 | sljit_u8* inst; |
1020 | |
1021 | if (FAST_IS_REG(src)) { |
1022 | inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); |
1023 | FAIL_IF(!inst); |
1024 | *inst = MOV_rm_r; |
1025 | return SLJIT_SUCCESS; |
1026 | } |
1027 | if (src & SLJIT_IMM) { |
1028 | if (FAST_IS_REG(dst)) { |
1029 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1030 | return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); |
1031 | #else |
1032 | if (!compiler->mode32) { |
1033 | if (NOT_HALFWORD(srcw)) |
1034 | return emit_load_imm64(compiler, dst, srcw); |
1035 | } |
1036 | else |
1037 | return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw); |
1038 | #endif |
1039 | } |
1040 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1041 | if (!compiler->mode32 && NOT_HALFWORD(srcw)) { |
1042 | /* Immediate to memory move. Only SLJIT_MOV operation copies |
1043 | an immediate directly into memory so TMP_REG1 can be used. */ |
1044 | FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); |
1045 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
1046 | FAIL_IF(!inst); |
1047 | *inst = MOV_rm_r; |
1048 | return SLJIT_SUCCESS; |
1049 | } |
1050 | #endif |
1051 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); |
1052 | FAIL_IF(!inst); |
1053 | *inst = MOV_rm_i32; |
1054 | return SLJIT_SUCCESS; |
1055 | } |
1056 | if (FAST_IS_REG(dst)) { |
1057 | inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); |
1058 | FAIL_IF(!inst); |
1059 | *inst = MOV_r_rm; |
1060 | return SLJIT_SUCCESS; |
1061 | } |
1062 | |
1063 | /* Memory to memory move. Only SLJIT_MOV operation copies |
1064 | data from memory to memory so TMP_REG1 can be used. */ |
1065 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); |
1066 | FAIL_IF(!inst); |
1067 | *inst = MOV_r_rm; |
1068 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
1069 | FAIL_IF(!inst); |
1070 | *inst = MOV_rm_r; |
1071 | return SLJIT_SUCCESS; |
1072 | } |
1073 | |
1074 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) |
1075 | { |
1076 | sljit_u8 *inst; |
1077 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1078 | sljit_uw size; |
1079 | #endif |
1080 | |
1081 | CHECK_ERROR(); |
1082 | CHECK(check_sljit_emit_op0(compiler, op)); |
1083 | |
1084 | switch (GET_OPCODE(op)) { |
1085 | case SLJIT_BREAKPOINT: |
1086 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1087 | FAIL_IF(!inst); |
1088 | INC_SIZE(1); |
1089 | *inst = INT3; |
1090 | break; |
1091 | case SLJIT_NOP: |
1092 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1093 | FAIL_IF(!inst); |
1094 | INC_SIZE(1); |
1095 | *inst = NOP; |
1096 | break; |
1097 | case SLJIT_LMUL_UW: |
1098 | case SLJIT_LMUL_SW: |
1099 | case SLJIT_DIVMOD_UW: |
1100 | case SLJIT_DIVMOD_SW: |
1101 | case SLJIT_DIV_UW: |
1102 | case SLJIT_DIV_SW: |
1103 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1104 | #ifdef _WIN64 |
1105 | SLJIT_ASSERT( |
1106 | reg_map[SLJIT_R0] == 0 |
1107 | && reg_map[SLJIT_R1] == 2 |
1108 | && reg_map[TMP_REG1] > 7); |
1109 | #else |
1110 | SLJIT_ASSERT( |
1111 | reg_map[SLJIT_R0] == 0 |
1112 | && reg_map[SLJIT_R1] < 7 |
1113 | && reg_map[TMP_REG1] == 2); |
1114 | #endif |
1115 | compiler->mode32 = op & SLJIT_32; |
1116 | #endif |
1117 | SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); |
1118 | |
1119 | op = GET_OPCODE(op); |
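/* Since SLJIT_DIV_UW == SLJIT_DIVMOD_UW + 0x2 (checked by the compile-time
   assert above), the (op | 0x2) tests below match both the DIVMOD and the DIV
   variant of each signedness. */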
1120 | if ((op | 0x2) == SLJIT_DIV_UW) { |
1121 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) |
1122 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); |
1123 | inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); |
1124 | #else |
1125 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); |
1126 | #endif |
1127 | FAIL_IF(!inst); |
1128 | *inst = XOR_r_rm; |
1129 | } |
1130 | |
1131 | if ((op | 0x2) == SLJIT_DIV_SW) { |
1132 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) |
1133 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); |
1134 | #endif |
1135 | |
1136 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1137 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1138 | FAIL_IF(!inst); |
1139 | INC_SIZE(1); |
1140 | *inst = CDQ; |
1141 | #else |
1142 | if (compiler->mode32) { |
1143 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1144 | FAIL_IF(!inst); |
1145 | INC_SIZE(1); |
1146 | *inst = CDQ; |
1147 | } else { |
1148 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
1149 | FAIL_IF(!inst); |
1150 | INC_SIZE(2); |
1151 | *inst++ = REX_W; |
1152 | *inst = CDQ; |
1153 | } |
1154 | #endif |
1155 | } |
1156 | |
1157 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1158 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); |
1159 | FAIL_IF(!inst); |
1160 | INC_SIZE(2); |
1161 | *inst++ = GROUP_F7; |
1162 | *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); |
1163 | #else |
1164 | #ifdef _WIN64 |
1165 | size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2; |
1166 | #else |
1167 | size = (!compiler->mode32) ? 3 : 2; |
1168 | #endif |
1169 | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
1170 | FAIL_IF(!inst); |
1171 | INC_SIZE(size); |
1172 | #ifdef _WIN64 |
1173 | if (!compiler->mode32) |
1174 | *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0); |
1175 | else if (op >= SLJIT_DIVMOD_UW) |
1176 | *inst++ = REX_B; |
1177 | *inst++ = GROUP_F7; |
1178 | *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); |
1179 | #else |
1180 | if (!compiler->mode32) |
1181 | *inst++ = REX_W; |
1182 | *inst++ = GROUP_F7; |
1183 | *inst = MOD_REG | reg_map[SLJIT_R1]; |
1184 | #endif |
1185 | #endif |
1186 | switch (op) { |
1187 | case SLJIT_LMUL_UW: |
1188 | *inst |= MUL; |
1189 | break; |
1190 | case SLJIT_LMUL_SW: |
1191 | *inst |= IMUL; |
1192 | break; |
1193 | case SLJIT_DIVMOD_UW: |
1194 | case SLJIT_DIV_UW: |
1195 | *inst |= DIV; |
1196 | break; |
1197 | case SLJIT_DIVMOD_SW: |
1198 | case SLJIT_DIV_SW: |
1199 | *inst |= IDIV; |
1200 | break; |
1201 | } |
1202 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) |
1203 | if (op <= SLJIT_DIVMOD_SW) |
1204 | EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); |
1205 | #else |
1206 | if (op >= SLJIT_DIV_UW) |
1207 | EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); |
1208 | #endif |
1209 | break; |
1210 | case SLJIT_ENDBR: |
1211 | return emit_endbranch(compiler); |
1212 | case SLJIT_SKIP_FRAMES_BEFORE_RETURN: |
1213 | return skip_frames_before_return(compiler); |
1214 | } |
1215 | |
1216 | return SLJIT_SUCCESS; |
1217 | } |
1218 | |
1219 | #define ENCODE_PREFIX(prefix) \ |
1220 | do { \ |
1221 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ |
1222 | FAIL_IF(!inst); \ |
1223 | INC_SIZE(1); \ |
1224 | *inst = U8(prefix); \ |
1225 | } while (0) |
1226 | |
1227 | static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, |
1228 | sljit_s32 dst, sljit_sw dstw, |
1229 | sljit_s32 src, sljit_sw srcw) |
1230 | { |
1231 | sljit_u8* inst; |
1232 | sljit_s32 dst_r; |
1233 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1234 | sljit_s32 work_r; |
1235 | #endif |
1236 | |
1237 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1238 | compiler->mode32 = 0; |
1239 | #endif |
1240 | |
1241 | if (src & SLJIT_IMM) { |
1242 | if (FAST_IS_REG(dst)) { |
1243 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1244 | return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); |
1245 | #else |
1246 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); |
1247 | FAIL_IF(!inst); |
1248 | *inst = MOV_rm_i32; |
1249 | return SLJIT_SUCCESS; |
1250 | #endif |
1251 | } |
1252 | inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); |
1253 | FAIL_IF(!inst); |
1254 | *inst = MOV_rm8_i8; |
1255 | return SLJIT_SUCCESS; |
1256 | } |
1257 | |
1258 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1259 | |
1260 | if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { |
1261 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1262 | if (reg_map[src] >= 4) { |
1263 | SLJIT_ASSERT(dst_r == TMP_REG1); |
1264 | EMIT_MOV(compiler, TMP_REG1, 0, src, 0); |
1265 | } else |
1266 | dst_r = src; |
1267 | #else |
1268 | dst_r = src; |
1269 | #endif |
1270 | } |
1271 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1272 | else if (FAST_IS_REG(src) && reg_map[src] >= 4) { |
1273 | /* src, dst are registers. */ |
1274 | SLJIT_ASSERT(FAST_IS_REG(dst)); |
1275 | if (reg_map[dst] < 4) { |
1276 | if (dst != src) |
1277 | EMIT_MOV(compiler, dst, 0, src, 0); |
1278 | inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); |
1279 | FAIL_IF(!inst); |
1280 | *inst++ = GROUP_0F; |
1281 | *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; |
1282 | } |
1283 | else { |
1284 | if (dst != src) |
1285 | EMIT_MOV(compiler, dst, 0, src, 0); |
1286 | if (sign) { |
1287 | /* shl reg, 24 */ |
1288 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); |
1289 | FAIL_IF(!inst); |
1290 | *inst |= SHL; |
1291 | /* sar reg, 24 */ |
1292 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); |
1293 | FAIL_IF(!inst); |
1294 | *inst |= SAR; |
1295 | } |
1296 | else { |
1297 | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); |
1298 | FAIL_IF(!inst); |
1299 | *(inst + 1) |= AND; |
1300 | } |
1301 | } |
1302 | return SLJIT_SUCCESS; |
1303 | } |
1304 | #endif |
1305 | else { |
1306 | /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ |
1307 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); |
1308 | FAIL_IF(!inst); |
1309 | *inst++ = GROUP_0F; |
1310 | *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; |
1311 | } |
1312 | |
1313 | if (dst & SLJIT_MEM) { |
1314 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1315 | if (dst_r == TMP_REG1) { |
/* Find an unused register whose reg_map value is < 4. */
1317 | if ((dst & REG_MASK) == SLJIT_R0) { |
1318 | if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) |
1319 | work_r = SLJIT_R2; |
1320 | else |
1321 | work_r = SLJIT_R1; |
1322 | } |
1323 | else { |
1324 | if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) |
1325 | work_r = SLJIT_R0; |
1326 | else if ((dst & REG_MASK) == SLJIT_R1) |
1327 | work_r = SLJIT_R2; |
1328 | else |
1329 | work_r = SLJIT_R1; |
1330 | } |
1331 | |
1332 | if (work_r == SLJIT_R0) { |
1333 | ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); |
1334 | } |
1335 | else { |
1336 | inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); |
1337 | FAIL_IF(!inst); |
1338 | *inst = XCHG_r_rm; |
1339 | } |
1340 | |
1341 | inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); |
1342 | FAIL_IF(!inst); |
1343 | *inst = MOV_rm8_r8; |
1344 | |
1345 | if (work_r == SLJIT_R0) { |
1346 | ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); |
1347 | } |
1348 | else { |
1349 | inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); |
1350 | FAIL_IF(!inst); |
1351 | *inst = XCHG_r_rm; |
1352 | } |
1353 | } |
1354 | else { |
1355 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); |
1356 | FAIL_IF(!inst); |
1357 | *inst = MOV_rm8_r8; |
1358 | } |
1359 | #else |
1360 | inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); |
1361 | FAIL_IF(!inst); |
1362 | *inst = MOV_rm8_r8; |
1363 | #endif |
1364 | } |
1365 | |
1366 | return SLJIT_SUCCESS; |
1367 | } |
1368 | |
1369 | static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op, |
1370 | sljit_s32 src, sljit_sw srcw) |
1371 | { |
1372 | sljit_u8* inst; |
1373 | |
1374 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1375 | compiler->mode32 = 1; |
1376 | #endif |
1377 | |
1378 | inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw); |
1379 | FAIL_IF(!inst); |
1380 | *inst++ = GROUP_0F; |
1381 | *inst++ = PREFETCH; |
1382 | |
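/* The ModRM reg field selects the hint: /1 = prefetcht0, /2 = prefetcht1,
   /3 = prefetcht2; the remaining case (SLJIT_PREFETCH_ONCE) keeps /0,
   i.e. prefetchnta. */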
1383 | if (op == SLJIT_PREFETCH_L1) |
1384 | *inst |= (1 << 3); |
1385 | else if (op == SLJIT_PREFETCH_L2) |
1386 | *inst |= (2 << 3); |
1387 | else if (op == SLJIT_PREFETCH_L3) |
1388 | *inst |= (3 << 3); |
1389 | |
1390 | return SLJIT_SUCCESS; |
1391 | } |
1392 | |
1393 | static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, |
1394 | sljit_s32 dst, sljit_sw dstw, |
1395 | sljit_s32 src, sljit_sw srcw) |
1396 | { |
1397 | sljit_u8* inst; |
1398 | sljit_s32 dst_r; |
1399 | |
1400 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1401 | compiler->mode32 = 0; |
1402 | #endif |
1403 | |
1404 | if (src & SLJIT_IMM) { |
1405 | if (FAST_IS_REG(dst)) { |
1406 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1407 | return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); |
1408 | #else |
1409 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); |
1410 | FAIL_IF(!inst); |
1411 | *inst = MOV_rm_i32; |
1412 | return SLJIT_SUCCESS; |
1413 | #endif |
1414 | } |
1415 | inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); |
1416 | FAIL_IF(!inst); |
1417 | *inst = MOV_rm_i32; |
1418 | return SLJIT_SUCCESS; |
1419 | } |
1420 | |
1421 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1422 | |
1423 | if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) |
1424 | dst_r = src; |
1425 | else { |
1426 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); |
1427 | FAIL_IF(!inst); |
1428 | *inst++ = GROUP_0F; |
1429 | *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; |
1430 | } |
1431 | |
1432 | if (dst & SLJIT_MEM) { |
1433 | inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); |
1434 | FAIL_IF(!inst); |
1435 | *inst = MOV_rm_r; |
1436 | } |
1437 | |
1438 | return SLJIT_SUCCESS; |
1439 | } |
1440 | |
1441 | static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, |
1442 | sljit_s32 dst, sljit_sw dstw, |
1443 | sljit_s32 src, sljit_sw srcw) |
1444 | { |
1445 | sljit_u8* inst; |
1446 | |
1447 | if (dst == src && dstw == srcw) { |
1448 | /* Same input and output */ |
1449 | inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); |
1450 | FAIL_IF(!inst); |
1451 | *inst++ = GROUP_F7; |
1452 | *inst |= opcode; |
1453 | return SLJIT_SUCCESS; |
1454 | } |
1455 | |
1456 | if (FAST_IS_REG(dst)) { |
1457 | EMIT_MOV(compiler, dst, 0, src, srcw); |
1458 | inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); |
1459 | FAIL_IF(!inst); |
1460 | *inst++ = GROUP_F7; |
1461 | *inst |= opcode; |
1462 | return SLJIT_SUCCESS; |
1463 | } |
1464 | |
1465 | EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); |
1466 | inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); |
1467 | FAIL_IF(!inst); |
1468 | *inst++ = GROUP_F7; |
1469 | *inst |= opcode; |
1470 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1471 | return SLJIT_SUCCESS; |
1472 | } |
1473 | |
1474 | static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, |
1475 | sljit_s32 dst, sljit_sw dstw, |
1476 | sljit_s32 src, sljit_sw srcw) |
1477 | { |
1478 | sljit_u8* inst; |
1479 | |
1480 | if (FAST_IS_REG(dst)) { |
1481 | EMIT_MOV(compiler, dst, 0, src, srcw); |
1482 | inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); |
1483 | FAIL_IF(!inst); |
1484 | *inst++ = GROUP_F7; |
1485 | *inst |= NOT_rm; |
1486 | inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); |
1487 | FAIL_IF(!inst); |
1488 | *inst = OR_r_rm; |
1489 | return SLJIT_SUCCESS; |
1490 | } |
1491 | |
1492 | EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); |
1493 | inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); |
1494 | FAIL_IF(!inst); |
1495 | *inst++ = GROUP_F7; |
1496 | *inst |= NOT_rm; |
1497 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); |
1498 | FAIL_IF(!inst); |
1499 | *inst = OR_r_rm; |
1500 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1501 | return SLJIT_SUCCESS; |
1502 | } |
1503 | |
1504 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1505 | static const sljit_sw emit_clz_arg = 32 + 31; |
1506 | static const sljit_sw emit_ctz_arg = 32; |
1507 | #endif |
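/* These in-memory constants feed the CMOV path in emit_clz_ctz() when TMP_REG1
   itself is the destination: BSR/BSF leave the destination undefined for a zero
   input, so it is replaced by 32 + 31 (CLZ, XORed with 31 afterwards) or
   32 (CTZ). */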
1508 | |
1509 | static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz, |
1510 | sljit_s32 dst, sljit_sw dstw, |
1511 | sljit_s32 src, sljit_sw srcw) |
1512 | { |
1513 | sljit_u8* inst; |
1514 | sljit_s32 dst_r; |
1515 | sljit_sw max; |
1516 | |
1517 | if (cpu_feature_list == 0) |
1518 | get_cpu_features(); |
1519 | |
1520 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1521 | |
1522 | if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) { |
1523 | /* Group prefix added separately. */ |
1524 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1525 | FAIL_IF(!inst); |
1526 | INC_SIZE(1); |
1527 | *inst++ = GROUP_F3; |
1528 | |
1529 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); |
1530 | FAIL_IF(!inst); |
1531 | *inst++ = GROUP_0F; |
1532 | *inst = is_clz ? LZCNT_r_rm : TZCNT_r_rm; |
1533 | |
1534 | if (dst & SLJIT_MEM) |
1535 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1536 | return SLJIT_SUCCESS; |
1537 | } |
1538 | |
1539 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); |
1540 | FAIL_IF(!inst); |
1541 | *inst++ = GROUP_0F; |
1542 | *inst = is_clz ? BSR_r_rm : BSF_r_rm; |
1543 | |
1544 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1545 | max = is_clz ? (32 + 31) : 32; |
1546 | |
1547 | if (cpu_feature_list & CPU_FEATURE_CMOV) { |
1548 | if (dst_r != TMP_REG1) { |
1549 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max); |
1550 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); |
1551 | } |
1552 | else |
1553 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg); |
1554 | |
1555 | FAIL_IF(!inst); |
1556 | *inst++ = GROUP_0F; |
1557 | *inst = CMOVE_r_rm; |
1558 | } |
1559 | else |
1560 | FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); |
1561 | |
1562 | if (is_clz) { |
1563 | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); |
1564 | FAIL_IF(!inst); |
1565 | *(inst + 1) |= XOR; |
1566 | } |
1567 | #else |
1568 | if (is_clz) |
1569 | max = compiler->mode32 ? (32 + 31) : (64 + 63); |
1570 | else |
1571 | max = compiler->mode32 ? 32 : 64; |
1572 | |
1573 | if (cpu_feature_list & CPU_FEATURE_CMOV) { |
1574 | EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max); |
1575 | |
1576 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); |
1577 | FAIL_IF(!inst); |
1578 | *inst++ = GROUP_0F; |
1579 | *inst = CMOVE_r_rm; |
1580 | } |
1581 | else |
1582 | FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max)); |
1583 | |
1584 | if (is_clz) { |
1585 | inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0); |
1586 | FAIL_IF(!inst); |
1587 | *(inst + 1) |= XOR; |
1588 | } |
1589 | #endif |
1590 | |
1591 | if (dst & SLJIT_MEM) |
1592 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1593 | return SLJIT_SUCCESS; |
1594 | } |
1595 | |
1596 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, |
1597 | sljit_s32 dst, sljit_sw dstw, |
1598 | sljit_s32 src, sljit_sw srcw) |
1599 | { |
1600 | sljit_s32 op_flags = GET_ALL_FLAGS(op); |
1601 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1602 | sljit_s32 dst_is_ereg = 0; |
1603 | #endif |
1604 | |
1605 | CHECK_ERROR(); |
1606 | CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); |
1607 | ADJUST_LOCAL_OFFSET(dst, dstw); |
1608 | ADJUST_LOCAL_OFFSET(src, srcw); |
1609 | |
1610 | CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); |
1611 | CHECK_EXTRA_REGS(src, srcw, (void)0); |
1612 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1613 | compiler->mode32 = op_flags & SLJIT_32; |
1614 | #endif |
1615 | |
1616 | op = GET_OPCODE(op); |
1617 | |
1618 | if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { |
1619 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1620 | compiler->mode32 = 0; |
1621 | #endif |
1622 | |
1623 | if (FAST_IS_REG(src) && src == dst) { |
1624 | if (!TYPE_CAST_NEEDED(op)) |
1625 | return SLJIT_SUCCESS; |
1626 | } |
1627 | |
1628 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1629 | if (op_flags & SLJIT_32) { |
1630 | if (src & SLJIT_MEM) { |
1631 | if (op == SLJIT_MOV_S32) |
1632 | op = SLJIT_MOV_U32; |
1633 | } |
1634 | else if (src & SLJIT_IMM) { |
1635 | if (op == SLJIT_MOV_U32) |
1636 | op = SLJIT_MOV_S32; |
1637 | } |
1638 | } |
1639 | #endif |
1640 | |
1641 | if (src & SLJIT_IMM) { |
1642 | switch (op) { |
1643 | case SLJIT_MOV_U8: |
1644 | srcw = (sljit_u8)srcw; |
1645 | break; |
1646 | case SLJIT_MOV_S8: |
1647 | srcw = (sljit_s8)srcw; |
1648 | break; |
1649 | case SLJIT_MOV_U16: |
1650 | srcw = (sljit_u16)srcw; |
1651 | break; |
1652 | case SLJIT_MOV_S16: |
1653 | srcw = (sljit_s16)srcw; |
1654 | break; |
1655 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1656 | case SLJIT_MOV_U32: |
1657 | srcw = (sljit_u32)srcw; |
1658 | break; |
1659 | case SLJIT_MOV_S32: |
1660 | srcw = (sljit_s32)srcw; |
1661 | break; |
1662 | #endif |
1663 | } |
1664 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1665 | if (SLJIT_UNLIKELY(dst_is_ereg)) |
1666 | return emit_mov(compiler, dst, dstw, src, srcw); |
1667 | #endif |
1668 | } |
1669 | |
1670 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1671 | if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { |
1672 | SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); |
1673 | dst = TMP_REG1; |
1674 | } |
1675 | #endif |
1676 | |
1677 | switch (op) { |
1678 | case SLJIT_MOV: |
1679 | case SLJIT_MOV_P: |
1680 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1681 | case SLJIT_MOV_U32: |
1682 | case SLJIT_MOV_S32: |
1683 | case SLJIT_MOV32: |
1684 | #endif |
1685 | EMIT_MOV(compiler, dst, dstw, src, srcw); |
1686 | break; |
1687 | case SLJIT_MOV_U8: |
1688 | FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); |
1689 | break; |
1690 | case SLJIT_MOV_S8: |
1691 | FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); |
1692 | break; |
1693 | case SLJIT_MOV_U16: |
1694 | FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); |
1695 | break; |
1696 | case SLJIT_MOV_S16: |
1697 | FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); |
1698 | break; |
1699 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1700 | case SLJIT_MOV_U32: |
1701 | FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); |
1702 | break; |
1703 | case SLJIT_MOV_S32: |
1704 | FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); |
1705 | break; |
1706 | case SLJIT_MOV32: |
1707 | compiler->mode32 = 1; |
1708 | EMIT_MOV(compiler, dst, dstw, src, srcw); |
1709 | compiler->mode32 = 0; |
1710 | break; |
1711 | #endif |
1712 | } |
1713 | |
1714 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1715 | if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) |
1716 | return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); |
1717 | #endif |
1718 | return SLJIT_SUCCESS; |
1719 | } |
1720 | |
1721 | switch (op) { |
1722 | case SLJIT_NOT: |
1723 | if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z)) |
1724 | return emit_not_with_flags(compiler, dst, dstw, src, srcw); |
1725 | return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); |
1726 | |
1727 | case SLJIT_CLZ: |
1728 | case SLJIT_CTZ: |
1729 | return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw); |
1730 | } |
1731 | |
1732 | return SLJIT_SUCCESS; |
1733 | } |
1734 | |
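/* Emits a cumulative (commutative) binary operation such as ADD, ADC, AND,
   OR or XOR. The op_types argument packs four opcode bytes: the short
   "accumulator, imm32" form, the "reg, r/m" form, the "r/m, reg" form and
   the immediate-group form used by BINARY_IMM. */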
1735 | static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, |
1736 | sljit_u32 op_types, |
1737 | sljit_s32 dst, sljit_sw dstw, |
1738 | sljit_s32 src1, sljit_sw src1w, |
1739 | sljit_s32 src2, sljit_sw src2w) |
1740 | { |
1741 | sljit_u8* inst; |
1742 | sljit_u8 op_eax_imm = U8(op_types >> 24); |
1743 | sljit_u8 op_rm = U8((op_types >> 16) & 0xff); |
1744 | sljit_u8 op_mr = U8((op_types >> 8) & 0xff); |
1745 | sljit_u8 op_imm = U8(op_types & 0xff); |
1746 | |
1747 | if (dst == src1 && dstw == src1w) { |
1748 | if (src2 & SLJIT_IMM) { |
1749 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1750 | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
1751 | #else |
1752 | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { |
1753 | #endif |
1754 | BINARY_EAX_IMM(op_eax_imm, src2w); |
1755 | } |
1756 | else { |
1757 | BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); |
1758 | } |
1759 | } |
1760 | else if (FAST_IS_REG(dst)) { |
1761 | inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); |
1762 | FAIL_IF(!inst); |
1763 | *inst = op_rm; |
1764 | } |
1765 | else if (FAST_IS_REG(src2)) { |
1766 | /* Special exception for sljit_emit_op_flags. */ |
1767 | inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); |
1768 | FAIL_IF(!inst); |
1769 | *inst = op_mr; |
1770 | } |
1771 | else { |
1772 | EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); |
1773 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
1774 | FAIL_IF(!inst); |
1775 | *inst = op_mr; |
1776 | } |
1777 | return SLJIT_SUCCESS; |
1778 | } |
1779 | |
	/* This case is only valid for cumulative (commutative) operations. */
1781 | if (dst == src2 && dstw == src2w) { |
1782 | if (src1 & SLJIT_IMM) { |
1783 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1784 | if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { |
1785 | #else |
1786 | if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { |
1787 | #endif |
1788 | BINARY_EAX_IMM(op_eax_imm, src1w); |
1789 | } |
1790 | else { |
1791 | BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); |
1792 | } |
1793 | } |
1794 | else if (FAST_IS_REG(dst)) { |
1795 | inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); |
1796 | FAIL_IF(!inst); |
1797 | *inst = op_rm; |
1798 | } |
1799 | else if (FAST_IS_REG(src1)) { |
1800 | inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); |
1801 | FAIL_IF(!inst); |
1802 | *inst = op_mr; |
1803 | } |
1804 | else { |
1805 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
1806 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
1807 | FAIL_IF(!inst); |
1808 | *inst = op_mr; |
1809 | } |
1810 | return SLJIT_SUCCESS; |
1811 | } |
1812 | |
1813 | /* General version. */ |
1814 | if (FAST_IS_REG(dst)) { |
1815 | EMIT_MOV(compiler, dst, 0, src1, src1w); |
1816 | if (src2 & SLJIT_IMM) { |
1817 | BINARY_IMM(op_imm, op_mr, src2w, dst, 0); |
1818 | } |
1819 | else { |
1820 | inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); |
1821 | FAIL_IF(!inst); |
1822 | *inst = op_rm; |
1823 | } |
1824 | } |
1825 | else { |
		/* This version requires fewer memory writes. */
1827 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
1828 | if (src2 & SLJIT_IMM) { |
1829 | BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); |
1830 | } |
1831 | else { |
1832 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
1833 | FAIL_IF(!inst); |
1834 | *inst = op_rm; |
1835 | } |
1836 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1837 | } |
1838 | |
1839 | return SLJIT_SUCCESS; |
1840 | } |
1841 | |
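/* Emits a non-commutative binary operation such as SUB or SBB. Operand
   order matters here, so only the dst == src1 case can be computed in
   place; everything else goes through dst or TMP_REG1. */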
1842 | static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, |
1843 | sljit_u32 op_types, |
1844 | sljit_s32 dst, sljit_sw dstw, |
1845 | sljit_s32 src1, sljit_sw src1w, |
1846 | sljit_s32 src2, sljit_sw src2w) |
1847 | { |
1848 | sljit_u8* inst; |
1849 | sljit_u8 op_eax_imm = U8(op_types >> 24); |
1850 | sljit_u8 op_rm = U8((op_types >> 16) & 0xff); |
1851 | sljit_u8 op_mr = U8((op_types >> 8) & 0xff); |
1852 | sljit_u8 op_imm = U8(op_types & 0xff); |
1853 | |
1854 | if (dst == src1 && dstw == src1w) { |
1855 | if (src2 & SLJIT_IMM) { |
1856 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
1857 | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
1858 | #else |
1859 | if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { |
1860 | #endif |
1861 | BINARY_EAX_IMM(op_eax_imm, src2w); |
1862 | } |
1863 | else { |
1864 | BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); |
1865 | } |
1866 | } |
1867 | else if (FAST_IS_REG(dst)) { |
1868 | inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); |
1869 | FAIL_IF(!inst); |
1870 | *inst = op_rm; |
1871 | } |
1872 | else if (FAST_IS_REG(src2)) { |
1873 | inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); |
1874 | FAIL_IF(!inst); |
1875 | *inst = op_mr; |
1876 | } |
1877 | else { |
1878 | EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); |
1879 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); |
1880 | FAIL_IF(!inst); |
1881 | *inst = op_mr; |
1882 | } |
1883 | return SLJIT_SUCCESS; |
1884 | } |
1885 | |
1886 | /* General version. */ |
1887 | if (FAST_IS_REG(dst) && dst != src2) { |
1888 | EMIT_MOV(compiler, dst, 0, src1, src1w); |
1889 | if (src2 & SLJIT_IMM) { |
1890 | BINARY_IMM(op_imm, op_mr, src2w, dst, 0); |
1891 | } |
1892 | else { |
1893 | inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); |
1894 | FAIL_IF(!inst); |
1895 | *inst = op_rm; |
1896 | } |
1897 | } |
1898 | else { |
		/* This version requires fewer memory writes. */
1900 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
1901 | if (src2 & SLJIT_IMM) { |
1902 | BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); |
1903 | } |
1904 | else { |
1905 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
1906 | FAIL_IF(!inst); |
1907 | *inst = op_rm; |
1908 | } |
1909 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
1910 | } |
1911 | |
1912 | return SLJIT_SUCCESS; |
1913 | } |
1914 | |
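/* Emits a signed multiplication. Uses the two operand IMUL r, r/m form when
   the destination register matches a source, and the IMUL r, r/m, imm8 or
   imm32 forms for immediate operands. On x86-64, an immediate that does not
   fit in 32 bits is first loaded into TMP_REG2. */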
1915 | static sljit_s32 emit_mul(struct sljit_compiler *compiler, |
1916 | sljit_s32 dst, sljit_sw dstw, |
1917 | sljit_s32 src1, sljit_sw src1w, |
1918 | sljit_s32 src2, sljit_sw src2w) |
1919 | { |
1920 | sljit_u8* inst; |
1921 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
1922 | |
1923 | /* Register destination. */ |
1924 | if (dst_r == src1 && !(src2 & SLJIT_IMM)) { |
1925 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); |
1926 | FAIL_IF(!inst); |
1927 | *inst++ = GROUP_0F; |
1928 | *inst = IMUL_r_rm; |
1929 | } |
1930 | else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { |
1931 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); |
1932 | FAIL_IF(!inst); |
1933 | *inst++ = GROUP_0F; |
1934 | *inst = IMUL_r_rm; |
1935 | } |
1936 | else if (src1 & SLJIT_IMM) { |
1937 | if (src2 & SLJIT_IMM) { |
1938 | EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); |
1939 | src2 = dst_r; |
1940 | src2w = 0; |
1941 | } |
1942 | |
1943 | if (src1w <= 127 && src1w >= -128) { |
1944 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); |
1945 | FAIL_IF(!inst); |
1946 | *inst = IMUL_r_rm_i8; |
1947 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1948 | FAIL_IF(!inst); |
1949 | INC_SIZE(1); |
1950 | *inst = U8(src1w); |
1951 | } |
1952 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1953 | else { |
1954 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); |
1955 | FAIL_IF(!inst); |
1956 | *inst = IMUL_r_rm_i32; |
1957 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
1958 | FAIL_IF(!inst); |
1959 | INC_SIZE(4); |
1960 | sljit_unaligned_store_sw(inst, src1w); |
1961 | } |
1962 | #else |
1963 | else if (IS_HALFWORD(src1w)) { |
1964 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); |
1965 | FAIL_IF(!inst); |
1966 | *inst = IMUL_r_rm_i32; |
1967 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
1968 | FAIL_IF(!inst); |
1969 | INC_SIZE(4); |
1970 | sljit_unaligned_store_s32(inst, (sljit_s32)src1w); |
1971 | } |
1972 | else { |
1973 | if (dst_r != src2) |
1974 | EMIT_MOV(compiler, dst_r, 0, src2, src2w); |
1975 | FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); |
1976 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); |
1977 | FAIL_IF(!inst); |
1978 | *inst++ = GROUP_0F; |
1979 | *inst = IMUL_r_rm; |
1980 | } |
1981 | #endif |
1982 | } |
1983 | else if (src2 & SLJIT_IMM) { |
1984 | /* Note: src1 is NOT immediate. */ |
1985 | |
1986 | if (src2w <= 127 && src2w >= -128) { |
1987 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); |
1988 | FAIL_IF(!inst); |
1989 | *inst = IMUL_r_rm_i8; |
1990 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
1991 | FAIL_IF(!inst); |
1992 | INC_SIZE(1); |
1993 | *inst = U8(src2w); |
1994 | } |
1995 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
1996 | else { |
1997 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); |
1998 | FAIL_IF(!inst); |
1999 | *inst = IMUL_r_rm_i32; |
2000 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
2001 | FAIL_IF(!inst); |
2002 | INC_SIZE(4); |
2003 | sljit_unaligned_store_sw(inst, src2w); |
2004 | } |
2005 | #else |
2006 | else if (IS_HALFWORD(src2w)) { |
2007 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); |
2008 | FAIL_IF(!inst); |
2009 | *inst = IMUL_r_rm_i32; |
2010 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); |
2011 | FAIL_IF(!inst); |
2012 | INC_SIZE(4); |
2013 | sljit_unaligned_store_s32(inst, (sljit_s32)src2w); |
2014 | } |
2015 | else { |
2016 | if (dst_r != src1) |
2017 | EMIT_MOV(compiler, dst_r, 0, src1, src1w); |
2018 | FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); |
2019 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); |
2020 | FAIL_IF(!inst); |
2021 | *inst++ = GROUP_0F; |
2022 | *inst = IMUL_r_rm; |
2023 | } |
2024 | #endif |
2025 | } |
2026 | else { |
2027 | /* Neither argument is immediate. */ |
2028 | if (ADDRESSING_DEPENDS_ON(src2, dst_r)) |
2029 | dst_r = TMP_REG1; |
2030 | EMIT_MOV(compiler, dst_r, 0, src1, src1w); |
2031 | inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); |
2032 | FAIL_IF(!inst); |
2033 | *inst++ = GROUP_0F; |
2034 | *inst = IMUL_r_rm; |
2035 | } |
2036 | |
2037 | if (dst & SLJIT_MEM) |
2038 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
2039 | |
2040 | return SLJIT_SUCCESS; |
2041 | } |
2042 | |
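/* Attempts to compute src1 + src2 with a single LEA, which does not modify
   the flags. Returns SLJIT_ERR_UNSUPPORTED when the operand combination
   cannot be encoded this way, so the caller can fall back to a normal ADD. */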
2043 | static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, |
2044 | sljit_s32 dst, sljit_sw dstw, |
2045 | sljit_s32 src1, sljit_sw src1w, |
2046 | sljit_s32 src2, sljit_sw src2w) |
2047 | { |
2048 | sljit_u8* inst; |
2049 | sljit_s32 dst_r, done = 0; |
2050 | |
	/* These cases are better handled by the normal code path. */
2052 | if (dst == src1 && dstw == src1w) |
2053 | return SLJIT_ERR_UNSUPPORTED; |
2054 | if (dst == src2 && dstw == src2w) |
2055 | return SLJIT_ERR_UNSUPPORTED; |
2056 | |
2057 | dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
2058 | |
2059 | if (FAST_IS_REG(src1)) { |
2060 | if (FAST_IS_REG(src2)) { |
2061 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); |
2062 | FAIL_IF(!inst); |
2063 | *inst = LEA_r_m; |
2064 | done = 1; |
2065 | } |
2066 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2067 | if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2068 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); |
2069 | #else |
2070 | if (src2 & SLJIT_IMM) { |
2071 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); |
2072 | #endif |
2073 | FAIL_IF(!inst); |
2074 | *inst = LEA_r_m; |
2075 | done = 1; |
2076 | } |
2077 | } |
2078 | else if (FAST_IS_REG(src2)) { |
2079 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2080 | if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { |
2081 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); |
2082 | #else |
2083 | if (src1 & SLJIT_IMM) { |
2084 | inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); |
2085 | #endif |
2086 | FAIL_IF(!inst); |
2087 | *inst = LEA_r_m; |
2088 | done = 1; |
2089 | } |
2090 | } |
2091 | |
2092 | if (done) { |
2093 | if (dst_r == TMP_REG1) |
2094 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
2095 | return SLJIT_SUCCESS; |
2096 | } |
2097 | return SLJIT_ERR_UNSUPPORTED; |
2098 | } |
2099 | |
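/* Emits a CMP instruction: the flags are updated, but no result is stored.
   Used when SLJIT_SUB is requested with flags only (see sljit_emit_op2u). */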
2100 | static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, |
2101 | sljit_s32 src1, sljit_sw src1w, |
2102 | sljit_s32 src2, sljit_sw src2w) |
2103 | { |
2104 | sljit_u8* inst; |
2105 | |
2106 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2107 | if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2108 | #else |
2109 | if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { |
2110 | #endif |
2111 | BINARY_EAX_IMM(CMP_EAX_i32, src2w); |
2112 | return SLJIT_SUCCESS; |
2113 | } |
2114 | |
2115 | if (FAST_IS_REG(src1)) { |
2116 | if (src2 & SLJIT_IMM) { |
2117 | BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); |
2118 | } |
2119 | else { |
2120 | inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); |
2121 | FAIL_IF(!inst); |
2122 | *inst = CMP_r_rm; |
2123 | } |
2124 | return SLJIT_SUCCESS; |
2125 | } |
2126 | |
2127 | if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { |
2128 | inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); |
2129 | FAIL_IF(!inst); |
2130 | *inst = CMP_rm_r; |
2131 | return SLJIT_SUCCESS; |
2132 | } |
2133 | |
2134 | if (src2 & SLJIT_IMM) { |
2135 | if (src1 & SLJIT_IMM) { |
2136 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2137 | src1 = TMP_REG1; |
2138 | src1w = 0; |
2139 | } |
2140 | BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); |
2141 | } |
2142 | else { |
2143 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2144 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2145 | FAIL_IF(!inst); |
2146 | *inst = CMP_r_rm; |
2147 | } |
2148 | return SLJIT_SUCCESS; |
2149 | } |
2150 | |
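/* Emits a TEST instruction (a bitwise AND that only updates the flags).
   Used when SLJIT_AND is requested with flags only (see sljit_emit_op2u). */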
2151 | static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, |
2152 | sljit_s32 src1, sljit_sw src1w, |
2153 | sljit_s32 src2, sljit_sw src2w) |
2154 | { |
2155 | sljit_u8* inst; |
2156 | |
2157 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2158 | if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { |
2159 | #else |
2160 | if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { |
2161 | #endif |
2162 | BINARY_EAX_IMM(TEST_EAX_i32, src2w); |
2163 | return SLJIT_SUCCESS; |
2164 | } |
2165 | |
2166 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2167 | if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { |
2168 | #else |
2169 | if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { |
2170 | #endif |
2171 | BINARY_EAX_IMM(TEST_EAX_i32, src1w); |
2172 | return SLJIT_SUCCESS; |
2173 | } |
2174 | |
2175 | if (!(src1 & SLJIT_IMM)) { |
2176 | if (src2 & SLJIT_IMM) { |
2177 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2178 | if (IS_HALFWORD(src2w) || compiler->mode32) { |
2179 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); |
2180 | FAIL_IF(!inst); |
2181 | *inst = GROUP_F7; |
2182 | } |
2183 | else { |
2184 | FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w)); |
2185 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w); |
2186 | FAIL_IF(!inst); |
2187 | *inst = TEST_rm_r; |
2188 | } |
2189 | #else |
2190 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); |
2191 | FAIL_IF(!inst); |
2192 | *inst = GROUP_F7; |
2193 | #endif |
2194 | return SLJIT_SUCCESS; |
2195 | } |
2196 | else if (FAST_IS_REG(src1)) { |
2197 | inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); |
2198 | FAIL_IF(!inst); |
2199 | *inst = TEST_rm_r; |
2200 | return SLJIT_SUCCESS; |
2201 | } |
2202 | } |
2203 | |
2204 | if (!(src2 & SLJIT_IMM)) { |
2205 | if (src1 & SLJIT_IMM) { |
2206 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2207 | if (IS_HALFWORD(src1w) || compiler->mode32) { |
2208 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); |
2209 | FAIL_IF(!inst); |
2210 | *inst = GROUP_F7; |
2211 | } |
2212 | else { |
2213 | FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w)); |
2214 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2215 | FAIL_IF(!inst); |
2216 | *inst = TEST_rm_r; |
2217 | } |
2218 | #else |
2219 | inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); |
2220 | FAIL_IF(!inst); |
2221 | *inst = GROUP_F7; |
2222 | #endif |
2223 | return SLJIT_SUCCESS; |
2224 | } |
2225 | else if (FAST_IS_REG(src2)) { |
2226 | inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); |
2227 | FAIL_IF(!inst); |
2228 | *inst = TEST_rm_r; |
2229 | return SLJIT_SUCCESS; |
2230 | } |
2231 | } |
2232 | |
2233 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2234 | if (src2 & SLJIT_IMM) { |
2235 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2236 | if (IS_HALFWORD(src2w) || compiler->mode32) { |
2237 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); |
2238 | FAIL_IF(!inst); |
2239 | *inst = GROUP_F7; |
2240 | } |
2241 | else { |
2242 | FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); |
2243 | inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); |
2244 | FAIL_IF(!inst); |
2245 | *inst = TEST_rm_r; |
2246 | } |
2247 | #else |
2248 | inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); |
2249 | FAIL_IF(!inst); |
2250 | *inst = GROUP_F7; |
2251 | #endif |
2252 | } |
2253 | else { |
2254 | inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); |
2255 | FAIL_IF(!inst); |
2256 | *inst = TEST_rm_r; |
2257 | } |
2258 | return SLJIT_SUCCESS; |
2259 | } |
2260 | |
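/* Emits a shift or rotate; mode selects the operation (SHL, SHR, SAR, ROL,
   ROR). A variable shift count must be in SLJIT_PREF_SHIFT_REG (ecx), so the
   non-immediate paths below save and restore the previous ecx value. */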
2261 | static sljit_s32 emit_shift(struct sljit_compiler *compiler, |
2262 | sljit_u8 mode, |
2263 | sljit_s32 dst, sljit_sw dstw, |
2264 | sljit_s32 src1, sljit_sw src1w, |
2265 | sljit_s32 src2, sljit_sw src2w) |
2266 | { |
2267 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2268 | sljit_s32 mode32; |
2269 | #endif |
2270 | sljit_u8* inst; |
2271 | |
2272 | if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { |
2273 | if (dst == src1 && dstw == src1w) { |
2274 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); |
2275 | FAIL_IF(!inst); |
2276 | *inst |= mode; |
2277 | return SLJIT_SUCCESS; |
2278 | } |
2279 | if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { |
2280 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2281 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2282 | FAIL_IF(!inst); |
2283 | *inst |= mode; |
2284 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2285 | return SLJIT_SUCCESS; |
2286 | } |
2287 | if (FAST_IS_REG(dst)) { |
2288 | EMIT_MOV(compiler, dst, 0, src1, src1w); |
2289 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); |
2290 | FAIL_IF(!inst); |
2291 | *inst |= mode; |
2292 | return SLJIT_SUCCESS; |
2293 | } |
2294 | |
2295 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2296 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); |
2297 | FAIL_IF(!inst); |
2298 | *inst |= mode; |
2299 | EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); |
2300 | return SLJIT_SUCCESS; |
2301 | } |
2302 | |
2303 | if (dst == SLJIT_PREF_SHIFT_REG) { |
2304 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2305 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2306 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2307 | FAIL_IF(!inst); |
2308 | *inst |= mode; |
2309 | return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2310 | } |
2311 | |
2312 | if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) { |
2313 | if (src1 != dst) |
2314 | EMIT_MOV(compiler, dst, 0, src1, src1w); |
2315 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2316 | mode32 = compiler->mode32; |
2317 | compiler->mode32 = 0; |
2318 | #endif |
2319 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); |
2320 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2321 | compiler->mode32 = mode32; |
2322 | #endif |
2323 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2324 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); |
2325 | FAIL_IF(!inst); |
2326 | *inst |= mode; |
2327 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2328 | compiler->mode32 = 0; |
2329 | #endif |
2330 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2331 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2332 | compiler->mode32 = mode32; |
2333 | #endif |
2334 | return SLJIT_SUCCESS; |
2335 | } |
2336 | |
	/* This case is complex since ecx itself may be used for
	   addressing, and that case must be supported as well. */
2339 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2340 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2341 | EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); |
2342 | #else /* !SLJIT_CONFIG_X86_32 */ |
2343 | mode32 = compiler->mode32; |
2344 | compiler->mode32 = 0; |
2345 | EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); |
2346 | compiler->mode32 = mode32; |
2347 | #endif /* SLJIT_CONFIG_X86_32 */ |
2348 | |
2349 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2350 | inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2351 | FAIL_IF(!inst); |
2352 | *inst |= mode; |
2353 | |
2354 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2355 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); |
2356 | #else |
2357 | compiler->mode32 = 0; |
2358 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); |
2359 | compiler->mode32 = mode32; |
2360 | #endif /* SLJIT_CONFIG_X86_32 */ |
2361 | |
2362 | if (dst != TMP_REG1) |
2363 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
2364 | |
2365 | return SLJIT_SUCCESS; |
2366 | } |
2367 | |
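/* Wrapper around emit_shift for the cases where flags are requested. A shift
   by zero leaves the flags unchanged, so a zero immediate count is turned
   into an OR with zero, and a variable count is preceded (or followed) by an
   explicit compare with zero. */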
2368 | static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, |
2369 | sljit_u8 mode, sljit_s32 set_flags, |
2370 | sljit_s32 dst, sljit_sw dstw, |
2371 | sljit_s32 src1, sljit_sw src1w, |
2372 | sljit_s32 src2, sljit_sw src2w) |
2373 | { |
2374 | /* The CPU does not set flags if the shift count is 0. */ |
2375 | if (src2 & SLJIT_IMM) { |
2376 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2377 | src2w &= compiler->mode32 ? 0x1f : 0x3f; |
2378 | #else /* !SLJIT_CONFIG_X86_64 */ |
2379 | src2w &= 0x1f; |
2380 | #endif /* SLJIT_CONFIG_X86_64 */ |
2381 | if (src2w != 0) |
2382 | return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); |
2383 | |
2384 | if (!set_flags) |
2385 | return emit_mov(compiler, dst, dstw, src1, src1w); |
2386 | /* OR dst, src, 0 */ |
2387 | return emit_cum_binary(compiler, BINARY_OPCODE(OR), |
2388 | dst, dstw, src1, src1w, SLJIT_IMM, 0); |
2389 | } |
2390 | |
2391 | if (!set_flags) |
2392 | return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); |
2393 | |
2394 | if (!FAST_IS_REG(dst)) |
2395 | FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); |
2396 | |
2397 | FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); |
2398 | |
2399 | if (FAST_IS_REG(dst)) |
2400 | return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); |
2401 | return SLJIT_SUCCESS; |
2402 | } |
2403 | |
2404 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, |
2405 | sljit_s32 dst, sljit_sw dstw, |
2406 | sljit_s32 src1, sljit_sw src1w, |
2407 | sljit_s32 src2, sljit_sw src2w) |
2408 | { |
2409 | CHECK_ERROR(); |
2410 | CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); |
2411 | ADJUST_LOCAL_OFFSET(dst, dstw); |
2412 | ADJUST_LOCAL_OFFSET(src1, src1w); |
2413 | ADJUST_LOCAL_OFFSET(src2, src2w); |
2414 | |
2415 | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
2416 | CHECK_EXTRA_REGS(src1, src1w, (void)0); |
2417 | CHECK_EXTRA_REGS(src2, src2w, (void)0); |
2418 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2419 | compiler->mode32 = op & SLJIT_32; |
2420 | #endif |
2421 | |
2422 | SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op)); |
2423 | |
2424 | switch (GET_OPCODE(op)) { |
2425 | case SLJIT_ADD: |
2426 | if (!HAS_FLAGS(op)) { |
2427 | if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) |
2428 | return compiler->error; |
2429 | } |
2430 | return emit_cum_binary(compiler, BINARY_OPCODE(ADD), |
2431 | dst, dstw, src1, src1w, src2, src2w); |
2432 | case SLJIT_ADDC: |
2433 | return emit_cum_binary(compiler, BINARY_OPCODE(ADC), |
2434 | dst, dstw, src1, src1w, src2, src2w); |
2435 | case SLJIT_SUB: |
2436 | if (src1 == SLJIT_IMM && src1w == 0) |
2437 | return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); |
2438 | |
2439 | if (!HAS_FLAGS(op)) { |
2440 | if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) |
2441 | return compiler->error; |
2442 | if (FAST_IS_REG(dst) && src2 == dst) { |
2443 | FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); |
2444 | return emit_unary(compiler, NEG_rm, dst, 0, dst, 0); |
2445 | } |
2446 | } |
2447 | |
2448 | return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), |
2449 | dst, dstw, src1, src1w, src2, src2w); |
2450 | case SLJIT_SUBC: |
2451 | return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB), |
2452 | dst, dstw, src1, src1w, src2, src2w); |
2453 | case SLJIT_MUL: |
2454 | return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); |
2455 | case SLJIT_AND: |
2456 | return emit_cum_binary(compiler, BINARY_OPCODE(AND), |
2457 | dst, dstw, src1, src1w, src2, src2w); |
2458 | case SLJIT_OR: |
2459 | return emit_cum_binary(compiler, BINARY_OPCODE(OR), |
2460 | dst, dstw, src1, src1w, src2, src2w); |
2461 | case SLJIT_XOR: |
2462 | return emit_cum_binary(compiler, BINARY_OPCODE(XOR), |
2463 | dst, dstw, src1, src1w, src2, src2w); |
2464 | case SLJIT_SHL: |
2465 | case SLJIT_MSHL: |
2466 | return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), |
2467 | dst, dstw, src1, src1w, src2, src2w); |
2468 | case SLJIT_LSHR: |
2469 | case SLJIT_MLSHR: |
2470 | return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op), |
2471 | dst, dstw, src1, src1w, src2, src2w); |
2472 | case SLJIT_ASHR: |
2473 | case SLJIT_MASHR: |
2474 | return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op), |
2475 | dst, dstw, src1, src1w, src2, src2w); |
2476 | case SLJIT_ROTL: |
2477 | return emit_shift_with_flags(compiler, ROL, 0, |
2478 | dst, dstw, src1, src1w, src2, src2w); |
2479 | case SLJIT_ROTR: |
2480 | return emit_shift_with_flags(compiler, ROR, 0, |
2481 | dst, dstw, src1, src1w, src2, src2w); |
2482 | } |
2483 | |
2484 | return SLJIT_SUCCESS; |
2485 | } |
2486 | |
2487 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, |
2488 | sljit_s32 src1, sljit_sw src1w, |
2489 | sljit_s32 src2, sljit_sw src2w) |
2490 | { |
2491 | sljit_s32 opcode = GET_OPCODE(op); |
2492 | |
2493 | CHECK_ERROR(); |
2494 | CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); |
2495 | |
2496 | if (opcode != SLJIT_SUB && opcode != SLJIT_AND) { |
2497 | SLJIT_SKIP_CHECKS(compiler); |
2498 | return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); |
2499 | } |
2500 | |
2501 | ADJUST_LOCAL_OFFSET(src1, src1w); |
2502 | ADJUST_LOCAL_OFFSET(src2, src2w); |
2503 | |
2504 | CHECK_EXTRA_REGS(src1, src1w, (void)0); |
2505 | CHECK_EXTRA_REGS(src2, src2w, (void)0); |
2506 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2507 | compiler->mode32 = op & SLJIT_32; |
2508 | #endif |
2509 | |
2510 | if (opcode == SLJIT_SUB) { |
2511 | return emit_cmp_binary(compiler, src1, src1w, src2, src2w); |
2512 | } |
2513 | return emit_test_binary(compiler, src1, src1w, src2, src2w); |
2514 | } |
2515 | |
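/* Implements sljit_emit_shift_into with the SHLD/SHRD double-shift
   instructions: src_dst is shifted and the vacated bits are filled from
   src1. When src_dst equals src1 the operation degrades into a plain rotate.
   As with the other shifts, a variable count must be placed in
   SLJIT_PREF_SHIFT_REG (ecx) first. */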
2516 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, |
2517 | sljit_s32 src_dst, |
2518 | sljit_s32 src1, sljit_sw src1w, |
2519 | sljit_s32 src2, sljit_sw src2w) |
2520 | { |
2521 | sljit_s32 restore_ecx = 0; |
2522 | sljit_s32 is_rotate, is_left; |
2523 | sljit_u8* inst; |
2524 | sljit_sw dstw = 0; |
2525 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2526 | sljit_s32 tmp2 = SLJIT_MEM1(SLJIT_SP); |
2527 | #else /* !SLJIT_CONFIG_X86_32 */ |
2528 | sljit_s32 tmp2 = TMP_REG2; |
2529 | #endif /* SLJIT_CONFIG_X86_32 */ |
2530 | |
2531 | CHECK_ERROR(); |
2532 | CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w)); |
2533 | ADJUST_LOCAL_OFFSET(src1, src1w); |
2534 | ADJUST_LOCAL_OFFSET(src2, src2w); |
2535 | |
2536 | CHECK_EXTRA_REGS(src1, src1w, (void)0); |
2537 | CHECK_EXTRA_REGS(src2, src2w, (void)0); |
2538 | |
2539 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2540 | compiler->mode32 = op & SLJIT_32; |
2541 | #endif |
2542 | |
2543 | if (src2 & SLJIT_IMM) { |
2544 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2545 | src2w &= 0x1f; |
2546 | #else /* !SLJIT_CONFIG_X86_32 */ |
2547 | src2w &= (op & SLJIT_32) ? 0x1f : 0x3f; |
2548 | #endif /* SLJIT_CONFIG_X86_32 */ |
2549 | |
2550 | if (src2w == 0) |
2551 | return SLJIT_SUCCESS; |
2552 | } |
2553 | |
2554 | is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); |
2555 | |
2556 | is_rotate = (src_dst == src1); |
2557 | CHECK_EXTRA_REGS(src_dst, dstw, (void)0); |
2558 | |
2559 | if (is_rotate) |
2560 | return emit_shift(compiler, is_left ? ROL : ROR, src_dst, dstw, src1, src1w, src2, src2w); |
2561 | |
2562 | if ((src2 & SLJIT_IMM) || src2 == SLJIT_PREF_SHIFT_REG) { |
2563 | if (!FAST_IS_REG(src1)) { |
2564 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2565 | src1 = TMP_REG1; |
2566 | } |
2567 | } else if (FAST_IS_REG(src1)) { |
2568 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2569 | compiler->mode32 = 0; |
2570 | #endif |
2571 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); |
2572 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2573 | compiler->mode32 = op & SLJIT_32; |
2574 | #endif |
2575 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2576 | |
2577 | if (src1 == SLJIT_PREF_SHIFT_REG) |
2578 | src1 = TMP_REG1; |
2579 | |
2580 | if (src_dst == SLJIT_PREF_SHIFT_REG) |
2581 | src_dst = TMP_REG1; |
2582 | |
2583 | restore_ecx = 1; |
2584 | } else { |
2585 | EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); |
2586 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2587 | compiler->mode32 = 0; |
2588 | #endif |
2589 | EMIT_MOV(compiler, tmp2, 0, SLJIT_PREF_SHIFT_REG, 0); |
2590 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2591 | compiler->mode32 = op & SLJIT_32; |
2592 | #endif |
2593 | EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); |
2594 | |
2595 | src1 = TMP_REG1; |
2596 | |
2597 | if (src_dst == SLJIT_PREF_SHIFT_REG) { |
2598 | src_dst = tmp2; |
2599 | SLJIT_ASSERT(dstw == 0); |
2600 | } |
2601 | |
2602 | restore_ecx = 2; |
2603 | } |
2604 | |
2605 | inst = emit_x86_instruction(compiler, 2, src1, 0, src_dst, dstw); |
2606 | FAIL_IF(!inst); |
2607 | inst[0] = GROUP_0F; |
2608 | |
2609 | if (src2 & SLJIT_IMM) { |
2610 | inst[1] = U8((is_left ? SHLD : SHRD) - 1); |
2611 | |
		/* Immediate argument is added separately. */
2613 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); |
2614 | FAIL_IF(!inst); |
2615 | INC_SIZE(1); |
2616 | *inst = U8(src2w); |
2617 | } else |
2618 | inst[1] = U8(is_left ? SHLD : SHRD); |
2619 | |
2620 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2621 | compiler->mode32 = 0; |
2622 | #endif |
2623 | |
2624 | if (restore_ecx == 1) |
2625 | return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); |
2626 | if (restore_ecx == 2) |
2627 | return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, tmp2, 0); |
2628 | |
2629 | return SLJIT_SUCCESS; |
2630 | } |
2631 | |
2632 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, |
2633 | sljit_s32 src, sljit_sw srcw) |
2634 | { |
2635 | CHECK_ERROR(); |
2636 | CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); |
2637 | ADJUST_LOCAL_OFFSET(src, srcw); |
2638 | |
2639 | CHECK_EXTRA_REGS(src, srcw, (void)0); |
2640 | |
2641 | switch (op) { |
2642 | case SLJIT_FAST_RETURN: |
2643 | return emit_fast_return(compiler, src, srcw); |
2644 | case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: |
2645 | /* Don't adjust shadow stack if it isn't enabled. */ |
2646 | if (!cpu_has_shadow_stack ()) |
2647 | return SLJIT_SUCCESS; |
2648 | return adjust_shadow_stack(compiler, src, srcw); |
2649 | case SLJIT_PREFETCH_L1: |
2650 | case SLJIT_PREFETCH_L2: |
2651 | case SLJIT_PREFETCH_L3: |
2652 | case SLJIT_PREFETCH_ONCE: |
2653 | return emit_prefetch(compiler, op, src, srcw); |
2654 | } |
2655 | |
2656 | return SLJIT_SUCCESS; |
2657 | } |
2658 | |
2659 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) |
2660 | { |
2661 | CHECK_REG_INDEX(check_sljit_get_register_index(reg)); |
2662 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2663 | if (reg >= SLJIT_R3 && reg <= SLJIT_R8) |
2664 | return -1; |
2665 | #endif |
2666 | return reg_map[reg]; |
2667 | } |
2668 | |
2669 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) |
2670 | { |
2671 | CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); |
2672 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
2673 | return reg; |
2674 | #else |
2675 | return freg_map[reg]; |
2676 | #endif |
2677 | } |
2678 | |
2679 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, |
2680 | void *instruction, sljit_u32 size) |
2681 | { |
2682 | sljit_u8 *inst; |
2683 | |
2684 | CHECK_ERROR(); |
2685 | CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); |
2686 | |
2687 | inst = (sljit_u8*)ensure_buf(compiler, 1 + size); |
2688 | FAIL_IF(!inst); |
2689 | INC_SIZE(size); |
2690 | SLJIT_MEMCPY(inst, instruction, size); |
2691 | return SLJIT_SUCCESS; |
2692 | } |
2693 | |
2694 | /* --------------------------------------------------------------------- */ |
2695 | /* Floating point operators */ |
2696 | /* --------------------------------------------------------------------- */ |
2697 | |
/* Alignment (3 words) + 4 constants of 16 bytes each. */
2699 | static sljit_u32 sse2_data[3 + (4 * 4)]; |
2700 | static sljit_u32 *sse2_buffer; |
2701 | |
2702 | static void init_compiler(void) |
2703 | { |
2704 | /* Align to 16 bytes. */ |
2705 | sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf); |
2706 | |
	/* Single precision constants (each constant is 16 bytes long). */
2708 | sse2_buffer[0] = 0x80000000; |
2709 | sse2_buffer[4] = 0x7fffffff; |
	/* Double precision constants (each constant is 16 bytes long). */
2711 | sse2_buffer[8] = 0; |
2712 | sse2_buffer[9] = 0x80000000; |
2713 | sse2_buffer[12] = 0xffffffff; |
2714 | sse2_buffer[13] = 0x7fffffff; |
2715 | } |
2716 | |
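/* Emits a scalar SSE2 arithmetic instruction: an F3 (single precision) or
   F2 (double precision) prefix, followed by the 0F escape byte and the
   given opcode. */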
2717 | static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, |
2718 | sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) |
2719 | { |
2720 | sljit_u8 *inst; |
2721 | |
2722 | inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); |
2723 | FAIL_IF(!inst); |
2724 | *inst++ = GROUP_0F; |
2725 | *inst = opcode; |
2726 | return SLJIT_SUCCESS; |
2727 | } |
2728 | |
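/* Emits an SSE2 logic/compare instruction (ANDPD, XORPD, UCOMISD, ...)
   with an optional 66 prefix for the packed double forms. */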
2729 | static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, |
2730 | sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) |
2731 | { |
2732 | sljit_u8 *inst; |
2733 | |
2734 | inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); |
2735 | FAIL_IF(!inst); |
2736 | *inst++ = GROUP_0F; |
2737 | *inst = opcode; |
2738 | return SLJIT_SUCCESS; |
2739 | } |
2740 | |
2741 | static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, |
2742 | sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) |
2743 | { |
2744 | return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); |
2745 | } |
2746 | |
2747 | static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, |
2748 | sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) |
2749 | { |
2750 | return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); |
2751 | } |
2752 | |
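/* Converts a floating point value to a signed integer with the truncating
   CVTTSS2SI / CVTTSD2SI instructions. */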
2753 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, |
2754 | sljit_s32 dst, sljit_sw dstw, |
2755 | sljit_s32 src, sljit_sw srcw) |
2756 | { |
2757 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; |
2758 | sljit_u8 *inst; |
2759 | |
2760 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2761 | if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) |
2762 | compiler->mode32 = 0; |
2763 | #endif |
2764 | |
2765 | inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); |
2766 | FAIL_IF(!inst); |
2767 | *inst++ = GROUP_0F; |
2768 | *inst = CVTTSD2SI_r_xm; |
2769 | |
2770 | if (dst & SLJIT_MEM) |
2771 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
2772 | return SLJIT_SUCCESS; |
2773 | } |
2774 | |
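/* Converts a signed integer to a floating point value with the
   CVTSI2SS / CVTSI2SD instructions. An immediate source is first moved
   into TMP_REG1. */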
2775 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, |
2776 | sljit_s32 dst, sljit_sw dstw, |
2777 | sljit_s32 src, sljit_sw srcw) |
2778 | { |
2779 | sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; |
2780 | sljit_u8 *inst; |
2781 | |
2782 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2783 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) |
2784 | compiler->mode32 = 0; |
2785 | #endif |
2786 | |
2787 | if (src & SLJIT_IMM) { |
2788 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2789 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) |
2790 | srcw = (sljit_s32)srcw; |
2791 | #endif |
2792 | EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); |
2793 | src = TMP_REG1; |
2794 | srcw = 0; |
2795 | } |
2796 | |
2797 | inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); |
2798 | FAIL_IF(!inst); |
2799 | *inst++ = GROUP_0F; |
2800 | *inst = CVTSI2SD_x_rm; |
2801 | |
2802 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2803 | compiler->mode32 = 1; |
2804 | #endif |
2805 | if (dst_r == TMP_FREG) |
2806 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
2807 | return SLJIT_SUCCESS; |
2808 | } |
2809 | |
2810 | static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, |
2811 | sljit_s32 src1, sljit_sw src1w, |
2812 | sljit_s32 src2, sljit_sw src2w) |
2813 | { |
2814 | switch (GET_FLAG_TYPE(op)) { |
2815 | case SLJIT_ORDERED_LESS: |
2816 | case SLJIT_UNORDERED_OR_GREATER_EQUAL: |
2817 | case SLJIT_UNORDERED_OR_GREATER: |
2818 | case SLJIT_ORDERED_LESS_EQUAL: |
2819 | if (!FAST_IS_REG(src2)) { |
2820 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); |
2821 | src2 = TMP_FREG; |
2822 | } |
2823 | |
2824 | return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w); |
2825 | } |
2826 | |
2827 | if (!FAST_IS_REG(src1)) { |
2828 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
2829 | src1 = TMP_FREG; |
2830 | } |
2831 | |
2832 | return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w); |
2833 | } |
2834 | |
2835 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, |
2836 | sljit_s32 dst, sljit_sw dstw, |
2837 | sljit_s32 src, sljit_sw srcw) |
2838 | { |
2839 | sljit_s32 dst_r; |
2840 | |
2841 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2842 | compiler->mode32 = 1; |
2843 | #endif |
2844 | |
2845 | CHECK_ERROR(); |
2846 | SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); |
2847 | |
2848 | if (GET_OPCODE(op) == SLJIT_MOV_F64) { |
2849 | if (FAST_IS_REG(dst)) |
2850 | return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw); |
2851 | if (FAST_IS_REG(src)) |
2852 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src); |
2853 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); |
2854 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
2855 | } |
2856 | |
2857 | if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { |
2858 | dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; |
2859 | if (FAST_IS_REG(src)) { |
			/* We overwrite the high bits of the source. From SLJIT's point of view,
			   this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2863 | FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0)); |
2864 | } |
2865 | else { |
2866 | FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw)); |
2867 | src = TMP_FREG; |
2868 | } |
2869 | |
2870 | FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0)); |
2871 | if (dst_r == TMP_FREG) |
2872 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
2873 | return SLJIT_SUCCESS; |
2874 | } |
2875 | |
2876 | if (FAST_IS_REG(dst)) { |
2877 | dst_r = dst; |
2878 | if (dst != src) |
2879 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); |
2880 | } |
2881 | else { |
2882 | dst_r = TMP_FREG; |
2883 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); |
2884 | } |
2885 | |
2886 | switch (GET_OPCODE(op)) { |
2887 | case SLJIT_NEG_F64: |
2888 | FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8))); |
2889 | break; |
2890 | |
2891 | case SLJIT_ABS_F64: |
2892 | FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12))); |
2893 | break; |
2894 | } |
2895 | |
2896 | if (dst_r == TMP_FREG) |
2897 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
2898 | return SLJIT_SUCCESS; |
2899 | } |
2900 | |
2901 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, |
2902 | sljit_s32 dst, sljit_sw dstw, |
2903 | sljit_s32 src1, sljit_sw src1w, |
2904 | sljit_s32 src2, sljit_sw src2w) |
2905 | { |
2906 | sljit_s32 dst_r; |
2907 | |
2908 | CHECK_ERROR(); |
2909 | CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); |
2910 | ADJUST_LOCAL_OFFSET(dst, dstw); |
2911 | ADJUST_LOCAL_OFFSET(src1, src1w); |
2912 | ADJUST_LOCAL_OFFSET(src2, src2w); |
2913 | |
2914 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
2915 | compiler->mode32 = 1; |
2916 | #endif |
2917 | |
2918 | if (FAST_IS_REG(dst)) { |
2919 | dst_r = dst; |
2920 | if (dst == src1) |
2921 | ; /* Do nothing here. */ |
2922 | else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) { |
2923 | /* Swap arguments. */ |
2924 | src2 = src1; |
2925 | src2w = src1w; |
2926 | } |
2927 | else if (dst != src2) |
2928 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); |
2929 | else { |
2930 | dst_r = TMP_FREG; |
2931 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
2932 | } |
2933 | } |
2934 | else { |
2935 | dst_r = TMP_FREG; |
2936 | FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); |
2937 | } |
2938 | |
2939 | switch (GET_OPCODE(op)) { |
2940 | case SLJIT_ADD_F64: |
2941 | FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); |
2942 | break; |
2943 | |
2944 | case SLJIT_SUB_F64: |
2945 | FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); |
2946 | break; |
2947 | |
2948 | case SLJIT_MUL_F64: |
2949 | FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); |
2950 | break; |
2951 | |
2952 | case SLJIT_DIV_F64: |
2953 | FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); |
2954 | break; |
2955 | } |
2956 | |
2957 | if (dst_r == TMP_FREG) |
2958 | return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); |
2959 | return SLJIT_SUCCESS; |
2960 | } |
2961 | |
2962 | /* --------------------------------------------------------------------- */ |
2963 | /* Conditional instructions */ |
2964 | /* --------------------------------------------------------------------- */ |
2965 | |
2966 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) |
2967 | { |
2968 | sljit_u8 *inst; |
2969 | struct sljit_label *label; |
2970 | |
2971 | CHECK_ERROR_PTR(); |
2972 | CHECK_PTR(check_sljit_emit_label(compiler)); |
2973 | |
2974 | if (compiler->last_label && compiler->last_label->size == compiler->size) |
2975 | return compiler->last_label; |
2976 | |
2977 | label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); |
2978 | PTR_FAIL_IF(!label); |
2979 | set_label(label, compiler); |
2980 | |
2981 | inst = (sljit_u8*)ensure_buf(compiler, 2); |
2982 | PTR_FAIL_IF(!inst); |
2983 | |
2984 | *inst++ = 0; |
2985 | *inst++ = 0; |
2986 | |
2987 | return label; |
2988 | } |
2989 | |
2990 | SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) |
2991 | { |
2992 | sljit_u8 *inst; |
2993 | struct sljit_jump *jump; |
2994 | |
2995 | CHECK_ERROR_PTR(); |
2996 | CHECK_PTR(check_sljit_emit_jump(compiler, type)); |
2997 | |
2998 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
2999 | PTR_FAIL_IF_NULL(jump); |
3000 | set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); |
3001 | type &= 0xff; |
3002 | |
3003 | /* Worst case size. */ |
3004 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
3005 | compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; |
3006 | #else |
3007 | compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); |
3008 | #endif |
3009 | |
3010 | inst = (sljit_u8*)ensure_buf(compiler, 2); |
3011 | PTR_FAIL_IF_NULL(inst); |
3012 | |
3013 | *inst++ = 0; |
3014 | *inst++ = 1; |
3015 | return jump; |
3016 | } |
3017 | |
3018 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) |
3019 | { |
3020 | sljit_u8 *inst; |
3021 | struct sljit_jump *jump; |
3022 | |
3023 | CHECK_ERROR(); |
3024 | CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); |
3025 | ADJUST_LOCAL_OFFSET(src, srcw); |
3026 | |
3027 | CHECK_EXTRA_REGS(src, srcw, (void)0); |
3028 | |
3029 | if (src == SLJIT_IMM) { |
3030 | jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); |
3031 | FAIL_IF_NULL(jump); |
3032 | set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); |
3033 | jump->u.target = (sljit_uw)srcw; |
3034 | |
3035 | /* Worst case size. */ |
3036 | #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |
3037 | compiler->size += 5; |
3038 | #else |
3039 | compiler->size += 10 + 3; |
3040 | #endif |
3041 | |
3042 | inst = (sljit_u8*)ensure_buf(compiler, 2); |
3043 | FAIL_IF_NULL(inst); |
3044 | |
3045 | *inst++ = 0; |
3046 | *inst++ = 1; |
3047 | } |
3048 | else { |
3049 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3050 | /* REX_W is not necessary (src is not immediate). */ |
3051 | compiler->mode32 = 1; |
3052 | #endif |
3053 | inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); |
3054 | FAIL_IF(!inst); |
3055 | *inst++ = GROUP_FF; |
3056 | *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm)); |
3057 | } |
3058 | return SLJIT_SUCCESS; |
3059 | } |
3060 | |
3061 | SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, |
3062 | sljit_s32 dst, sljit_sw dstw, |
3063 | sljit_s32 type) |
3064 | { |
3065 | sljit_u8 *inst; |
3066 | sljit_u8 cond_set = 0; |
3067 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3068 | sljit_s32 reg; |
3069 | #endif |
3070 | /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ |
3071 | sljit_s32 dst_save = dst; |
3072 | sljit_sw dstw_save = dstw; |
3073 | |
3074 | CHECK_ERROR(); |
3075 | CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); |
3076 | |
3077 | ADJUST_LOCAL_OFFSET(dst, dstw); |
3078 | CHECK_EXTRA_REGS(dst, dstw, (void)0); |
3079 | |
3080 | /* setcc = jcc + 0x10. */ |
3081 | cond_set = U8(get_jump_code((sljit_uw)type) + 0x10); |
3082 | |
3083 | #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
3084 | if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { |
3085 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); |
3086 | FAIL_IF(!inst); |
3087 | INC_SIZE(4 + 3); |
3088 | /* Set low register to conditional flag. */ |
3089 | *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; |
3090 | *inst++ = GROUP_0F; |
3091 | *inst++ = cond_set; |
3092 | *inst++ = MOD_REG | reg_lmap[TMP_REG1]; |
3093 | *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B)); |
3094 | *inst++ = OR_rm8_r8; |
3095 | *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); |
3096 | return SLJIT_SUCCESS; |
3097 | } |
3098 | |
3099 | reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1; |
3100 | |
3101 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); |
3102 | FAIL_IF(!inst); |
3103 | INC_SIZE(4 + 4); |
3104 | /* Set low register to conditional flag. */ |
3105 | *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; |
3106 | *inst++ = GROUP_0F; |
3107 | *inst++ = cond_set; |
3108 | *inst++ = MOD_REG | reg_lmap[reg]; |
3109 | *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); |
3110 | /* The movzx instruction does not affect flags. */ |
3111 | *inst++ = GROUP_0F; |
3112 | *inst++ = MOVZX_r_rm8; |
3113 | *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); |
3114 | |
3115 | if (reg != TMP_REG1) |
3116 | return SLJIT_SUCCESS; |
3117 | |
3118 | if (GET_OPCODE(op) < SLJIT_ADD) { |
3119 | compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; |
3120 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
3121 | } |
3122 | |
3123 | SLJIT_SKIP_CHECKS(compiler); |
3124 | return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); |
3125 | |
3126 | #else |
3127 | /* The SLJIT_CONFIG_X86_32 code path starts here. */ |
3128 | if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { |
3129 | if (reg_map[dst] <= 4) { |
3130 | /* Low byte is accessible. */ |
3131 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3); |
3132 | FAIL_IF(!inst); |
3133 | INC_SIZE(3 + 3); |
3134 | /* Set low byte to conditional flag. */ |
3135 | *inst++ = GROUP_0F; |
3136 | *inst++ = cond_set; |
3137 | *inst++ = U8(MOD_REG | reg_map[dst]); |
3138 | |
3139 | *inst++ = GROUP_0F; |
3140 | *inst++ = MOVZX_r_rm8; |
3141 | *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]); |
3142 | return SLJIT_SUCCESS; |
3143 | } |
3144 | |
3145 | /* Low byte is not accessible. */ |
3146 | if (cpu_feature_list == 0) |
3147 | get_cpu_features(); |
3148 | |
3149 | if (cpu_feature_list & CPU_FEATURE_CMOV) { |
3150 | EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); |
			/* An xor reg, reg operation would overwrite the flags. */
3152 | EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); |
3153 | |
3154 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); |
3155 | FAIL_IF(!inst); |
3156 | INC_SIZE(3); |
3157 | |
3158 | *inst++ = GROUP_0F; |
3159 | /* cmovcc = setcc - 0x50. */ |
3160 | *inst++ = U8(cond_set - 0x50); |
3161 | *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]); |
3162 | return SLJIT_SUCCESS; |
3163 | } |
3164 | |
3165 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); |
3166 | FAIL_IF(!inst); |
3167 | INC_SIZE(1 + 3 + 3 + 1); |
3168 | *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); |
3169 | /* Set al to conditional flag. */ |
3170 | *inst++ = GROUP_0F; |
3171 | *inst++ = cond_set; |
3172 | *inst++ = MOD_REG | 0 /* eax */; |
3173 | |
3174 | *inst++ = GROUP_0F; |
3175 | *inst++ = MOVZX_r_rm8; |
3176 | *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */); |
3177 | *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); |
3178 | return SLJIT_SUCCESS; |
3179 | } |
3180 | |
3181 | if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) { |
3182 | SLJIT_ASSERT(reg_map[SLJIT_R0] == 0); |
3183 | |
3184 | if (dst != SLJIT_R0) { |
3185 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); |
3186 | FAIL_IF(!inst); |
3187 | INC_SIZE(1 + 3 + 2 + 1); |
3188 | /* Set low register to conditional flag. */ |
3189 | *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); |
3190 | *inst++ = GROUP_0F; |
3191 | *inst++ = cond_set; |
3192 | *inst++ = MOD_REG | 0 /* eax */; |
3193 | *inst++ = OR_rm8_r8; |
3194 | *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; |
3195 | *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); |
3196 | } |
3197 | else { |
3198 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); |
3199 | FAIL_IF(!inst); |
3200 | INC_SIZE(2 + 3 + 2 + 2); |
3201 | /* Set low register to conditional flag. */ |
3202 | *inst++ = XCHG_r_rm; |
3203 | *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); |
3204 | *inst++ = GROUP_0F; |
3205 | *inst++ = cond_set; |
3206 | *inst++ = MOD_REG | 1 /* ecx */; |
3207 | *inst++ = OR_rm8_r8; |
3208 | *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; |
3209 | *inst++ = XCHG_r_rm; |
3210 | *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); |
3211 | } |
3212 | return SLJIT_SUCCESS; |
3213 | } |
3214 | |
	/* Set TMP_REG1 to the condition value (0 or 1). */
3216 | inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); |
3217 | FAIL_IF(!inst); |
3218 | INC_SIZE(1 + 3 + 3 + 1); |
3219 | *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); |
3220 | /* Set al to conditional flag. */ |
3221 | *inst++ = GROUP_0F; |
3222 | *inst++ = cond_set; |
3223 | *inst++ = MOD_REG | 0 /* eax */; |
3224 | |
3225 | *inst++ = GROUP_0F; |
3226 | *inst++ = MOVZX_r_rm8; |
3227 | *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; |
3228 | |
3229 | *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); |
3230 | |
3231 | if (GET_OPCODE(op) < SLJIT_ADD) |
3232 | return emit_mov(compiler, dst, dstw, TMP_REG1, 0); |
3233 | |
3234 | SLJIT_SKIP_CHECKS(compiler); |
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	type &= ~SLJIT_32;
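	/* R3-S3 are kept in memory on x86-32 (see CHECK_EXTRA_REGS), so cmov cannot write them directly; those destinations, and CPUs without cmov, use the generic sequence. */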

	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#else
	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#endif

	/* ADJUST_LOCAL_OFFSET is not needed. */
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = type & SLJIT_32;
	type &= ~SLJIT_32;
#endif

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
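		/* cmovcc has no immediate operand form, so materialize the constant in TMP_REG1 first. */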
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
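	/* cmovcc uses the same condition codes as jcc; its second opcode byte is the near jcc opcode minus 0x40. */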
	*inst = U8(get_jump_code((sljit_uw)type) - 0x40);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
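		/* The offset does not fit in a signed 32-bit displacement, so load it into TMP_REG1 and form dst = SP + TMP_REG1 with lea. */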
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);
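	/* Zero-length buffer record of type 2: lets generate_code know a const immediate was just emitted so its address can be recorded for later patching. */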

	*inst++ = 0;
	*inst++ = 2;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
	sljit_uw start_size;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, 0))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
		return NULL;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM) {
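		/* The placeholder address was loaded into TMP_REG1 above; store it to dst and keep the size of that store in flags so the later fixup can step back to the 64-bit immediate. */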
		start_size = compiler->size;
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
		put_label->flags = compiler->size - start_size;
	}
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);
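	/* Zero-length buffer record of type 3: marks this position as a put_label entry for generate_code. */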

	*inst++ = 0;
	*inst++ = 3;

	return put_label;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
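	/* x86-32 stores the target as a rel32 displacement measured from the end of the 4-byte field; x86-64 rewrites the absolute address in the mov immediate. */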
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
#endif
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
	sljit_unaligned_store_sw((void*)addr, new_constant);
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);
}
