1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
6 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
7 | XX XX |
8 | XX emitX86.cpp XX |
9 | XX XX |
10 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
11 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX |
12 | */ |
13 | |
14 | #include "jitpch.h" |
15 | #ifdef _MSC_VER |
16 | #pragma hdrstop |
17 | #endif |
18 | |
19 | #if defined(_TARGET_XARCH_) |
20 | |
21 | /*****************************************************************************/ |
22 | /*****************************************************************************/ |
23 | |
24 | #include "instr.h" |
25 | #include "emit.h" |
26 | #include "codegen.h" |
27 | |
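// Returns true if the given instruction is an SSE instruction (it falls in the SSE range of the instruction table).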
28 | bool IsSSEInstruction(instruction ins) |
29 | { |
30 | return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_SSE_INSTRUCTION); |
31 | } |
32 | |
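// Returns true if the given instruction is an SSE or AVX instruction (it falls anywhere in the SSE..AVX range of the instruction table).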
33 | bool IsSSEOrAVXInstruction(instruction ins) |
34 | { |
35 | return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION); |
36 | } |
37 | |
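// Returns true if the given instruction is available only as an AVX instruction (it falls in the AVX range of the instruction table).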
38 | bool IsAVXOnlyInstruction(instruction ins) |
39 | { |
40 | return (ins >= INS_FIRST_AVX_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION); |
41 | } |
42 | |
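// Returns true if the given instruction is an FMA instruction.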
43 | bool IsFMAInstruction(instruction ins) |
44 | { |
45 | return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION); |
46 | } |
47 | |
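// Returns true if the given instruction is a BMI instruction.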
48 | bool IsBMIInstruction(instruction ins) |
49 | { |
50 | return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION); |
51 | } |
52 | |
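// getBmiRegNumber: For BMI instructions that encode an opcode extension ("/digit") in the ModR/M reg
// field (blsi, blsmsk, blsr), returns that digit cast to a regNumber; returns REG_NA for the other
// BMI instructions, which encode a real register there instead.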
53 | regNumber getBmiRegNumber(instruction ins) |
54 | { |
55 | switch (ins) |
56 | { |
57 | case INS_blsi: |
58 | { |
59 | return (regNumber)3; |
60 | } |
61 | |
62 | case INS_blsmsk: |
63 | { |
64 | return (regNumber)2; |
65 | } |
66 | |
67 | case INS_blsr: |
68 | { |
69 | return (regNumber)1; |
70 | } |
71 | |
72 | default: |
73 | { |
74 | assert(IsBMIInstruction(ins)); |
75 | return REG_NA; |
76 | } |
77 | } |
78 | } |
79 | |
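// getSseShiftRegNumber: For the immediate ("opcode xmm, imm8") forms of the SSE2 shift instructions,
// returns the "/digit" opcode extension that goes in the ModR/M reg field, cast to a regNumber.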
80 | regNumber getSseShiftRegNumber(instruction ins) |
81 | { |
82 | switch (ins) |
83 | { |
84 | case INS_psrldq: |
85 | { |
86 | return (regNumber)3; |
87 | } |
88 | |
89 | case INS_pslldq: |
90 | { |
91 | return (regNumber)7; |
92 | } |
93 | |
94 | case INS_psrld: |
95 | case INS_psrlw: |
96 | case INS_psrlq: |
97 | { |
98 | return (regNumber)2; |
99 | } |
100 | |
101 | case INS_pslld: |
102 | case INS_psllw: |
103 | case INS_psllq: |
104 | { |
105 | return (regNumber)6; |
106 | } |
107 | |
108 | case INS_psrad: |
109 | case INS_psraw: |
110 | { |
111 | return (regNumber)4; |
112 | } |
113 | |
114 | default: |
115 | { |
116 | assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8" ); |
117 | return REG_NA; |
118 | } |
119 | } |
120 | } |
121 | |
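// Returns true if the given instruction will be emitted with the VEX (AVX) encoding, i.e. it is an
// SSE or AVX instruction and the emitter is currently using VEX encodings.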
122 | bool emitter::IsAVXInstruction(instruction ins) |
123 | { |
124 | return UseVEXEncoding() && IsSSEOrAVXInstruction(ins); |
125 | } |
126 | |
127 | // Returns true if the AVX instruction is a binary operator that requires 3 operands. |
128 | // When we emit an instruction with only two operands, we will duplicate the destination |
129 | // as a source. |
// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to be formalized
//                     by adding an additional field to the instruction table to indicate whether it
//                     is a 3-operand instruction.
133 | bool emitter::IsDstDstSrcAVXInstruction(instruction ins) |
134 | { |
135 | return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != 0) && IsAVXInstruction(ins); |
136 | } |
137 | |
138 | // Returns true if the AVX instruction requires 3 operands that duplicate the source |
139 | // register in the vvvv field. |
// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to be formalized
//                     by adding an additional field to the instruction table to indicate whether it
//                     is a 3-operand instruction.
143 | bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) |
144 | { |
145 | return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != 0) && IsAVXInstruction(ins); |
146 | } |
147 | |
148 | #ifdef FEATURE_HW_INTRINSICS |
149 | //------------------------------------------------------------------------ |
150 | // IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or |
151 | // "reg/mem, reg, imm" form for the legacy, VEX, and EVEX |
152 | // encodings. |
153 | // |
154 | // Arguments: |
155 | // instruction -- processor instruction to check |
156 | // |
157 | // Return Value: |
158 | // true if instruction has a "reg, reg/mem, imm" or "reg/mem, reg, imm" encoding |
159 | // form for the legacy, VEX, and EVEX encodings. |
160 | // |
161 | // That is, the instruction takes two operands, one of which is immediate, and it |
162 | // does not need to encode any data in the VEX.vvvv field. |
163 | // |
164 | static bool IsDstSrcImmAvxInstruction(instruction ins) |
165 | { |
166 | switch (ins) |
167 | { |
168 | case INS_aeskeygenassist: |
169 | case INS_extractps: |
170 | case INS_pextrb: |
171 | case INS_pextrw: |
172 | case INS_pextrd: |
173 | case INS_pextrq: |
174 | case INS_pshufd: |
175 | case INS_pshufhw: |
176 | case INS_pshuflw: |
177 | case INS_roundpd: |
178 | case INS_roundps: |
179 | return true; |
180 | default: |
181 | return false; |
182 | } |
183 | } |
184 | #endif // FEATURE_HW_INTRINSICS |
185 | |
186 | // ------------------------------------------------------------------- |
187 | // Is4ByteSSEInstruction: Returns true if the SSE instruction is a 4-byte opcode. |
188 | // |
189 | // Arguments: |
190 | // ins - instruction |
191 | // |
// Note that this returns true for any of the instructions in instrsXArch.h that use the SSE38 or
// SSE3A macro, but returns false if the VEX encoding is in use, since that encoding does not
// require an additional byte.
195 | bool emitter::Is4ByteSSEInstruction(instruction ins) |
196 | { |
197 | return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins); |
198 | } |
199 | |
200 | // Returns true if this instruction requires a VEX prefix |
201 | // All AVX instructions require a VEX prefix |
202 | bool emitter::TakesVexPrefix(instruction ins) |
203 | { |
204 | // special case vzeroupper as it requires 2-byte VEX prefix |
205 | // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix |
206 | switch (ins) |
207 | { |
208 | case INS_lfence: |
209 | case INS_mfence: |
210 | case INS_movnti: |
211 | case INS_prefetchnta: |
212 | case INS_prefetcht0: |
213 | case INS_prefetcht1: |
214 | case INS_prefetcht2: |
215 | case INS_sfence: |
216 | case INS_vzeroupper: |
217 | return false; |
218 | default: |
219 | break; |
220 | } |
221 | |
222 | return IsAVXInstruction(ins); |
223 | } |
224 | |
225 | // Add base VEX prefix without setting W, R, X, or B bits |
226 | // L bit will be set based on emitter attr. |
227 | // |
228 | // 2-byte VEX prefix = C5 <R,vvvv,L,pp> |
229 | // 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp> |
230 | // - R, X, B, W - bits to express corresponding REX prefixes |
//  - m-mmmm (5-bit)
//    0-00001 - implied leading 0F opcode byte
//    0-00010 - implied leading 0F 38 opcode bytes
//    0-00011 - implied leading 0F 3A opcode bytes
//    Rest    - reserved for future use; using them will result in an Undefined Instruction exception
//
//  - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
//  - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1)
//  - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix;
//                  these prefixes are treated as mandatory when used with the escape opcode 0Fh for
//                  some SIMD instructions
//                  00 - None   (0F    - packed float)
//                  01 - 66     (66 0F - packed double)
//                  10 - F3     (F3 0F - scalar float)
//                  11 - F2     (F2 0F - scalar double)
246 | #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL |
247 | #define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL |
248 | #define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL |
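// For reference, DEFAULT_3BYTE_VEX_PREFIX places the bytes C4 E0 78 in bits 32-55 of the code:
//   C4 - the 3-byte VEX escape byte
//   E0 - R=1, X=1, B=1 (bit-inverted, so no REX.R/X/B) and m-mmmm = 00000 (filled in later)
//   78 - W=0, vvvv=1111 (unused), L=0, pp=00 (L and pp are filled in later as needed)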
249 | emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr) |
250 | { |
251 | // The 2-byte VEX encoding is preferred when possible, but actually emitting |
252 | // it depends on a number of factors that we may not know until much later. |
253 | // |
254 | // In order to handle this "easily", we just carry the 3-byte encoding all |
255 | // the way through and "fix-up" the encoding when the VEX prefix is actually |
256 | // emitted, by simply checking that all the requirements were met. |
257 | |
258 | // Only AVX instructions require VEX prefix |
259 | assert(IsAVXInstruction(ins)); |
260 | |
261 | // Shouldn't have already added VEX prefix |
262 | assert(!hasVexPrefix(code)); |
263 | |
264 | assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0); |
265 | |
266 | code |= DEFAULT_3BYTE_VEX_PREFIX; |
267 | |
268 | if (attr == EA_32BYTE) |
269 | { |
270 | // Set L bit to 1 in case of instructions that operate on 256-bits. |
271 | code |= LBIT_IN_3BYTE_VEX_PREFIX; |
272 | } |
273 | |
274 | return code; |
275 | } |
276 | |
277 | // Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix |
278 | bool TakesRexWPrefix(instruction ins, emitAttr attr) |
279 | { |
280 | // Because the current implementation of AVX does not have a way to distinguish between the register |
281 | // size specification (128 vs. 256 bits) and the operand size specification (32 vs. 64 bits), where both are |
282 | // required, the instruction must be created with the register size attribute (EA_16BYTE or EA_32BYTE), |
283 | // and here we must special case these by the opcode. |
284 | switch (ins) |
285 | { |
286 | case INS_vpermpd: |
287 | case INS_vpermq: |
288 | case INS_vpsrlvq: |
289 | case INS_vpsllvq: |
290 | case INS_pinsrq: |
291 | case INS_pextrq: |
292 | case INS_vfmadd132pd: |
293 | case INS_vfmadd213pd: |
294 | case INS_vfmadd231pd: |
295 | case INS_vfmadd132sd: |
296 | case INS_vfmadd213sd: |
297 | case INS_vfmadd231sd: |
298 | case INS_vfmaddsub132pd: |
299 | case INS_vfmaddsub213pd: |
300 | case INS_vfmaddsub231pd: |
301 | case INS_vfmsubadd132pd: |
302 | case INS_vfmsubadd213pd: |
303 | case INS_vfmsubadd231pd: |
304 | case INS_vfmsub132pd: |
305 | case INS_vfmsub213pd: |
306 | case INS_vfmsub231pd: |
307 | case INS_vfmsub132sd: |
308 | case INS_vfmsub213sd: |
309 | case INS_vfmsub231sd: |
310 | case INS_vfnmadd132pd: |
311 | case INS_vfnmadd213pd: |
312 | case INS_vfnmadd231pd: |
313 | case INS_vfnmadd132sd: |
314 | case INS_vfnmadd213sd: |
315 | case INS_vfnmadd231sd: |
316 | case INS_vfnmsub132pd: |
317 | case INS_vfnmsub213pd: |
318 | case INS_vfnmsub231pd: |
319 | case INS_vfnmsub132sd: |
320 | case INS_vfnmsub213sd: |
321 | case INS_vfnmsub231sd: |
322 | case INS_vpmaskmovq: |
323 | case INS_vpgatherdq: |
324 | case INS_vpgatherqq: |
325 | case INS_vgatherdpd: |
326 | case INS_vgatherqpd: |
327 | return true; |
328 | default: |
329 | break; |
330 | } |
331 | |
332 | #ifdef _TARGET_AMD64_ |
333 | // movsx should always sign extend out to 8 bytes just because we don't track |
334 | // whether the dest should be 4 bytes or 8 bytes (attr indicates the size |
335 | // of the source, not the dest). |
336 | // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special |
337 | // cased here. |
338 | // |
339 | // Rex_jmp = jmp with rex prefix always requires rex.w prefix. |
340 | if (ins == INS_movsx || ins == INS_rex_jmp) |
341 | { |
342 | return true; |
343 | } |
344 | |
345 | if (EA_SIZE(attr) != EA_8BYTE) |
346 | { |
347 | return false; |
348 | } |
349 | |
350 | if (IsSSEOrAVXInstruction(ins)) |
351 | { |
352 | switch (ins) |
353 | { |
354 | case INS_andn: |
355 | case INS_bextr: |
356 | case INS_blsi: |
357 | case INS_blsmsk: |
358 | case INS_blsr: |
359 | case INS_bzhi: |
360 | case INS_cvttsd2si: |
361 | case INS_cvttss2si: |
362 | case INS_cvtsd2si: |
363 | case INS_cvtss2si: |
364 | case INS_cvtsi2sd: |
365 | case INS_cvtsi2ss: |
366 | case INS_mov_xmm2i: |
367 | case INS_mov_i2xmm: |
368 | case INS_movnti: |
369 | case INS_mulx: |
370 | case INS_pdep: |
371 | case INS_pext: |
372 | return true; |
373 | default: |
374 | return false; |
375 | } |
376 | } |
377 | |
378 | // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these |
379 | // opcodes... |
380 | // These are all the instructions that default to 8-byte operand without the REX.W bit |
381 | // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes |
382 | // so we never need it |
383 | if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) && |
384 | (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg))) |
385 | { |
386 | return true; |
387 | } |
388 | else |
389 | { |
390 | return false; |
391 | } |
392 | #else //!_TARGET_AMD64 = _TARGET_X86_ |
393 | return false; |
394 | #endif //!_TARGET_AMD64_ |
395 | } |
396 | |
397 | // Returns true if using this register will require a REX.* prefix. |
// Since XMM registers overlap with YMM registers, this routine
// can also be used to know whether a YMM register will require a REX.* prefix
// when the instruction in question is an AVX instruction.
401 | bool IsExtendedReg(regNumber reg) |
402 | { |
403 | #ifdef _TARGET_AMD64_ |
404 | return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15)); |
405 | #else |
406 | // X86 JIT operates in 32-bit mode and hence extended reg are not available. |
407 | return false; |
408 | #endif |
409 | } |
410 | |
411 | // Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix |
412 | bool IsExtendedReg(regNumber reg, emitAttr attr) |
413 | { |
414 | #ifdef _TARGET_AMD64_ |
415 | // Not a register, so doesn't need a prefix |
416 | if (reg > REG_XMM15) |
417 | { |
418 | return false; |
419 | } |
420 | |
    // Opcode field only has 3 bits for the register, these high registers
    // need a 4th bit, that comes from the REX prefix (either REX.X, REX.R, or REX.B)
423 | if (IsExtendedReg(reg)) |
424 | { |
425 | return true; |
426 | } |
427 | |
428 | if (EA_SIZE(attr) != EA_1BYTE) |
429 | { |
430 | return false; |
431 | } |
432 | |
    // There are 12 one byte registers addressable 'below' r8b:
    //     al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
    // The first 4 are always addressable, the last 8 are divided into 2 sets:
436 | // ah, ch, dh, bh |
437 | // -- or -- |
438 | // spl, bpl, sil, dil |
439 | // Both sets are encoded exactly the same, the difference is the presence |
440 | // of a REX prefix, even a REX prefix with no other bits set (0x40). |
441 | // So in order to get to the second set we need a REX prefix (but no bits). |
442 | // |
    // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
    //                encoding/tracking registers.
445 | return (reg >= REG_RSP); |
446 | #else |
447 | // X86 JIT operates in 32-bit mode and hence extended reg are not available. |
448 | return false; |
449 | #endif |
450 | } |
451 | |
// Since XMM registers overlap with YMM registers, this routine
// can also be used to know whether a register is a YMM register in the case of AVX instructions.
454 | bool IsXMMReg(regNumber reg) |
455 | { |
456 | #ifdef _TARGET_AMD64_ |
457 | return (reg >= REG_XMM0) && (reg <= REG_XMM15); |
458 | #else // !_TARGET_AMD64_ |
459 | return (reg >= REG_XMM0) && (reg <= REG_XMM7); |
460 | #endif // !_TARGET_AMD64_ |
461 | } |
462 | |
463 | // Returns bits to be encoded in instruction for the given register. |
464 | unsigned RegEncoding(regNumber reg) |
465 | { |
466 | static_assert((REG_XMM0 & 0x7) == 0, "bad XMMBASE" ); |
467 | return (unsigned)(reg & 0x7); |
468 | } |
469 | |
470 | // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes |
471 | // SSE2: separate 1-byte prefix gets added before opcode. |
472 | // AVX: specific bits within VEX prefix need to be set in bit-inverted form. |
473 | emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code) |
474 | { |
475 | if (UseVEXEncoding() && IsAVXInstruction(ins)) |
476 | { |
477 | if (TakesVexPrefix(ins)) |
478 | { |
479 | // W-bit is available only in 3-byte VEX prefix that starts with byte C4. |
480 | assert(hasVexPrefix(code)); |
481 | |
482 | // W-bit is the only bit that is added in non bit-inverted form. |
483 | return emitter::code_t(code | 0x00008000000000ULL); |
484 | } |
485 | } |
486 | #ifdef _TARGET_AMD64_ |
487 | return emitter::code_t(code | 0x4800000000ULL); |
488 | #else |
489 | assert(!"UNREACHED" ); |
490 | return code; |
491 | #endif |
492 | } |
493 | |
494 | #ifdef _TARGET_AMD64_ |
495 | |
496 | emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code) |
497 | { |
498 | if (UseVEXEncoding() && IsAVXInstruction(ins)) |
499 | { |
500 | if (TakesVexPrefix(ins)) |
501 | { |
502 | // R-bit is supported by both 2-byte and 3-byte VEX prefix |
503 | assert(hasVexPrefix(code)); |
504 | |
505 | // R-bit is added in bit-inverted form. |
506 | return code & 0xFF7FFFFFFFFFFFULL; |
507 | } |
508 | } |
509 | |
510 | return code | 0x4400000000ULL; |
511 | } |
512 | |
513 | emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code) |
514 | { |
515 | if (UseVEXEncoding() && IsAVXInstruction(ins)) |
516 | { |
517 | if (TakesVexPrefix(ins)) |
518 | { |
519 | // X-bit is available only in 3-byte VEX prefix that starts with byte C4. |
520 | assert(hasVexPrefix(code)); |
521 | |
522 | // X-bit is added in bit-inverted form. |
523 | return code & 0xFFBFFFFFFFFFFFULL; |
524 | } |
525 | } |
526 | |
527 | return code | 0x4200000000ULL; |
528 | } |
529 | |
530 | emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code) |
531 | { |
532 | if (UseVEXEncoding() && IsAVXInstruction(ins)) |
533 | { |
534 | if (TakesVexPrefix(ins)) |
535 | { |
536 | // B-bit is available only in 3-byte VEX prefix that starts with byte C4. |
537 | assert(hasVexPrefix(code)); |
538 | |
539 | // B-bit is added in bit-inverted form. |
540 | return code & 0xFFDFFFFFFFFFFFULL; |
541 | } |
542 | } |
543 | |
544 | return code | 0x4100000000ULL; |
545 | } |
546 | |
547 | // Adds REX prefix (0x40) without W, R, X or B bits set |
548 | emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code) |
549 | { |
550 | assert(!UseVEXEncoding() || !IsAVXInstruction(ins)); |
551 | return code | 0x4000000000ULL; |
552 | } |
553 | |
554 | #endif //_TARGET_AMD64_ |
555 | |
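// isPrefix: Returns true if the given byte is one of the SIMD size prefixes (0x66, 0xF2 or 0xF3) that
// can appear ahead of the escape byte in our opcode encodings; other prefix bytes, which the emitter
// handles elsewhere, are rejected by assert.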
556 | bool isPrefix(BYTE b) |
557 | { |
558 | assert(b != 0); // Caller should check this |
559 | assert(b != 0x67); // We don't use the address size prefix |
560 | assert(b != 0x65); // The GS segment override prefix is emitted separately |
561 | assert(b != 0x64); // The FS segment override prefix is emitted separately |
562 | assert(b != 0xF0); // The lock prefix is emitted separately |
563 | assert(b != 0x2E); // We don't use the CS segment override prefix |
564 | assert(b != 0x3E); // Or the DS segment override prefix |
565 | assert(b != 0x26); // Or the ES segment override prefix |
566 | assert(b != 0x36); // Or the SS segment override prefix |
567 | |
568 | // That just leaves the size prefixes used in SSE opcodes: |
569 | // Scalar Double Scalar Single Packed Double |
570 | return ((b == 0xF2) || (b == 0xF3) || (b == 0x66)); |
571 | } |
572 | |
573 | // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. |
574 | unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) |
575 | { |
576 | if (hasVexPrefix(code)) |
577 | { |
578 | // Only AVX instructions should have a VEX prefix |
579 | assert(UseVEXEncoding() && IsAVXInstruction(ins)); |
580 | code_t vexPrefix = (code >> 32) & 0x00FFFFFF; |
581 | code &= 0x00000000FFFFFFFFLL; |
582 | |
583 | WORD leadingBytes = 0; |
584 | BYTE check = (code >> 24) & 0xFF; |
585 | if (check != 0) |
586 | { |
587 | // 3-byte opcode: with the bytes ordered as 0x2211RM33 or |
588 | // 4-byte opcode: with the bytes ordered as 0x22114433 |
589 | // check for a prefix in the 11 position |
590 | BYTE sizePrefix = (code >> 16) & 0xFF; |
591 | if ((sizePrefix != 0) && isPrefix(sizePrefix)) |
592 | { |
593 | // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits |
594 | // |
595 | // 00 - None (0F - packed float) |
596 | // 01 - 66 (66 0F - packed double) |
                //   10 - F3 (F3 0F - scalar float)
598 | // 11 - F2 (F2 0F - scalar double) |
599 | switch (sizePrefix) |
600 | { |
601 | case 0x66: |
602 | if (IsBMIInstruction(ins)) |
603 | { |
604 | switch (ins) |
605 | { |
606 | case INS_pdep: |
607 | case INS_mulx: |
608 | { |
609 | vexPrefix |= 0x03; |
610 | break; |
611 | } |
612 | |
613 | case INS_pext: |
614 | { |
615 | vexPrefix |= 0x02; |
616 | break; |
617 | } |
618 | |
619 | default: |
620 | { |
621 | vexPrefix |= 0x00; |
622 | break; |
623 | } |
624 | } |
625 | } |
626 | else |
627 | { |
628 | vexPrefix |= 0x01; |
629 | } |
630 | break; |
631 | case 0xF3: |
632 | vexPrefix |= 0x02; |
633 | break; |
634 | case 0xF2: |
635 | vexPrefix |= 0x03; |
636 | break; |
637 | default: |
638 | assert(!"unrecognized SIMD size prefix" ); |
639 | unreached(); |
640 | } |
641 | |
642 | // Now the byte in the 22 position must be an escape byte 0F |
643 | leadingBytes = check; |
644 | assert(leadingBytes == 0x0F); |
645 | |
646 | // Get rid of both sizePrefix and escape byte |
647 | code &= 0x0000FFFFLL; |
648 | |
649 | // Check the byte in the 33 position to see if it is 3A or 38. |
650 | // In such a case escape bytes must be 0x0F3A or 0x0F38 |
651 | check = code & 0xFF; |
652 | if (check == 0x3A || check == 0x38) |
653 | { |
654 | leadingBytes = (leadingBytes << 8) | check; |
655 | code &= 0x0000FF00LL; |
656 | } |
657 | } |
658 | } |
659 | else |
660 | { |
661 | // 2-byte opcode with the bytes ordered as 0x0011RM22 |
662 | // the byte in position 11 must be an escape byte. |
663 | leadingBytes = (code >> 16) & 0xFF; |
664 | assert(leadingBytes == 0x0F || leadingBytes == 0x00); |
665 | code &= 0xFFFF; |
666 | } |
667 | |
668 | // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 |
669 | // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these |
670 | // implied leading bytes. 0x0F is supported by both the 2-byte and |
671 | // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by |
672 | // the 3-byte version. |
673 | |
674 | switch (leadingBytes) |
675 | { |
676 | case 0x00: |
677 | // there is no leading byte |
678 | break; |
679 | case 0x0F: |
680 | vexPrefix |= 0x0100; |
681 | break; |
682 | case 0x0F38: |
683 | vexPrefix |= 0x0200; |
684 | break; |
685 | case 0x0F3A: |
686 | vexPrefix |= 0x0300; |
687 | break; |
688 | default: |
689 | assert(!"encountered unknown leading bytes" ); |
690 | unreached(); |
691 | } |
692 | |
693 | // At this point |
694 | // VEX.2211RM33 got transformed as VEX.0000RM33 |
695 | // VEX.0011RM22 got transformed as VEX.0000RM22 |
696 | // |
697 | // Now output VEX prefix leaving the 4-byte opcode |
698 | |
699 | // The 2-byte VEX encoding, requires that the X and B-bits are set (these |
700 | // bits are inverted from the REX values so set means off), the W-bit is |
701 | // not set (this bit is not inverted), and that the m-mmmm bits are 0-0001 |
702 | // (the 2-byte VEX encoding only supports the 0x0F leading byte). When these |
703 | // conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then |
704 | // byte-1 is the logical-or of bit 7 from byte-1 and bits 0-6 from byte 2 |
705 | // from the 3-byte VEX encoding. |
706 | // |
707 | // Given the above, the check can be reduced to a simple mask and comparison. |
708 | // * 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about: |
709 | // * R can be set or unset (0x7F ignores bit 7) |
710 | // * vvvv can be any value (0x80 ignores bits 3-6) |
711 | // * L can be set or unset (0x80 ignores bit 2) |
712 | // * pp can be any value (0x80 ignores bits 0-1) |
713 | // * 0x00C46100 is a value that signifies the requirements listed above were met: |
714 | // * We must be a three-byte VEX opcode (0x00C4) |
715 | // * X and B must be set (0x61 validates bits 5-6) |
716 | // * m-mmmm must be 0-00001 (0x61 validates bits 0-4) |
717 | // * W must be unset (0x00 validates bit 7) |
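        // As a concrete example, "vandps xmm1, xmm2, xmm3" carries the 3-byte prefix C4 E1 68 at this
        // point; X and B are set, W is clear and m-mmmm is 00001, so it can be emitted as the
        // equivalent 2-byte prefix C5 E8 instead.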
718 | if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) |
719 | { |
720 | emitOutputByte(dst, 0xC5); |
721 | emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); |
722 | return 2; |
723 | } |
724 | |
725 | emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); |
726 | emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF)); |
727 | emitOutputByte(dst + 2, vexPrefix & 0xFF); |
728 | return 3; |
729 | } |
730 | |
731 | #ifdef _TARGET_AMD64_ |
732 | if (code > 0x00FFFFFFFFLL) |
733 | { |
734 | BYTE prefix = (code >> 32) & 0xFF; |
735 | noway_assert(prefix >= 0x40 && prefix <= 0x4F); |
736 | code &= 0x00000000FFFFFFFFLL; |
737 | |
738 | // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now) |
739 | // we can remove this code as well |
740 | |
741 | // The REX prefix is required to come after all other prefixes. |
742 | // Some of our 'opcodes' actually include some prefixes, if that |
743 | // is the case, shift them over and place the REX prefix after |
744 | // the other prefixes, and emit any prefix that got moved out. |
745 | BYTE check = (code >> 24) & 0xFF; |
746 | if (check == 0) |
747 | { |
748 | // 3-byte opcode: with the bytes ordered as 0x00113322 |
749 | // check for a prefix in the 11 position |
750 | check = (code >> 16) & 0xFF; |
751 | if (check != 0 && isPrefix(check)) |
752 | { |
753 | // Swap the rex prefix and whatever this prefix is |
754 | code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL)); |
755 | // and then emit the other prefix |
756 | return emitOutputByte(dst, check); |
757 | } |
758 | } |
759 | else |
760 | { |
761 | // 4-byte opcode with the bytes ordered as 0x22114433 |
762 | // first check for a prefix in the 11 position |
763 | BYTE check2 = (code >> 16) & 0xFF; |
764 | if (isPrefix(check2)) |
765 | { |
766 | assert(!isPrefix(check)); // We currently don't use this, so it is untested |
767 | if (isPrefix(check)) |
768 | { |
769 | // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX |
770 | // Change to c2rrc1XXXX, and emit check2 now |
771 | code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL)); |
772 | } |
773 | else |
774 | { |
775 | // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode) |
776 | // Change to c2XXrrXXXX, and emit check2 now |
777 | code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL)); |
778 | } |
779 | return emitOutputByte(dst, check2); |
780 | } |
781 | } |
782 | |
783 | return emitOutputByte(dst, prefix); |
784 | } |
785 | #endif // _TARGET_AMD64_ |
786 | |
787 | return 0; |
788 | } |
789 | |
790 | #ifdef _TARGET_AMD64_ |
791 | /***************************************************************************** |
792 | * Is the last instruction emitted a call instruction? |
793 | */ |
794 | bool emitter::emitIsLastInsCall() |
795 | { |
796 | if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call)) |
797 | { |
798 | return true; |
799 | } |
800 | |
801 | return false; |
802 | } |
803 | |
804 | /***************************************************************************** |
805 | * We're about to create an epilog. If the last instruction we output was a 'call', |
806 | * then we need to insert a NOP, to allow for proper exception-handling behavior. |
807 | */ |
808 | void emitter::emitOutputPreEpilogNOP() |
809 | { |
810 | if (emitIsLastInsCall()) |
811 | { |
812 | emitIns(INS_nop); |
813 | } |
814 | } |
815 | |
816 | #endif //_TARGET_AMD64_ |
817 | |
818 | // Size of rex prefix in bytes |
819 | unsigned emitter::emitGetRexPrefixSize(instruction ins) |
820 | { |
821 | // In case of AVX instructions, REX prefixes are part of VEX prefix. |
822 | // And hence requires no additional byte to encode REX prefixes. |
823 | if (IsAVXInstruction(ins)) |
824 | { |
825 | return 0; |
826 | } |
827 | |
828 | // If not AVX, then we would need 1-byte to encode REX prefix. |
829 | return 1; |
830 | } |
831 | |
832 | // Size of vex prefix in bytes |
833 | unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) |
834 | { |
835 | if (IsAVXInstruction(ins)) |
836 | { |
837 | return 3; |
838 | } |
839 | |
840 | // If not AVX, then we don't need to encode vex prefix. |
841 | return 0; |
842 | } |
843 | |
// The VEX prefix encodes some bytes of the opcode, so the overall size of the instruction is reduced.
// Therefore, estimating the size by adding the VEX prefix size and the size of the instruction opcode bytes
// will always overestimate.
// Instead, this routine adjusts the size of the VEX prefix based on the number of opcode bytes it encodes,
// so that the instruction size estimate is accurate.
// Basically this function decreases the vexPrefixSize,
// so that opcodeSize + vexPrefixAdjustedSize is the right size:
//   rightOpcodeSize + vexPrefixSize
// = (opcodeSize - ExtraBytesSize) + vexPrefixSize
// = opcodeSize + (vexPrefixSize - ExtraBytesSize)
// = opcodeSize + vexPrefixAdjustedSize
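// For example, an instruction whose legacy form is "66 0F <opcode> ModR/M" is counted as 4 opcode
// bytes; under VEX both the 66 and the 0F are folded into the prefix, so the adjusted prefix size is
// 3 - 2 = 1, which gives the correct 5-byte estimate for the 3-byte-VEX form "C4 xx xx <opcode> ModR/M".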
854 | unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code) |
855 | { |
856 | if (IsAVXInstruction(ins)) |
857 | { |
858 | unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); |
859 | assert(vexPrefixAdjustedSize == 3); |
860 | |
        // In this case the opcode contains at least one escape prefix byte,
        // so vexPrefixAdjustedSize should be decreased by one.
863 | vexPrefixAdjustedSize -= 1; |
864 | |
865 | // Get the fourth byte in Opcode. |
866 | // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not. |
867 | BYTE check = (code >> 24) & 0xFF; |
868 | if (check != 0) |
869 | { |
870 | // 3-byte opcode: with the bytes ordered as 0x2211RM33 or |
871 | // 4-byte opcode: with the bytes ordered as 0x22114433 |
            // The SIMD prefix, if present, is in the '11' position.
873 | BYTE sizePrefix = (code >> 16) & 0xFF; |
874 | if (sizePrefix != 0 && isPrefix(sizePrefix)) |
875 | { |
876 | vexPrefixAdjustedSize -= 1; |
877 | } |
878 | |
            // If the opcode size is 4 bytes, then the second escape prefix is in the fourth byte of the opcode.
            // But in that case the opcode size has not counted the ModR/M byte, so the two cancel out:
            //   opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR/MSize
            // = opcodeSize + VexPrefixAdjustedSize - 1 + 1
            // = opcodeSize + VexPrefixAdjustedSize
            // So although we may have a second escape prefix byte, we don't decrease vexPrefixAdjustedSize.
885 | } |
886 | |
887 | return vexPrefixAdjustedSize; |
888 | } |
889 | return 0; |
890 | } |
891 | |
892 | // Get size of rex or vex prefix emitted in code |
893 | unsigned emitter::emitGetPrefixSize(code_t code) |
894 | { |
895 | if (hasVexPrefix(code)) |
896 | { |
897 | return 3; |
898 | } |
899 | |
900 | if (hasRexPrefix(code)) |
901 | { |
902 | return 1; |
903 | } |
904 | |
905 | return 0; |
906 | } |
907 | |
908 | #ifdef _TARGET_X86_ |
909 | /***************************************************************************** |
910 | * |
911 | * Record a non-empty stack |
912 | */ |
913 | |
914 | void emitter::emitMarkStackLvl(unsigned stackLevel) |
915 | { |
916 | assert(int(stackLevel) >= 0); |
917 | assert(emitCurStackLvl == 0); |
918 | assert(emitCurIG->igStkLvl == 0); |
919 | assert(emitCurIGfreeNext == emitCurIGfreeBase); |
920 | |
921 | assert(stackLevel && stackLevel % sizeof(int) == 0); |
922 | |
923 | emitCurStackLvl = emitCurIG->igStkLvl = stackLevel; |
924 | |
925 | if (emitMaxStackDepth < emitCurStackLvl) |
926 | { |
927 | JITDUMP("Upping emitMaxStackDepth from %d to %d\n" , emitMaxStackDepth, emitCurStackLvl); |
928 | emitMaxStackDepth = emitCurStackLvl; |
929 | } |
930 | } |
931 | #endif |
932 | |
933 | /***************************************************************************** |
934 | * |
935 | * Get hold of the address mode displacement value for an indirect call. |
936 | */ |
937 | |
938 | inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id) |
939 | { |
940 | if (id->idIsLargeCall()) |
941 | { |
942 | return ((instrDescCGCA*)id)->idcDisp; |
943 | } |
944 | else |
945 | { |
946 | assert(!id->idIsLargeDsp()); |
947 | assert(!id->idIsLargeCns()); |
948 | |
949 | return id->idAddr()->iiaAddrMode.amDisp; |
950 | } |
951 | } |
952 | |
953 | /** *************************************************************************** |
954 | * |
955 | * The following table is used by the instIsFP()/instUse/DefFlags() helpers. |
956 | */ |
957 | |
958 | // clang-format off |
959 | const insFlags CodeGenInterface::instInfo[] = |
960 | { |
961 | #define INST0(id, nm, um, mr, flags) static_cast<insFlags>(flags), |
962 | #define INST1(id, nm, um, mr, flags) static_cast<insFlags>(flags), |
963 | #define INST2(id, nm, um, mr, mi, flags) static_cast<insFlags>(flags), |
964 | #define INST3(id, nm, um, mr, mi, rm, flags) static_cast<insFlags>(flags), |
965 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) static_cast<insFlags>(flags), |
966 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) static_cast<insFlags>(flags), |
967 | #include "instrs.h" |
968 | #undef INST0 |
969 | #undef INST1 |
970 | #undef INST2 |
971 | #undef INST3 |
972 | #undef INST4 |
973 | #undef INST5 |
974 | }; |
975 | // clang-format on |
976 | |
977 | /***************************************************************************** |
978 | * |
979 | * Initialize the table used by emitInsModeFormat(). |
980 | */ |
981 | |
982 | // clang-format off |
983 | const BYTE emitter::emitInsModeFmtTab[] = |
984 | { |
985 | #define INST0(id, nm, um, mr, flags) um, |
986 | #define INST1(id, nm, um, mr, flags) um, |
987 | #define INST2(id, nm, um, mr, mi, flags) um, |
988 | #define INST3(id, nm, um, mr, mi, rm, flags) um, |
989 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) um, |
990 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) um, |
991 | #include "instrs.h" |
992 | #undef INST0 |
993 | #undef INST1 |
994 | #undef INST2 |
995 | #undef INST3 |
996 | #undef INST4 |
997 | #undef INST5 |
998 | }; |
999 | // clang-format on |
1000 | |
1001 | #ifdef DEBUG |
1002 | unsigned const emitter::emitInsModeFmtCnt = _countof(emitInsModeFmtTab); |
1003 | #endif |
1004 | |
1005 | /***************************************************************************** |
1006 | * |
 *  Combine the given base format with the update mode of the instruction.
1008 | */ |
1009 | |
1010 | inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base) |
1011 | { |
1012 | assert(IF_RRD + IUM_RD == IF_RRD); |
1013 | assert(IF_RRD + IUM_WR == IF_RWR); |
1014 | assert(IF_RRD + IUM_RW == IF_RRW); |
1015 | |
1016 | return (insFormat)(base + emitInsUpdateMode(ins)); |
1017 | } |
1018 | |
// This is a helper we need due to VS Whidbey #254016 in order to determine
// whether we can not possibly be updating an integer register. This is not the best
// solution, but the other ones (see bug) are going to be much more complicated.
1022 | bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) |
1023 | { |
1024 | instruction ins = id->idIns(); |
1025 | |
1026 | if (!IsSSEOrAVXInstruction(ins)) |
1027 | { |
1028 | return false; |
1029 | } |
1030 | |
1031 | switch (ins) |
1032 | { |
1033 | case INS_andn: |
1034 | case INS_bextr: |
1035 | case INS_blsi: |
1036 | case INS_blsmsk: |
1037 | case INS_blsr: |
1038 | case INS_bzhi: |
1039 | case INS_cvttsd2si: |
1040 | case INS_cvttss2si: |
1041 | case INS_cvtsd2si: |
1042 | case INS_cvtss2si: |
1043 | case INS_extractps: |
1044 | case INS_mov_xmm2i: |
1045 | case INS_movmskpd: |
1046 | case INS_movmskps: |
1047 | case INS_mulx: |
1048 | case INS_pdep: |
1049 | case INS_pext: |
1050 | case INS_pmovmskb: |
1051 | case INS_pextrb: |
1052 | case INS_pextrd: |
1053 | case INS_pextrq: |
1054 | case INS_pextrw: |
1055 | case INS_pextrw_sse41: |
1056 | { |
1057 | // These SSE instructions write to a general purpose integer register. |
1058 | return false; |
1059 | } |
1060 | |
1061 | default: |
1062 | { |
1063 | return true; |
1064 | } |
1065 | } |
1066 | } |
1067 | |
1068 | /***************************************************************************** |
1069 | * |
1070 | * Returns the base encoding of the given CPU instruction. |
1071 | */ |
1072 | |
1073 | inline size_t insCode(instruction ins) |
1074 | { |
1075 | // clang-format off |
1076 | const static |
1077 | size_t insCodes[] = |
1078 | { |
1079 | #define INST0(id, nm, um, mr, flags) mr, |
1080 | #define INST1(id, nm, um, mr, flags) mr, |
1081 | #define INST2(id, nm, um, mr, mi, flags) mr, |
1082 | #define INST3(id, nm, um, mr, mi, rm, flags) mr, |
1083 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr, |
1084 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr, |
1085 | #include "instrs.h" |
1086 | #undef INST0 |
1087 | #undef INST1 |
1088 | #undef INST2 |
1089 | #undef INST3 |
1090 | #undef INST4 |
1091 | #undef INST5 |
1092 | }; |
1093 | // clang-format on |
1094 | |
1095 | assert((unsigned)ins < _countof(insCodes)); |
1096 | assert((insCodes[ins] != BAD_CODE)); |
1097 | |
1098 | return insCodes[ins]; |
1099 | } |
1100 | |
1101 | /***************************************************************************** |
1102 | * |
1103 | * Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction. |
1104 | */ |
1105 | |
1106 | inline size_t insCodeACC(instruction ins) |
1107 | { |
1108 | // clang-format off |
1109 | const static |
1110 | size_t insCodesACC[] = |
1111 | { |
1112 | #define INST0(id, nm, um, mr, flags) |
1113 | #define INST1(id, nm, um, mr, flags) |
1114 | #define INST2(id, nm, um, mr, mi, flags) |
1115 | #define INST3(id, nm, um, mr, mi, rm, flags) |
1116 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) a4, |
1117 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) a4, |
1118 | #include "instrs.h" |
1119 | #undef INST0 |
1120 | #undef INST1 |
1121 | #undef INST2 |
1122 | #undef INST3 |
1123 | #undef INST4 |
1124 | #undef INST5 |
1125 | }; |
1126 | // clang-format on |
1127 | |
1128 | assert((unsigned)ins < _countof(insCodesACC)); |
1129 | assert((insCodesACC[ins] != BAD_CODE)); |
1130 | |
1131 | return insCodesACC[ins]; |
1132 | } |
1133 | |
1134 | /***************************************************************************** |
1135 | * |
1136 | * Returns the "register" encoding of the given CPU instruction. |
1137 | */ |
1138 | |
1139 | inline size_t insCodeRR(instruction ins) |
1140 | { |
1141 | // clang-format off |
1142 | const static |
1143 | size_t insCodesRR[] = |
1144 | { |
1145 | #define INST0(id, nm, um, mr, flags) |
1146 | #define INST1(id, nm, um, mr, flags) |
1147 | #define INST2(id, nm, um, mr, mi, flags) |
1148 | #define INST3(id, nm, um, mr, mi, rm, flags) |
1149 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) |
1150 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rr, |
1151 | #include "instrs.h" |
1152 | #undef INST0 |
1153 | #undef INST1 |
1154 | #undef INST2 |
1155 | #undef INST3 |
1156 | #undef INST4 |
1157 | #undef INST5 |
1158 | }; |
1159 | // clang-format on |
1160 | |
1161 | assert((unsigned)ins < _countof(insCodesRR)); |
1162 | assert((insCodesRR[ins] != BAD_CODE)); |
1163 | |
1164 | return insCodesRR[ins]; |
1165 | } |
1166 | |
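// The "reg, [r/m]" (RM) encodings; only instructions declared with the INST3/INST4/INST5 macros
// contribute entries, and BAD_CODE marks an instruction that has no RM form.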
1167 | // clang-format off |
1168 | const static |
1169 | size_t insCodesRM[] = |
1170 | { |
1171 | #define INST0(id, nm, um, mr, flags) |
1172 | #define INST1(id, nm, um, mr, flags) |
1173 | #define INST2(id, nm, um, mr, mi, flags) |
1174 | #define INST3(id, nm, um, mr, mi, rm, flags) rm, |
1175 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) rm, |
1176 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rm, |
1177 | #include "instrs.h" |
1178 | #undef INST0 |
1179 | #undef INST1 |
1180 | #undef INST2 |
1181 | #undef INST3 |
1182 | #undef INST4 |
1183 | #undef INST5 |
1184 | }; |
1185 | // clang-format on |
1186 | |
// Returns true iff the given CPU instruction has an RM encoding.
1188 | inline bool hasCodeRM(instruction ins) |
1189 | { |
1190 | assert((unsigned)ins < _countof(insCodesRM)); |
1191 | return ((insCodesRM[ins] != BAD_CODE)); |
1192 | } |
1193 | |
1194 | /***************************************************************************** |
1195 | * |
1196 | * Returns the "reg, [r/m]" encoding of the given CPU instruction. |
1197 | */ |
1198 | |
1199 | inline size_t insCodeRM(instruction ins) |
1200 | { |
1201 | assert((unsigned)ins < _countof(insCodesRM)); |
1202 | assert((insCodesRM[ins] != BAD_CODE)); |
1203 | |
1204 | return insCodesRM[ins]; |
1205 | } |
1206 | |
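// The "[r/m], icon" (MI) encodings; only instructions declared with the INST2 and higher macros
// contribute entries, and BAD_CODE marks an instruction that has no MI form.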
1207 | // clang-format off |
1208 | const static |
1209 | size_t insCodesMI[] = |
1210 | { |
1211 | #define INST0(id, nm, um, mr, flags) |
1212 | #define INST1(id, nm, um, mr, flags) |
1213 | #define INST2(id, nm, um, mr, mi, flags) mi, |
1214 | #define INST3(id, nm, um, mr, mi, rm, flags) mi, |
1215 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) mi, |
1216 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mi, |
1217 | #include "instrs.h" |
1218 | #undef INST0 |
1219 | #undef INST1 |
1220 | #undef INST2 |
1221 | #undef INST3 |
1222 | #undef INST4 |
1223 | #undef INST5 |
1224 | }; |
1225 | // clang-format on |
1226 | |
// Returns true iff the given CPU instruction has an MI encoding.
1228 | inline bool hasCodeMI(instruction ins) |
1229 | { |
1230 | assert((unsigned)ins < _countof(insCodesMI)); |
1231 | return ((insCodesMI[ins] != BAD_CODE)); |
1232 | } |
1233 | |
1234 | /***************************************************************************** |
1235 | * |
1236 | * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction. |
1237 | */ |
1238 | |
1239 | inline size_t insCodeMI(instruction ins) |
1240 | { |
1241 | assert((unsigned)ins < _countof(insCodesMI)); |
1242 | assert((insCodesMI[ins] != BAD_CODE)); |
1243 | |
1244 | return insCodesMI[ins]; |
1245 | } |
1246 | |
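// The "[r/m], reg" (MR) encodings; only instructions declared with the INST1 and higher macros
// contribute entries, and BAD_CODE marks an instruction that has no MR form.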
1247 | // clang-format off |
1248 | const static |
1249 | size_t insCodesMR[] = |
1250 | { |
1251 | #define INST0(id, nm, um, mr, flags) |
1252 | #define INST1(id, nm, um, mr, flags) mr, |
1253 | #define INST2(id, nm, um, mr, mi, flags) mr, |
1254 | #define INST3(id, nm, um, mr, mi, rm, flags) mr, |
1255 | #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr, |
1256 | #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr, |
1257 | #include "instrs.h" |
1258 | #undef INST0 |
1259 | #undef INST1 |
1260 | #undef INST2 |
1261 | #undef INST3 |
1262 | #undef INST4 |
1263 | #undef INST5 |
1264 | }; |
1265 | // clang-format on |
1266 | |
// Returns true iff the given CPU instruction has an MR encoding.
1268 | inline bool hasCodeMR(instruction ins) |
1269 | { |
1270 | assert((unsigned)ins < _countof(insCodesMR)); |
1271 | return ((insCodesMR[ins] != BAD_CODE)); |
1272 | } |
1273 | |
1274 | /***************************************************************************** |
1275 | * |
1276 | * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction. |
1277 | */ |
1278 | |
1279 | inline size_t insCodeMR(instruction ins) |
1280 | { |
1281 | assert((unsigned)ins < _countof(insCodesMR)); |
1282 | assert((insCodesMR[ins] != BAD_CODE)); |
1283 | |
1284 | return insCodesMR[ins]; |
1285 | } |
1286 | |
1287 | // Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h. |
bool emitter::EncodedBySSE38orSSE3A(instruction ins)
1289 | { |
1290 | const size_t SSE38 = 0x0F660038; |
1291 | const size_t SSE3A = 0x0F66003A; |
1292 | const size_t MASK = 0xFFFF00FF; |
1293 | |
1294 | size_t insCode = 0; |
1295 | |
1296 | if (!IsSSEOrAVXInstruction(ins)) |
1297 | { |
1298 | return false; |
1299 | } |
1300 | |
1301 | if (hasCodeRM(ins)) |
1302 | { |
1303 | insCode = insCodeRM(ins); |
1304 | } |
1305 | else if (hasCodeMI(ins)) |
1306 | { |
1307 | insCode = insCodeMI(ins); |
1308 | } |
1309 | else if (hasCodeMR(ins)) |
1310 | { |
1311 | insCode = insCodeMR(ins); |
1312 | } |
1313 | |
1314 | insCode &= MASK; |
1315 | return insCode == SSE38 || insCode == SSE3A; |
1316 | } |
1317 | |
1318 | /***************************************************************************** |
1319 | * |
1320 | * Returns an encoding for the specified register to be used in the bit0-2 |
1321 | * part of an opcode. |
1322 | */ |
1323 | |
1324 | inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code) |
1325 | { |
1326 | assert(reg < REG_STK); |
1327 | |
1328 | #ifdef _TARGET_AMD64_ |
1329 | // Either code is not NULL or reg is not an extended reg. |
1330 | // If reg is an extended reg, instruction needs to be prefixed with 'REX' |
1331 | // which would require code != NULL. |
1332 | assert(code != nullptr || !IsExtendedReg(reg)); |
1333 | |
1334 | if (IsExtendedReg(reg)) |
1335 | { |
1336 | *code = AddRexBPrefix(ins, *code); // REX.B |
1337 | } |
1338 | else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) |
1339 | { |
1340 | // We are assuming that we only use/encode SPL, BPL, SIL and DIL |
1341 | // not the corresponding AH, CH, DH, or BH |
1342 | *code = AddRexPrefix(ins, *code); // REX |
1343 | } |
1344 | #endif // _TARGET_AMD64_ |
1345 | |
1346 | unsigned regBits = RegEncoding(reg); |
1347 | |
1348 | assert(regBits < 8); |
1349 | return regBits; |
1350 | } |
1351 | |
1352 | /***************************************************************************** |
1353 | * |
1354 | * Returns an encoding for the specified register to be used in the bit3-5 |
1355 | * part of an opcode. |
1356 | */ |
1357 | |
1358 | inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code) |
1359 | { |
1360 | assert(reg < REG_STK); |
1361 | |
1362 | #ifdef _TARGET_AMD64_ |
1363 | // Either code is not NULL or reg is not an extended reg. |
1364 | // If reg is an extended reg, instruction needs to be prefixed with 'REX' |
1365 | // which would require code != NULL. |
1366 | assert(code != nullptr || !IsExtendedReg(reg)); |
1367 | |
1368 | if (IsExtendedReg(reg)) |
1369 | { |
1370 | *code = AddRexRPrefix(ins, *code); // REX.R |
1371 | } |
1372 | else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) |
1373 | { |
1374 | // We are assuming that we only use/encode SPL, BPL, SIL and DIL |
1375 | // not the corresponding AH, CH, DH, or BH |
1376 | *code = AddRexPrefix(ins, *code); // REX |
1377 | } |
1378 | #endif // _TARGET_AMD64_ |
1379 | |
1380 | unsigned regBits = RegEncoding(reg); |
1381 | |
1382 | assert(regBits < 8); |
1383 | return (regBits << 3); |
1384 | } |
1385 | |
1386 | /*********************************************************************************** |
1387 | * |
1388 | * Returns modified AVX opcode with the specified register encoded in bits 3-6 of |
1389 | * byte 2 of VEX prefix. |
1390 | */ |
1391 | inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code) |
1392 | { |
1393 | assert(reg < REG_STK); |
1394 | assert(IsAVXInstruction(ins)); |
1395 | assert(hasVexPrefix(code)); |
1396 | |
1397 | // Get 4-bit register encoding |
1398 | // RegEncoding() gives lower 3 bits |
1399 | // IsExtendedReg() gives MSB. |
1400 | code_t regBits = RegEncoding(reg); |
1401 | if (IsExtendedReg(reg)) |
1402 | { |
1403 | regBits |= 0x08; |
1404 | } |
1405 | |
    // The VEX prefix encodes the register operand in 1's complement form.
    // Shift count = 4 bytes of opcode (32 bits) + 3 bits, so that the 4-bit value
    // lands in bits 3-6 (the vvvv field) of byte 2 of the VEX prefix.
1408 | assert(regBits <= 0xF); |
1409 | regBits <<= 35; |
1410 | return code ^ regBits; |
1411 | } |
1412 | |
1413 | /***************************************************************************** |
1414 | * |
1415 | * Returns an encoding for the specified register to be used in the bit3-5 |
1416 | * part of an SIB byte (unshifted). |
1417 | * Used exclusively to generate the REX.X bit and truncate the register. |
1418 | */ |
1419 | |
1420 | inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code) |
1421 | { |
1422 | assert(reg < REG_STK); |
1423 | |
1424 | #ifdef _TARGET_AMD64_ |
1425 | // Either code is not NULL or reg is not an extended reg. |
1426 | // If reg is an extended reg, instruction needs to be prefixed with 'REX' |
1427 | // which would require code != NULL. |
1428 | assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8)); |
1429 | |
1430 | if (IsExtendedReg(reg)) |
1431 | { |
1432 | *code = AddRexXPrefix(ins, *code); // REX.X |
1433 | } |
1434 | unsigned regBits = RegEncoding(reg); |
1435 | #else // !_TARGET_AMD64_ |
1436 | unsigned regBits = reg; |
1437 | #endif // !_TARGET_AMD64_ |
1438 | |
1439 | assert(regBits < 8); |
1440 | return regBits; |
1441 | } |
1442 | |
1443 | /***************************************************************************** |
1444 | * |
1445 | * Returns the "[r/m]" opcode with the mod/RM field set to register. |
1446 | */ |
1447 | |
1448 | inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code) |
1449 | { |
1450 | // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. |
1451 | // Otherwise, it will be placed after the 4 byte encoding. |
1452 | if ((code & 0xFF00) == 0) |
1453 | { |
1454 | assert((code & 0xC000) == 0); |
1455 | code |= 0xC000; |
1456 | } |
1457 | |
1458 | return code; |
1459 | } |
1460 | |
1461 | /***************************************************************************** |
1462 | * |
1463 | * Returns the given "[r/m]" opcode with the mod/RM field set to register. |
1464 | */ |
1465 | |
1466 | inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code) |
1467 | { |
1468 | // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. |
1469 | // Otherwise, it will be placed after the 4 byte encoding. |
1470 | if ((code & 0xFF00) == 0) |
1471 | { |
1472 | assert((code & 0xC000) == 0); |
1473 | code |= 0xC000; |
1474 | } |
1475 | return code; |
1476 | } |
1477 | |
1478 | /***************************************************************************** |
1479 | * |
1480 | * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to |
1481 | * the given register. |
1482 | */ |
1483 | |
1484 | inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code) |
1485 | { |
1486 | assert((code & 0xC000) == 0); |
1487 | code |= 0xC000; |
1488 | unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8; |
1489 | code |= regcode; |
1490 | return code; |
1491 | } |
1492 | |
1493 | /***************************************************************************** |
1494 | * |
1495 | * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to |
1496 | * the given register. |
1497 | */ |
1498 | |
1499 | inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code) |
1500 | { |
1501 | assert((code & 0xC000) == 0); |
1502 | code |= 0xC000; |
1503 | unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8; |
1504 | code |= regcode; |
1505 | return code; |
1506 | } |
1507 | |
1508 | /***************************************************************************** |
1509 | * |
1510 | * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a |
1511 | * "reg,reg,imm8" form. |
1512 | */ |
1513 | inline bool insNeedsRRIb(instruction ins) |
1514 | { |
1515 | // If this list gets longer, use a switch or a table. |
1516 | return ins == INS_imul; |
1517 | } |
1518 | |
1519 | /***************************************************************************** |
1520 | * |
 *  Returns the "reg,reg,imm8" opcode with both regs set to the
 *  given register.
1523 | */ |
1524 | inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size) |
1525 | { |
1526 | assert(size == EA_4BYTE); // All we handle for now. |
1527 | assert(insNeedsRRIb(ins)); |
1528 | // If this list gets longer, use a switch, or a table lookup. |
1529 | code_t code = 0x69c0; |
1530 | unsigned regcode = insEncodeReg012(ins, reg, size, &code); |
1531 | // We use the same register as source and destination. (Could have another version that does both regs...) |
1532 | code |= regcode; |
1533 | code |= (regcode << 3); |
1534 | return code; |
1535 | } |
1536 | |
1537 | /***************************************************************************** |
1538 | * |
 *  Returns the "+reg" opcode with the given register set into the low
1540 | * nibble of the opcode |
1541 | */ |
1542 | |
1543 | inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size) |
1544 | { |
1545 | code_t code = insCodeRR(ins); |
1546 | unsigned regcode = insEncodeReg012(ins, reg, size, &code); |
1547 | code |= regcode; |
1548 | return code; |
1549 | } |
1550 | |
1551 | /***************************************************************************** |
1552 | * |
1553 | * Return the 'SS' field value for the given index scale factor. |
1554 | */ |
1555 | |
1556 | inline unsigned emitter::insSSval(unsigned scale) |
1557 | { |
1558 | assert(scale == 1 || scale == 2 || scale == 4 || scale == 8); |
1559 | |
1560 | const static BYTE scales[] = { |
1561 | 0x00, // 1 |
1562 | 0x40, // 2 |
1563 | 0xFF, // 3 |
1564 | 0x80, // 4 |
1565 | 0xFF, // 5 |
1566 | 0xFF, // 6 |
1567 | 0xFF, // 7 |
1568 | 0xC0, // 8 |
1569 | }; |
1570 | |
1571 | return scales[scale - 1]; |
1572 | } |
1573 | |
1574 | const instruction emitJumpKindInstructions[] = {INS_nop, |
1575 | |
1576 | #define JMP_SMALL(en, rev, ins) INS_##ins, |
1577 | #include "emitjmps.h" |
1578 | |
1579 | INS_call}; |
1580 | |
1581 | const emitJumpKind emitReverseJumpKinds[] = { |
1582 | EJ_NONE, |
1583 | |
1584 | #define JMP_SMALL(en, rev, ins) EJ_##rev, |
1585 | #include "emitjmps.h" |
1586 | }; |
1587 | |
1588 | /***************************************************************************** |
1589 | * Look up the instruction for a jump kind |
1590 | */ |
1591 | |
1592 | /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) |
1593 | { |
1594 | assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); |
1595 | return emitJumpKindInstructions[jumpKind]; |
1596 | } |
1597 | |
1598 | /***************************************************************************** |
1599 | * Reverse the conditional jump |
1600 | */ |
1601 | |
1602 | /* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) |
1603 | { |
1604 | assert(jumpKind < EJ_COUNT); |
1605 | return emitReverseJumpKinds[jumpKind]; |
1606 | } |
1607 | |
/*****************************************************************************
 *  Returns true if the given instruction generates no code
 *  (e.g. the 'align' pseudo-instruction).
 */
1612 | |
1613 | inline bool emitInstHasNoCode(instruction ins) |
1614 | { |
1615 | if (ins == INS_align) |
1616 | { |
1617 | return true; |
1618 | } |
1619 | |
1620 | return false; |
1621 | } |
1622 | |
1623 | /***************************************************************************** |
1624 | * When encoding instructions that operate on byte registers |
1625 | * we have to ensure that we use a low register (EAX, EBX, ECX or EDX) |
1626 | * otherwise we will incorrectly encode the instruction |
1627 | */ |
1628 | |
1629 | bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */) |
1630 | { |
1631 | #if CPU_HAS_BYTE_REGS |
1632 | if (size != EA_1BYTE) // Not operating on a byte register is fine |
1633 | { |
1634 | return true; |
1635 | } |
1636 | |
1637 | if ((ins != INS_movsx) && // These three instructions support high register |
1638 | (ins != INS_movzx) // encodings for reg1 |
1639 | #ifdef FEATURE_HW_INTRINSICS |
1640 | && (ins != INS_crc32) |
1641 | #endif |
1642 | ) |
1643 | { |
1644 | // reg1 must be a byte-able register |
1645 | if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0) |
1646 | { |
1647 | return false; |
1648 | } |
1649 | } |
1650 | // if reg2 is not REG_NA then reg2 must be a byte-able register |
1651 | if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)) |
1652 | { |
1653 | return false; |
1654 | } |
1655 | #endif |
1656 | // The instruction can be encoded |
1657 | return true; |
1658 | } |
1659 | |
1660 | /***************************************************************************** |
1661 | * |
1662 | * Estimate the size (in bytes of generated code) of the given instruction. |
1663 | */ |
1664 | |
1665 | inline UNATIVE_OFFSET emitter::emitInsSize(code_t code) |
1666 | { |
1667 | UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2; |
1668 | #ifdef _TARGET_AMD64_ |
1669 | size += emitGetPrefixSize(code); |
1670 | #endif |
1671 | return size; |
1672 | } |
1673 | |
1674 | inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins) |
1675 | { |
1676 | return emitInsSize(insCodeRM(ins)); |
1677 | } |
1678 | |
1679 | inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr) |
1680 | { |
1681 | emitAttr size = EA_SIZE(attr); |
1682 | |
1683 | UNATIVE_OFFSET sz; |
1684 | |
    // If the opcode byte at mask 0xFF00 is zero, that's where the RM encoding goes.
    // Otherwise, it will be placed after the 4-byte encoding, making the total 5 bytes.
    // This would probably be better expressed as a different format or something?
1688 | code_t code = insCodeRM(ins); |
1689 | |
1690 | if ((code & 0xFF00) != 0) |
1691 | { |
1692 | sz = 5; |
1693 | } |
1694 | else |
1695 | { |
1696 | sz = emitInsSize(insEncodeRMreg(ins, code)); |
1697 | } |
1698 | |
1699 | // Most 16-bit operand instructions will need a prefix |
1700 | if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) |
1701 | { |
1702 | sz += 1; |
1703 | } |
1704 | |
1705 | // VEX prefix |
1706 | sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins)); |
1707 | |
1708 | // REX prefix |
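    // Note: a 64-bit "xor reg, reg" (the common zeroing idiom) does not need REX.W,
    // since a 32-bit register write already zero-extends to 64 bits; that is why the
    // size check below excludes it.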
1709 | if (!hasRexPrefix(code)) |
1710 | { |
1711 | if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) || |
1712 | IsExtendedReg(reg2, attr)) |
1713 | { |
1714 | sz += emitGetRexPrefixSize(ins); |
1715 | } |
1716 | } |
1717 | |
1718 | return sz; |
1719 | } |
1720 | |
1721 | /*****************************************************************************/ |
1722 | |
1723 | inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp) |
1724 | { |
1725 | UNATIVE_OFFSET size = emitInsSize(code); |
1726 | UNATIVE_OFFSET offs; |
1727 | bool offsIsUpperBound = true; |
1728 | bool EBPbased = true; |
1729 | |
1730 | /* Is this a temporary? */ |
1731 | |
1732 | if (var < 0) |
1733 | { |
1734 | /* An address off of ESP takes an extra byte */ |
1735 | |
1736 | if (!emitHasFramePtr) |
1737 | { |
1738 | size++; |
1739 | } |
1740 | |
1741 | // The offset is already assigned. Find the temp. |
1742 | TempDsc* tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_USED); |
1743 | if (tmp == nullptr) |
1744 | { |
1745 | // It might be in the free lists, if we're working on zero initializing the temps. |
1746 | tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_FREE); |
1747 | } |
1748 | assert(tmp != nullptr); |
1749 | offs = tmp->tdTempOffs(); |
1750 | |
1751 | // We only care about the magnitude of the offset here, to determine instruction size. |
1752 | if (emitComp->isFramePointerUsed()) |
1753 | { |
1754 | if ((int)offs < 0) |
1755 | { |
1756 | offs = -(int)offs; |
1757 | } |
1758 | } |
1759 | else |
1760 | { |
1761 | // SP-based offsets must already be positive. |
1762 | assert((int)offs >= 0); |
1763 | } |
1764 | } |
1765 | else |
1766 | { |
1767 | |
1768 | /* Get the frame offset of the (non-temp) variable */ |
1769 | |
1770 | offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased); |
1771 | |
1772 | /* An address off of ESP takes an extra byte */ |
1773 | |
1774 | if (!EBPbased) |
1775 | { |
1776 | ++size; |
1777 | } |
1778 | |
1779 | /* Is this a stack parameter reference? */ |
1780 | |
1781 | if (emitComp->lvaIsParameter(var) |
1782 | #if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI) |
1783 | && !emitComp->lvaIsRegArgument(var) |
1784 | #endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI |
1785 | ) |
1786 | { |
1787 | /* If no EBP frame, arguments are off of ESP, above temps */ |
1788 | |
1789 | if (!EBPbased) |
1790 | { |
1791 | assert((int)offs >= 0); |
1792 | |
1793 | offsIsUpperBound = false; // since #temps can increase |
1794 | offs += emitMaxTmpSize; |
1795 | } |
1796 | } |
1797 | else |
1798 | { |
1799 | /* Locals off of EBP are at negative offsets */ |
1800 | |
1801 | if (EBPbased) |
1802 | { |
1803 | #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) |
            // If localloc is not used, then ebp chaining is done and hence
            // locals will be at negative offsets; otherwise offsets will be
            // positive. In the future, when RBP gets positioned in the middle
            // of the frame so as to optimize instruction encoding size, the
            // asserts below will need to be modified appropriately.
1809 | // However, for Unix platforms, we always do frame pointer chaining, |
1810 | // so offsets from the frame pointer will always be negative. |
1811 | if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC) |
1812 | { |
1813 | noway_assert((int)offs >= 0); |
1814 | } |
1815 | else |
1816 | #endif |
1817 | { |
1818 | // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes |
1819 | CLANG_FORMAT_COMMENT_ANCHOR; |
1820 | |
1821 | #ifdef UNIX_AMD64_ABI |
1822 | LclVarDsc* varDsc = emitComp->lvaTable + var; |
1823 | bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg; |
1824 | // Register passed args could have a stack offset of 0. |
1825 | noway_assert((int)offs < 0 || isRegPassedArg); |
1826 | #else // !UNIX_AMD64_ABI |
1827 | noway_assert((int)offs < 0); |
1828 | #endif // !UNIX_AMD64_ABI |
1829 | } |
1830 | |
1831 | assert(emitComp->lvaTempsHaveLargerOffsetThanVars()); |
1832 | |
1833 | // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps |
1834 | if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar || |
1835 | unsigned(var) == emitComp->lvaStubArgumentVar) |
1836 | { |
1837 | offs -= emitMaxTmpSize; |
1838 | } |
1839 | |
1840 | if ((int)offs < 0) |
1841 | { |
1842 | // offset is negative |
1843 | return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int)); |
1844 | } |
1845 | #ifdef _TARGET_AMD64_ |
1846 | // This case arises for localloc frames |
1847 | else |
1848 | { |
1849 | return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int)); |
1850 | } |
1851 | #endif |
1852 | } |
1853 | |
1854 | if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false) |
1855 | { |
1856 | offs += emitMaxTmpSize; |
1857 | } |
1858 | } |
1859 | } |
1860 | |
1861 | assert((int)offs >= 0); |
1862 | |
1863 | #if !FEATURE_FIXED_OUT_ARGS |
1864 | |
1865 | /* Are we addressing off of ESP? */ |
1866 | |
1867 | if (!emitHasFramePtr) |
1868 | { |
1869 | /* Adjust the effective offset if necessary */ |
1870 | |
1871 | if (emitCntStackDepth) |
1872 | offs += emitCurStackLvl; |
1873 | |
1874 | // we could (and used to) check for the special case [sp] here but the stack offset |
1875 | // estimator was off, and there is very little harm in overestimating for such a |
1876 | // rare case. |
1877 | } |
1878 | |
1879 | #endif // !FEATURE_FIXED_OUT_ARGS |
1880 | |
1881 | // printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n", |
1882 | // emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs); |
1883 | |
1884 | #ifdef _TARGET_AMD64_ |
1885 | bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX); |
1886 | #else |
1887 | bool useSmallEncoding = (offs <= size_t(SCHAR_MAX)); |
1888 | #endif |
1889 | |
1890 | // If it is ESP based, and the offset is zero, we will not encode the disp part. |
1891 | if (!EBPbased && offs == 0) |
1892 | { |
1893 | return size; |
1894 | } |
1895 | else |
1896 | { |
1897 | return size + (useSmallEncoding ? sizeof(char) : sizeof(int)); |
1898 | } |
1899 | } |
1900 | |
1901 | inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp) |
1902 | { |
1903 | instruction ins = id->idIns(); |
1904 | emitAttr attrSize = id->idOpSize(); |
1905 | UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code); |
1906 | return prefix + emitInsSizeSV(code, var, dsp); |
1907 | } |
1908 | |
1909 | inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val) |
1910 | { |
1911 | instruction ins = id->idIns(); |
1912 | emitAttr attrSize = id->idOpSize(); |
1913 | UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize); |
1914 | UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code); |
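    // Note: INS_mov and INS_test have no sign-extended imm8 encoding, so their
    // immediates can never be shrunk to a single byte.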
1915 | bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); |
1916 | |
1917 | #ifdef _TARGET_AMD64_ |
1918 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
1919 | // all other opcodes take a sign-extended 4-byte immediate |
1920 | noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc()); |
1921 | #endif // _TARGET_AMD64_ |
1922 | |
1923 | if (valSize > sizeof(int)) |
1924 | { |
1925 | valSize = sizeof(int); |
1926 | } |
1927 | |
1928 | if (id->idIsCnsReloc()) |
1929 | { |
1930 | valInByte = false; // relocs can't be placed in a byte |
1931 | assert(valSize == sizeof(int)); |
1932 | } |
1933 | |
1934 | if (valInByte) |
1935 | { |
1936 | valSize = sizeof(char); |
1937 | } |
1938 | |
    // 16-bit operand instructions need a prefix.
    // This refers to the 66h operand-size prefix override.
1941 | if (id->idOpSize() == EA_2BYTE) |
1942 | { |
1943 | prefix += 1; |
1944 | } |
1945 | |
1946 | return prefix + valSize + emitInsSizeSV(code, var, dsp); |
1947 | } |
1948 | |
1949 | /*****************************************************************************/ |
1950 | |
1951 | static bool baseRegisterRequiresSibByte(regNumber base) |
1952 | { |
1953 | #ifdef _TARGET_AMD64_ |
1954 | return base == REG_ESP || base == REG_R12; |
1955 | #else |
1956 | return base == REG_ESP; |
1957 | #endif |
1958 | } |
1959 | |
1960 | static bool baseRegisterRequiresDisplacement(regNumber base) |
1961 | { |
1962 | #ifdef _TARGET_AMD64_ |
1963 | return base == REG_EBP || base == REG_R13; |
1964 | #else |
1965 | return base == REG_EBP; |
1966 | #endif |
1967 | } |
1968 | |
1969 | UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) |
1970 | { |
1971 | emitAttr attrSize = id->idOpSize(); |
1972 | instruction ins = id->idIns(); |
1973 | /* The displacement field is in an unusual place for calls */ |
1974 | ssize_t dsp = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id); |
1975 | bool dspInByte = ((signed char)dsp == (ssize_t)dsp); |
1976 | bool dspIsZero = (dsp == 0); |
1977 | UNATIVE_OFFSET size; |
1978 | |
1979 | // Note that the values in reg and rgx are used in this method to decide |
1980 | // how many bytes will be needed by the address [reg+rgx+cns] |
1981 | // this includes the prefix bytes when reg or rgx are registers R8-R15 |
1982 | regNumber reg; |
1983 | regNumber rgx; |
1984 | |
1985 | // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant |
1986 | // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD |
1987 | // ideally these should really be the only idInsFmts that we see here |
1988 | // but we have some outliers to deal with: |
1989 | // emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM |
1990 | // emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM |
1991 | |
1992 | switch (id->idInsFmt()) |
1993 | { |
1994 | case IF_RWR_LABEL: |
1995 | case IF_MRW_CNS: |
1996 | case IF_MRW_RRD: |
1997 | case IF_MRW_SHF: |
1998 | reg = REG_NA; |
1999 | rgx = REG_NA; |
2000 | break; |
2001 | |
2002 | default: |
2003 | reg = id->idAddr()->iiaAddrMode.amBaseReg; |
2004 | rgx = id->idAddr()->iiaAddrMode.amIndxReg; |
2005 | break; |
2006 | } |
2007 | |
2008 | if (id->idIsDspReloc()) |
2009 | { |
2010 | dspInByte = false; // relocs can't be placed in a byte |
2011 | dspIsZero = false; // relocs won't always be zero |
2012 | } |
2013 | |
2014 | if (code & 0xFF000000) |
2015 | { |
2016 | size = 4; |
2017 | } |
2018 | else if (code & 0x00FF0000) |
2019 | { |
2020 | // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix. |
2021 | assert(ins != INS_bt); |
2022 | |
2023 | assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64 |
2024 | || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64 |
2025 | || (ins == INS_movzx) || (ins == INS_movsx) |
2026 | // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded |
2027 | || isPrefetch(ins)); |
2028 | size = 3; |
2029 | } |
2030 | else |
2031 | { |
2032 | size = 2; |
2033 | |
        // Most 16-bit operands will require a size prefix.
        // This refers to the 66h operand-size prefix override.
2036 | |
2037 | if (attrSize == EA_2BYTE) |
2038 | { |
2039 | size++; |
2040 | } |
2041 | } |
2042 | |
2043 | size += emitGetVexPrefixAdjustedSize(ins, attrSize, code); |
2044 | |
2045 | if (hasRexPrefix(code)) |
2046 | { |
2047 | // REX prefix |
2048 | size += emitGetRexPrefixSize(ins); |
2049 | } |
2050 | else if (TakesRexWPrefix(ins, attrSize)) |
2051 | { |
2052 | // REX.W prefix |
2053 | size += emitGetRexPrefixSize(ins); |
2054 | } |
2055 | else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) || |
2056 | ((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize))) |
2057 | { |
2058 | // Should have a REX byte |
2059 | size += emitGetRexPrefixSize(ins); |
2060 | } |
2061 | |
2062 | if (rgx == REG_NA) |
2063 | { |
2064 | /* The address is of the form "[reg+disp]" */ |
2065 | |
2066 | if (reg == REG_NA) |
2067 | { |
2068 | /* The address is of the form "[disp]" */ |
2069 | |
2070 | size += sizeof(INT32); |
2071 | |
2072 | #ifdef _TARGET_AMD64_ |
2073 | // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32 |
2074 | if (!id->idIsDspReloc()) |
2075 | { |
2076 | size++; |
2077 | } |
2078 | #endif |
2079 | return size; |
2080 | } |
2081 | |
2082 | // If this is just "call reg", we're done. |
2083 | if (id->idIsCallRegPtr()) |
2084 | { |
2085 | assert(ins == INS_call); |
2086 | assert(dsp == 0); |
2087 | return size; |
2088 | } |
2089 | |
2090 | // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used. |
2091 | if (baseRegisterRequiresSibByte(reg)) |
2092 | { |
2093 | size++; |
2094 | } |
2095 | |
2096 | // If the base register is EBP (or R13 on 64-bit systems), a displacement is required. |
2097 | // Otherwise, the displacement can be elided if it is zero. |
2098 | if (dspIsZero && !baseRegisterRequiresDisplacement(reg)) |
2099 | { |
2100 | return size; |
2101 | } |
2102 | |
2103 | /* Does the offset fit in a byte? */ |
2104 | |
2105 | if (dspInByte) |
2106 | { |
2107 | size += sizeof(char); |
2108 | } |
2109 | else |
2110 | { |
2111 | size += sizeof(INT32); |
2112 | } |
2113 | } |
2114 | else |
2115 | { |
2116 | /* An index register is present */ |
2117 | |
2118 | size++; |
2119 | |
2120 | /* Is the index value scaled? */ |
2121 | |
2122 | if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1) |
2123 | { |
2124 | /* Is there a base register? */ |
2125 | |
2126 | if (reg != REG_NA) |
2127 | { |
2128 | /* The address is "[reg + {2/4/8} * rgx + icon]" */ |
2129 | |
2130 | if (dspIsZero && !baseRegisterRequiresDisplacement(reg)) |
2131 | { |
2132 | /* The address is "[reg + {2/4/8} * rgx]" */ |
2133 | } |
2134 | else |
2135 | { |
2136 | /* The address is "[reg + {2/4/8} * rgx + disp]" */ |
2137 | |
2138 | if (dspInByte) |
2139 | { |
2140 | size += sizeof(char); |
2141 | } |
2142 | else |
2143 | { |
2144 | size += sizeof(int); |
2145 | } |
2146 | } |
2147 | } |
2148 | else |
2149 | { |
2150 | /* The address is "[{2/4/8} * rgx + icon]" */ |
2151 | |
2152 | size += sizeof(INT32); |
2153 | } |
2154 | } |
2155 | else |
2156 | { |
2157 | if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx)) |
2158 | { |
2159 | /* Swap reg and rgx, such that reg is not EBP/R13 */ |
2160 | regNumber tmp = reg; |
2161 | id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx; |
2162 | id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp; |
2163 | } |
2164 | |
2165 | /* The address is "[reg+rgx+dsp]" */ |
2166 | |
2167 | if (dspIsZero && !baseRegisterRequiresDisplacement(reg)) |
2168 | { |
2169 | /* This is [reg+rgx]" */ |
2170 | } |
2171 | else |
2172 | { |
2173 | /* This is [reg+rgx+dsp]" */ |
2174 | |
2175 | if (dspInByte) |
2176 | { |
2177 | size += sizeof(char); |
2178 | } |
2179 | else |
2180 | { |
2181 | size += sizeof(int); |
2182 | } |
2183 | } |
2184 | } |
2185 | } |
2186 | |
2187 | return size; |
2188 | } |
2189 | |
2190 | inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val) |
2191 | { |
2192 | instruction ins = id->idIns(); |
2193 | UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); |
2194 | bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); |
2195 | |
2196 | // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful |
2197 | // but it requires special handling of the immediate value (it is always encoded in a byte). |
2198 | // Let's not complicate things until this is needed. |
2199 | assert(ins != INS_bt); |
2200 | |
2201 | #ifdef _TARGET_AMD64_ |
2202 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
2203 | // all other opcodes take a sign-extended 4-byte immediate |
2204 | noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc()); |
2205 | #endif // _TARGET_AMD64_ |
2206 | |
2207 | if (valSize > sizeof(INT32)) |
2208 | { |
2209 | valSize = sizeof(INT32); |
2210 | } |
2211 | |
2212 | if (id->idIsCnsReloc()) |
2213 | { |
2214 | valInByte = false; // relocs can't be placed in a byte |
2215 | assert(valSize == sizeof(INT32)); |
2216 | } |
2217 | |
2218 | if (valInByte) |
2219 | { |
2220 | valSize = sizeof(char); |
2221 | } |
2222 | |
2223 | return valSize + emitInsSizeAM(id, code); |
2224 | } |
2225 | |
2226 | inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) |
2227 | { |
2228 | instruction ins = id->idIns(); |
2229 | |
2230 | // fgMorph changes any statics that won't fit into 32-bit addresses |
2231 | // into constants with an indir, rather than GT_CLS_VAR |
2232 | // so we should only hit this path for statics that are RIP-relative |
2233 | UNATIVE_OFFSET size = sizeof(INT32); |
2234 | |
2235 | size += emitGetVexPrefixAdjustedSize(ins, id->idOpSize(), code); |
2236 | |
    // Most 16-bit operand instructions will need a prefix.
    // This refers to the 66h operand-size prefix override.
2239 | |
2240 | if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx) |
2241 | { |
2242 | size++; |
2243 | } |
2244 | |
2245 | return size + emitInsSize(code); |
2246 | } |
2247 | |
2248 | inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val) |
2249 | { |
2250 | instruction ins = id->idIns(); |
2251 | UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); |
2252 | bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); |
2253 | |
2254 | #ifndef _TARGET_AMD64_ |
2255 | // occasionally longs get here on x86 |
2256 | if (valSize > sizeof(INT32)) |
2257 | valSize = sizeof(INT32); |
2258 | #endif // !_TARGET_AMD64_ |
2259 | |
2260 | if (id->idIsCnsReloc()) |
2261 | { |
2262 | valInByte = false; // relocs can't be placed in a byte |
2263 | assert(valSize == sizeof(INT32)); |
2264 | } |
2265 | |
2266 | if (valInByte) |
2267 | { |
2268 | valSize = sizeof(char); |
2269 | } |
2270 | |
2271 | return valSize + emitInsSizeCV(id, code); |
2272 | } |
2273 | |
2274 | /***************************************************************************** |
2275 | * |
2276 | * Allocate instruction descriptors for instructions with address modes. |
2277 | */ |
2278 | |
2279 | inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp) |
2280 | { |
2281 | if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX) |
2282 | { |
2283 | instrDescAmd* id = emitAllocInstrAmd(size); |
2284 | |
2285 | id->idSetIsLargeDsp(); |
2286 | #ifdef DEBUG |
2287 | id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; |
2288 | #endif |
2289 | id->idaAmdVal = dsp; |
2290 | |
2291 | return id; |
2292 | } |
2293 | else |
2294 | { |
2295 | instrDesc* id = emitAllocInstr(size); |
2296 | |
2297 | id->idAddr()->iiaAddrMode.amDisp = dsp; |
2298 | assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit |
2299 | |
2300 | return id; |
2301 | } |
2302 | } |
2303 | |
2304 | /***************************************************************************** |
2305 | * |
2306 | * Set the displacement field in an instruction. Only handles instrDescAmd type. |
2307 | */ |
2308 | |
2309 | inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp) |
2310 | { |
2311 | if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX) |
2312 | { |
2313 | id->idSetIsLargeDsp(); |
2314 | #ifdef DEBUG |
2315 | id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; |
2316 | #endif |
2317 | id->idaAmdVal = dsp; |
2318 | } |
2319 | else |
2320 | { |
2321 | id->idSetIsSmallDsp(); |
2322 | id->idAddr()->iiaAddrMode.amDisp = dsp; |
2323 | assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit |
2324 | } |
2325 | } |
2326 | |
2327 | /***************************************************************************** |
2328 | * |
2329 | * Allocate an instruction descriptor for an instruction that uses both |
2330 | * an address mode displacement and a constant. |
2331 | */ |
2332 | |
2333 | emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns) |
2334 | { |
2335 | if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX) |
2336 | { |
2337 | if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS) |
2338 | { |
2339 | instrDesc* id = emitAllocInstr(size); |
2340 | |
2341 | id->idSmallCns(cns); |
2342 | |
2343 | id->idAddr()->iiaAddrMode.amDisp = dsp; |
2344 | assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit |
2345 | |
2346 | return id; |
2347 | } |
2348 | else |
2349 | { |
2350 | instrDescCns* id = emitAllocInstrCns(size); |
2351 | |
2352 | id->idSetIsLargeCns(); |
2353 | id->idcCnsVal = cns; |
2354 | |
2355 | id->idAddr()->iiaAddrMode.amDisp = dsp; |
2356 | assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit |
2357 | |
2358 | return id; |
2359 | } |
2360 | } |
2361 | else |
2362 | { |
2363 | if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS) |
2364 | { |
2365 | instrDescAmd* id = emitAllocInstrAmd(size); |
2366 | |
2367 | id->idSetIsLargeDsp(); |
2368 | #ifdef DEBUG |
2369 | id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; |
2370 | #endif |
2371 | id->idaAmdVal = dsp; |
2372 | |
2373 | id->idSmallCns(cns); |
2374 | |
2375 | return id; |
2376 | } |
2377 | else |
2378 | { |
2379 | instrDescCnsAmd* id = emitAllocInstrCnsAmd(size); |
2380 | |
2381 | id->idSetIsLargeCns(); |
2382 | id->idacCnsVal = cns; |
2383 | |
2384 | id->idSetIsLargeDsp(); |
2385 | #ifdef DEBUG |
2386 | id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; |
2387 | #endif |
2388 | id->idacAmdVal = dsp; |
2389 | |
2390 | return id; |
2391 | } |
2392 | } |
2393 | } |
2394 | |
2395 | /***************************************************************************** |
2396 | * |
 * The next instruction will be a loop head entry point,
 * so insert a dummy instruction here to ensure that
 * the x86 I-cache alignment rule is followed.
2400 | */ |
2401 | |
2402 | void emitter::emitLoopAlign() |
2403 | { |
2404 | /* Insert a pseudo-instruction to ensure that we align |
2405 | the next instruction properly */ |
2406 | |
2407 | instrDesc* id = emitNewInstrSmall(EA_1BYTE); |
2408 | id->idIns(INS_align); |
2409 | id->idCodeSize(15); // We may need to skip up to 15 bytes of code |
2410 | emitCurIGsize += 15; |
2411 | } |
2412 | |
2413 | /***************************************************************************** |
2414 | * |
2415 | * Add a NOP instruction of the given size. |
2416 | */ |
2417 | |
2418 | void emitter::emitIns_Nop(unsigned size) |
2419 | { |
2420 | assert(size <= 15); |
2421 | |
2422 | instrDesc* id = emitNewInstr(); |
2423 | id->idIns(INS_nop); |
2424 | id->idInsFmt(IF_NONE); |
2425 | id->idCodeSize(size); |
2426 | |
2427 | dispIns(id); |
2428 | emitCurIGsize += size; |
2429 | } |
2430 | |
2431 | /***************************************************************************** |
2432 | * |
2433 | * Add an instruction with no operands. |
2434 | */ |
2435 | void emitter::emitIns(instruction ins) |
2436 | { |
2437 | UNATIVE_OFFSET sz; |
2438 | instrDesc* id = emitNewInstr(); |
2439 | code_t code = insCodeMR(ins); |
2440 | |
2441 | #ifdef DEBUG |
2442 | { |
2443 | // We cannot have #ifdef inside macro expansion. |
2444 | bool assertCond = |
2445 | (ins == INS_cdq || ins == INS_int3 || ins == INS_lock || ins == INS_leave || ins == INS_movsb || |
2446 | ins == INS_movsd || ins == INS_movsp || ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd || |
2447 | ins == INS_r_movsp || ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret || |
2448 | ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp |
2449 | // These instructions take zero operands |
2450 | || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence); |
2451 | |
2452 | assert(assertCond); |
2453 | } |
2454 | #endif // DEBUG |
2455 | |
2456 | assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right? |
2457 | |
2458 | if (code & 0xFF000000) |
2459 | { |
2460 | sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case. |
2461 | } |
2462 | else if (code & 0x00FF0000) |
2463 | { |
2464 | sz = 3; |
2465 | } |
2466 | else if (code & 0x0000FF00) |
2467 | { |
2468 | sz = 2; |
2469 | } |
2470 | else |
2471 | { |
2472 | sz = 1; |
2473 | } |
2474 | |
2475 | // vzeroupper includes its 2-byte VEX prefix in its MR code. |
2476 | assert((ins != INS_vzeroupper) || (sz == 3)); |
2477 | |
2478 | insFormat fmt = IF_NONE; |
2479 | |
2480 | id->idIns(ins); |
2481 | id->idInsFmt(fmt); |
2482 | id->idCodeSize(sz); |
2483 | |
2484 | dispIns(id); |
2485 | emitCurIGsize += sz; |
2486 | } |
2487 | |
2488 | // Add an instruction with no operands, but whose encoding depends on the size |
2489 | // (Only CDQ/CQO currently) |
2490 | void emitter::emitIns(instruction ins, emitAttr attr) |
2491 | { |
2492 | UNATIVE_OFFSET sz; |
2493 | instrDesc* id = emitNewInstr(attr); |
2494 | code_t code = insCodeMR(ins); |
2495 | assert(ins == INS_cdq); |
2496 | assert((code & 0xFFFFFF00) == 0); |
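    // Note: CQO is encoded as CDQ with a REX.W prefix; the REX.W adjustment below,
    // driven by 'attr', accounts for that case.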
2497 | sz = 1; |
2498 | |
2499 | insFormat fmt = IF_NONE; |
2500 | |
2501 | sz += emitGetVexPrefixAdjustedSize(ins, attr, code); |
2502 | if (TakesRexWPrefix(ins, attr)) |
2503 | { |
2504 | sz += emitGetRexPrefixSize(ins); |
2505 | } |
2506 | |
2507 | id->idIns(ins); |
2508 | id->idInsFmt(fmt); |
2509 | id->idCodeSize(sz); |
2510 | |
2511 | dispIns(id); |
2512 | emitCurIGsize += sz; |
2513 | } |
2514 | |
2515 | //------------------------------------------------------------------------ |
2516 | // emitMapFmtForIns: map the instruction format based on the instruction. |
2517 | // Shift-by-a-constant instructions have a special format. |
2518 | // |
2519 | // Arguments: |
2520 | // fmt - the instruction format to map |
2521 | // ins - the instruction |
2522 | // |
2523 | // Returns: |
2524 | // The mapped instruction format. |
2525 | // |
2526 | emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins) |
2527 | { |
2528 | switch (ins) |
2529 | { |
2530 | case INS_rol_N: |
2531 | case INS_ror_N: |
2532 | case INS_rcl_N: |
2533 | case INS_rcr_N: |
2534 | case INS_shl_N: |
2535 | case INS_shr_N: |
2536 | case INS_sar_N: |
2537 | { |
2538 | switch (fmt) |
2539 | { |
2540 | case IF_RRW_CNS: |
2541 | return IF_RRW_SHF; |
2542 | case IF_MRW_CNS: |
2543 | return IF_MRW_SHF; |
2544 | case IF_SRW_CNS: |
2545 | return IF_SRW_SHF; |
2546 | case IF_ARW_CNS: |
2547 | return IF_ARW_SHF; |
2548 | default: |
2549 | unreached(); |
2550 | } |
2551 | } |
2552 | |
2553 | default: |
2554 | return fmt; |
2555 | } |
2556 | } |
2557 | |
2558 | //------------------------------------------------------------------------ |
2559 | // emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents. |
2560 | // |
2561 | // Arguments: |
2562 | // fmt - the instruction format to map |
2563 | // |
2564 | // Returns: |
2565 | // The mapped instruction format. |
2566 | // |
2567 | emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt) |
2568 | { |
2569 | switch (fmt) |
2570 | { |
2571 | case IF_ARD: |
2572 | return IF_MRD; |
2573 | case IF_AWR: |
2574 | return IF_MWR; |
2575 | case IF_ARW: |
2576 | return IF_MRW; |
2577 | |
2578 | case IF_RRD_ARD: |
2579 | return IF_RRD_MRD; |
2580 | case IF_RWR_ARD: |
2581 | return IF_RWR_MRD; |
2582 | case IF_RWR_ARD_CNS: |
2583 | return IF_RWR_MRD_CNS; |
2584 | case IF_RRW_ARD: |
2585 | return IF_RRW_MRD; |
2586 | case IF_RRW_ARD_CNS: |
2587 | return IF_RRW_MRD_CNS; |
2588 | case IF_RWR_RRD_ARD: |
2589 | return IF_RWR_RRD_MRD; |
2590 | case IF_RWR_RRD_ARD_CNS: |
2591 | return IF_RWR_RRD_MRD_CNS; |
2592 | case IF_RWR_RRD_ARD_RRD: |
2593 | return IF_RWR_RRD_MRD_RRD; |
2594 | |
2595 | case IF_ARD_RRD: |
2596 | return IF_MRD_RRD; |
2597 | case IF_AWR_RRD: |
2598 | return IF_MWR_RRD; |
2599 | case IF_ARW_RRD: |
2600 | return IF_MRW_RRD; |
2601 | |
2602 | case IF_ARD_CNS: |
2603 | return IF_MRD_CNS; |
2604 | case IF_AWR_CNS: |
2605 | return IF_MWR_CNS; |
2606 | case IF_ARW_CNS: |
2607 | return IF_MRW_CNS; |
2608 | |
2609 | case IF_AWR_RRD_CNS: |
2610 | return IF_MWR_RRD_CNS; |
2611 | |
2612 | case IF_ARW_SHF: |
2613 | return IF_MRW_SHF; |
2614 | |
2615 | default: |
2616 | unreached(); |
2617 | } |
2618 | } |
2619 | |
2620 | //------------------------------------------------------------------------ |
2621 | // emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc. |
2622 | // |
2623 | // Arguments: |
2624 | // indir - the memory operand. |
2625 | // id - the instrDesc to fill in. |
2626 | // fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for |
2627 | // GT_CLS_VAR_ADDR), this function will map it to the correct format. |
2628 | // ins - the instruction we are generating. This might affect the instruction format we choose. |
2629 | // |
2630 | // Assumptions: |
2631 | // The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns(); |
2632 | // |
2633 | // Post-conditions: |
2634 | // For base address of int constant: |
2635 | // -- the caller must have added the int constant base to the instrDesc when creating it via |
2636 | // emitNewInstrAmdCns(). |
2637 | // For simple address modes (base + scale * index + offset): |
2638 | // -- the base register, index register, and scale factor are set. |
2639 | // -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via |
2640 | // emitNewInstrAmdCns(). |
2641 | // |
2642 | // The instruction format is set. |
2643 | // |
2644 | // idSetIsDspReloc() is called if necessary. |
2645 | // |
2646 | void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins) |
2647 | { |
2648 | assert(fmt != IF_NONE); |
2649 | |
2650 | GenTree* memBase = indir->Base(); |
2651 | |
2652 | if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR)) |
2653 | { |
2654 | CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd; |
2655 | |
        // Statics always need relocs
2657 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
2658 | { |
2659 | // Contract: |
2660 | // fgMorphField() changes any statics that won't fit into 32-bit addresses into |
            // constants with an indir, rather than GT_CLS_VAR, based on the reloc type hint given
            // by the VM. Hence the emitter should always mark GT_CLS_VAR_ADDR as relocatable.
            //
            // Data section constants: these get allocated close to the code block of the method
            // and are always addressable IP-relative. These too should be marked as relocatable.
2666 | |
2667 | id->idSetIsDspReloc(); |
2668 | } |
2669 | |
2670 | id->idAddr()->iiaFieldHnd = fldHnd; |
2671 | id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins)); |
2672 | } |
2673 | else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained()) |
2674 | { |
2675 | // Absolute addresses marked as contained should fit within the base of addr mode. |
2676 | assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); |
2677 | |
2678 | // Either not generating relocatable code, or addr must be an icon handle, or the |
2679 | // constant is zero (which we won't generate a relocation for). |
2680 | assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0)); |
2681 | |
2682 | if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) |
2683 | { |
2684 | id->idSetIsDspReloc(); |
2685 | } |
2686 | |
2687 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
2688 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
2689 | id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; // for completeness |
2690 | |
2691 | id->idInsFmt(emitMapFmtForIns(fmt, ins)); |
2692 | |
2693 | // Absolute address must have already been set in the instrDesc constructor. |
2694 | assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue()); |
2695 | } |
2696 | else |
2697 | { |
2698 | if (memBase != nullptr) |
2699 | { |
2700 | id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum; |
2701 | } |
2702 | else |
2703 | { |
2704 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
2705 | } |
2706 | |
2707 | if (indir->HasIndex()) |
2708 | { |
2709 | id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum; |
2710 | } |
2711 | else |
2712 | { |
2713 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
2714 | } |
2715 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale()); |
2716 | |
2717 | id->idInsFmt(emitMapFmtForIns(fmt, ins)); |
2718 | |
2719 | // disp must have already been set in the instrDesc constructor. |
2720 | assert(emitGetInsAmdAny(id) == indir->Offset()); // make sure "disp" is stored properly |
2721 | } |
2722 | } |
2723 | |
// Takes care of storing all incoming register parameters
// into their corresponding shadow slots (defined by the x64 ABI)
2726 | void emitter::spillIntArgRegsToShadowSlots() |
2727 | { |
2728 | unsigned argNum; |
2729 | instrDesc* id; |
2730 | UNATIVE_OFFSET sz; |
2731 | |
2732 | assert(emitComp->compGeneratingProlog); |
2733 | |
2734 | for (argNum = 0; argNum < MAX_REG_ARG; ++argNum) |
2735 | { |
2736 | regNumber argReg = intArgRegs[argNum]; |
2737 | |
        // The offsets for the shadow space start at RSP + 8
        // (just above the return address pushed by the caller)
2740 | int offset = (argNum + 1) * EA_PTRSIZE; |
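        // For example, on the Windows x64 ABI this stores RCX -> [RSP+8], RDX -> [RSP+16],
        // R8 -> [RSP+24] and R9 -> [RSP+32] at the method entry point.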
2741 | |
2742 | id = emitNewInstrAmd(EA_PTRSIZE, offset); |
2743 | id->idIns(INS_mov); |
2744 | id->idInsFmt(IF_AWR_RRD); |
2745 | id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE; |
2746 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
2747 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1); |
2748 | |
2749 | // The offset has already been set in the intrDsc ctor, |
2750 | // make sure we got it right. |
2751 | assert(emitGetInsAmdAny(id) == ssize_t(offset)); |
2752 | |
2753 | id->idReg1(argReg); |
2754 | sz = emitInsSizeAM(id, insCodeMR(INS_mov)); |
2755 | id->idCodeSize(sz); |
2756 | emitCurIGsize += sz; |
2757 | } |
2758 | } |
2759 | |
2760 | //------------------------------------------------------------------------ |
2761 | // emitInsLoadInd: Emits a "mov reg, [mem]" (or a variant such as "movzx" or "movss") |
2762 | // instruction for a GT_IND node. |
2763 | // |
2764 | // Arguments: |
2765 | // ins - the instruction to emit |
2766 | // attr - the instruction operand size |
2767 | // dstReg - the destination register |
2768 | // mem - the GT_IND node |
2769 | // |
2770 | void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem) |
2771 | { |
2772 | assert(mem->OperIs(GT_IND)); |
2773 | |
2774 | GenTree* addr = mem->Addr(); |
2775 | |
2776 | if (addr->OperGet() == GT_CLS_VAR_ADDR) |
2777 | { |
2778 | emitIns_R_C(ins, attr, dstReg, addr->gtClsVar.gtClsVarHnd, 0); |
2779 | return; |
2780 | } |
2781 | |
2782 | if (addr->OperGet() == GT_LCL_VAR_ADDR) |
2783 | { |
2784 | GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); |
2785 | emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), 0); |
2786 | codeGen->genUpdateLife(varNode); |
2787 | return; |
2788 | } |
2789 | |
2790 | assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained()); |
2791 | ssize_t offset = mem->Offset(); |
2792 | instrDesc* id = emitNewInstrAmd(attr, offset); |
2793 | id->idIns(ins); |
2794 | id->idReg1(dstReg); |
2795 | emitHandleMemOp(mem, id, IF_RWR_ARD, ins); |
2796 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); |
2797 | id->idCodeSize(sz); |
2798 | dispIns(id); |
2799 | emitCurIGsize += sz; |
2800 | } |
2801 | |
2802 | //------------------------------------------------------------------------ |
2803 | // emitInsStoreInd: Emits a "mov [mem], reg/imm" (or a variant such as "movss") |
2804 | // instruction for a GT_STOREIND node. |
2805 | // |
2806 | // Arguments: |
2807 | // ins - the instruction to emit |
2808 | // attr - the instruction operand size |
2809 | // mem - the GT_STOREIND node |
2810 | // |
2811 | void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem) |
2812 | { |
2813 | assert(mem->OperIs(GT_STOREIND)); |
2814 | |
2815 | GenTree* addr = mem->Addr(); |
2816 | GenTree* data = mem->Data(); |
2817 | |
2818 | if (addr->OperGet() == GT_CLS_VAR_ADDR) |
2819 | { |
2820 | if (data->isContainedIntOrIImmed()) |
2821 | { |
2822 | emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue()); |
2823 | } |
2824 | else |
2825 | { |
2826 | assert(!data->isContained()); |
2827 | emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0); |
2828 | } |
2829 | return; |
2830 | } |
2831 | |
2832 | if (addr->OperGet() == GT_LCL_VAR_ADDR) |
2833 | { |
2834 | GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); |
2835 | if (data->isContainedIntOrIImmed()) |
2836 | { |
2837 | emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue()); |
2838 | } |
2839 | else |
2840 | { |
2841 | assert(!data->isContained()); |
2842 | emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0); |
2843 | } |
2844 | codeGen->genUpdateLife(varNode); |
2845 | return; |
2846 | } |
2847 | |
2848 | ssize_t offset = mem->Offset(); |
2849 | UNATIVE_OFFSET sz; |
2850 | instrDesc* id; |
2851 | |
2852 | if (data->isContainedIntOrIImmed()) |
2853 | { |
2854 | int icon = (int)data->AsIntConCommon()->IconValue(); |
2855 | id = emitNewInstrAmdCns(attr, offset, icon); |
2856 | id->idIns(ins); |
2857 | emitHandleMemOp(mem, id, IF_AWR_CNS, ins); |
2858 | sz = emitInsSizeAM(id, insCodeMI(ins), icon); |
2859 | id->idCodeSize(sz); |
2860 | } |
2861 | else |
2862 | { |
2863 | assert(!data->isContained()); |
2864 | id = emitNewInstrAmd(attr, offset); |
2865 | id->idIns(ins); |
2866 | emitHandleMemOp(mem, id, IF_AWR_RRD, ins); |
2867 | id->idReg1(data->gtRegNum); |
2868 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
2869 | id->idCodeSize(sz); |
2870 | } |
2871 | |
2872 | dispIns(id); |
2873 | emitCurIGsize += sz; |
2874 | } |
2875 | |
2876 | //------------------------------------------------------------------------ |
2877 | // emitInsStoreLcl: Emits a "mov [mem], reg/imm" (or a variant such as "movss") |
2878 | // instruction for a GT_STORE_LCL_VAR node. |
2879 | // |
2880 | // Arguments: |
2881 | // ins - the instruction to emit |
2882 | // attr - the instruction operand size |
2883 | // varNode - the GT_STORE_LCL_VAR node |
2884 | // |
2885 | void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode) |
2886 | { |
2887 | assert(varNode->OperIs(GT_STORE_LCL_VAR)); |
2888 | assert(varNode->gtRegNum == REG_NA); // stack store |
2889 | |
2890 | GenTree* data = varNode->gtGetOp1(); |
2891 | codeGen->inst_set_SV_var(varNode); |
2892 | |
2893 | if (data->isContainedIntOrIImmed()) |
2894 | { |
2895 | emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue()); |
2896 | } |
2897 | else |
2898 | { |
2899 | assert(!data->isContained()); |
2900 | emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0); |
2901 | } |
2902 | codeGen->genUpdateLife(varNode); |
2903 | } |
2904 | |
2905 | //------------------------------------------------------------------------ |
2906 | // emitInsBinary: Emits an instruction for a node which takes two operands |
2907 | // |
2908 | // Arguments: |
2909 | // ins - the instruction to emit |
2910 | // attr - the instruction operand size |
2911 | // dst - the destination and first source operand |
2912 | // src - the second source operand |
2913 | // |
2914 | // Assumptions: |
2915 | // i) caller of this routine needs to call genConsumeReg() |
2916 | // ii) caller of this routine needs to call genProduceReg() |
2917 | regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) |
2918 | { |
    // We can only have one memory operand, and only src can be a constant operand.
    // However, the handling for a given operand type (mem, cns, or other) is fairly
    // consistent regardless of whether it is src or dst. As such, we will find
    // the type of each operand and only check it against src/dst where relevant.
2923 | |
2924 | GenTree* memOp = nullptr; |
2925 | GenTree* cnsOp = nullptr; |
2926 | GenTree* otherOp = nullptr; |
2927 | |
2928 | if (dst->isContained() || (dst->isLclField() && (dst->gtRegNum == REG_NA)) || dst->isUsedFromSpillTemp()) |
2929 | { |
2930 | // dst can only be a modrm |
2931 | // dst on 3opImul isn't really the dst |
2932 | assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) || instrIs3opImul(ins)); |
2933 | assert(!src->isUsedFromMemory()); |
2934 | |
2935 | memOp = dst; |
2936 | |
2937 | if (src->isContained()) |
2938 | { |
2939 | assert(src->IsCnsIntOrI()); |
2940 | cnsOp = src; |
2941 | } |
2942 | else |
2943 | { |
2944 | otherOp = src; |
2945 | } |
2946 | } |
2947 | else if (src->isContained() || src->isUsedFromSpillTemp()) |
2948 | { |
2949 | assert(!dst->isUsedFromMemory()); |
2950 | otherOp = dst; |
2951 | |
2952 | if ((src->IsCnsIntOrI() || src->IsCnsFltOrDbl()) && !src->isUsedFromSpillTemp()) |
2953 | { |
2954 | assert(!src->isUsedFromMemory() || src->IsCnsFltOrDbl()); |
2955 | cnsOp = src; |
2956 | } |
2957 | else |
2958 | { |
2959 | assert(src->isUsedFromMemory()); |
2960 | memOp = src; |
2961 | } |
2962 | } |
2963 | |
2964 | // At this point, we either have a memory operand or we don't. |
2965 | // |
2966 | // If we don't then the logic is very simple and we will either be emitting a |
2967 | // `reg, immed` instruction (if src is a cns) or a `reg, reg` instruction otherwise. |
2968 | // |
2969 | // If we do have a memory operand, the logic is a bit more complicated as we need |
2970 | // to do different things depending on the type of memory operand. These types include: |
    // * Spill temp
    // * Indirect access
    //   * Local variable
    //   * Class variable
    //   * Addressing mode [base + index * scale + offset]
    // * Local field
    // * Local variable
2978 | // |
2979 | // Most of these types (except Indirect: Class variable and Indirect: Addressing mode) |
    // give us a local variable number and an offset, and access memory on the stack.
    //
    // Indirect: Class variable is used for accessing static class variables and gives us a handle
    // to the memory location we read from.
2984 | // |
2985 | // Indirect: Addressing mode is used for the remaining memory accesses and will give us |
2986 | // a base address, an index, a scale, and an offset. These are combined to let us easily |
2987 | // access the given memory location. |
2988 | // |
2989 | // In all of the memory access cases, we determine which form to emit (e.g. `reg, [mem]` |
2990 | // or `[mem], reg`) by comparing memOp to src to determine which `emitIns_*` method needs |
2991 | // to be called. The exception is for the `[mem], immed` case (for Indirect: Class variable) |
2992 | // where only src can be the immediate. |
2993 | |
2994 | if (memOp != nullptr) |
2995 | { |
2996 | TempDsc* tmpDsc = nullptr; |
2997 | unsigned varNum = BAD_VAR_NUM; |
2998 | unsigned offset = (unsigned)-1; |
2999 | |
3000 | if (memOp->isUsedFromSpillTemp()) |
3001 | { |
3002 | assert(memOp->IsRegOptional()); |
3003 | |
3004 | tmpDsc = codeGen->getSpillTempDsc(memOp); |
3005 | varNum = tmpDsc->tdTempNum(); |
3006 | offset = 0; |
3007 | |
3008 | codeGen->regSet.tmpRlsTemp(tmpDsc); |
3009 | } |
3010 | else if (memOp->isIndir()) |
3011 | { |
3012 | GenTreeIndir* memIndir = memOp->AsIndir(); |
3013 | GenTree* memBase = memIndir->gtOp1; |
3014 | |
3015 | switch (memBase->OperGet()) |
3016 | { |
3017 | case GT_LCL_VAR_ADDR: |
3018 | { |
3019 | varNum = memBase->AsLclVarCommon()->GetLclNum(); |
3020 | offset = 0; |
3021 | |
3022 | // Ensure that all the GenTreeIndir values are set to their defaults. |
3023 | assert(!memIndir->HasIndex()); |
3024 | assert(memIndir->Scale() == 1); |
3025 | assert(memIndir->Offset() == 0); |
3026 | |
3027 | break; |
3028 | } |
3029 | |
3030 | case GT_CLS_VAR_ADDR: |
3031 | { |
3032 | if (memOp == src) |
3033 | { |
3034 | assert(otherOp == dst); |
3035 | assert(cnsOp == nullptr); |
3036 | |
3037 | if (instrHasImplicitRegPairDest(ins)) |
3038 | { |
3039 | // src is a class static variable |
3040 | // dst is implicit - RDX:RAX |
3041 | emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0); |
3042 | } |
3043 | else |
3044 | { |
3045 | // src is a class static variable |
3046 | // dst is a register |
3047 | emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0); |
3048 | } |
3049 | } |
3050 | else |
3051 | { |
3052 | assert(memOp == dst); |
3053 | |
3054 | if (cnsOp != nullptr) |
3055 | { |
3056 | assert(cnsOp == src); |
3057 | assert(otherOp == nullptr); |
3058 | assert(src->IsCnsIntOrI()); |
3059 | |
                            // src is a contained immediate
3061 | // dst is a class static variable |
3062 | emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, |
3063 | (int)src->gtIntConCommon.IconValue()); |
3064 | } |
3065 | else |
3066 | { |
3067 | assert(otherOp == src); |
3068 | |
3069 | // src is a register |
3070 | // dst is a class static variable |
3071 | emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0); |
3072 | } |
3073 | } |
3074 | |
3075 | return dst->gtRegNum; |
3076 | } |
3077 | |
3078 | default: // Addressing mode [base + index * scale + offset] |
3079 | { |
3080 | instrDesc* id = nullptr; |
3081 | |
3082 | if (cnsOp != nullptr) |
3083 | { |
3084 | assert(memOp == dst); |
3085 | assert(cnsOp == src); |
3086 | assert(otherOp == nullptr); |
3087 | assert(src->IsCnsIntOrI()); |
3088 | |
3089 | id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->gtIntConCommon.IconValue()); |
3090 | } |
3091 | else |
3092 | { |
3093 | ssize_t offset = memIndir->Offset(); |
3094 | id = emitNewInstrAmd(attr, offset); |
3095 | id->idIns(ins); |
3096 | |
3097 | GenTree* regTree = (memOp == src) ? dst : src; |
3098 | |
3099 | // there must be one non-contained op |
3100 | assert(!regTree->isContained()); |
3101 | id->idReg1(regTree->gtRegNum); |
3102 | } |
3103 | assert(id != nullptr); |
3104 | |
3105 | id->idIns(ins); // Set the instruction. |
3106 | |
3107 | // Determine the instruction format |
3108 | insFormat fmt = IF_NONE; |
3109 | |
3110 | if (memOp == src) |
3111 | { |
3112 | assert(cnsOp == nullptr); |
3113 | assert(otherOp == dst); |
3114 | |
3115 | if (instrHasImplicitRegPairDest(ins)) |
3116 | { |
3117 | fmt = emitInsModeFormat(ins, IF_ARD); |
3118 | } |
3119 | else |
3120 | { |
3121 | fmt = emitInsModeFormat(ins, IF_RRD_ARD); |
3122 | } |
3123 | } |
3124 | else |
3125 | { |
3126 | assert(memOp == dst); |
3127 | |
3128 | if (cnsOp != nullptr) |
3129 | { |
3130 | assert(cnsOp == src); |
3131 | assert(otherOp == nullptr); |
3132 | assert(src->IsCnsIntOrI()); |
3133 | |
3134 | fmt = emitInsModeFormat(ins, IF_ARD_CNS); |
3135 | } |
3136 | else |
3137 | { |
3138 | assert(otherOp == src); |
3139 | fmt = emitInsModeFormat(ins, IF_ARD_RRD); |
3140 | } |
3141 | } |
3142 | assert(fmt != IF_NONE); |
3143 | emitHandleMemOp(memIndir, id, fmt, ins); |
3144 | |
3145 | // Determine the instruction size |
3146 | UNATIVE_OFFSET sz = 0; |
3147 | |
3148 | if (memOp == src) |
3149 | { |
3150 | assert(otherOp == dst); |
3151 | assert(cnsOp == nullptr); |
3152 | |
3153 | if (instrHasImplicitRegPairDest(ins)) |
3154 | { |
3155 | sz = emitInsSizeAM(id, insCode(ins)); |
3156 | } |
3157 | else |
3158 | { |
3159 | sz = emitInsSizeAM(id, insCodeRM(ins)); |
3160 | } |
3161 | } |
3162 | else |
3163 | { |
3164 | assert(memOp == dst); |
3165 | |
3166 | if (cnsOp != nullptr) |
3167 | { |
3168 | assert(memOp == dst); |
3169 | assert(cnsOp == src); |
3170 | assert(otherOp == nullptr); |
3171 | |
3172 | sz = emitInsSizeAM(id, insCodeMI(ins), (int)src->gtIntConCommon.IconValue()); |
3173 | } |
3174 | else |
3175 | { |
3176 | assert(otherOp == src); |
3177 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
3178 | } |
3179 | } |
3180 | assert(sz != 0); |
3181 | |
3182 | id->idCodeSize(sz); |
3183 | |
3184 | dispIns(id); |
3185 | emitCurIGsize += sz; |
3186 | |
3187 | return (memOp == src) ? dst->gtRegNum : REG_NA; |
3188 | } |
3189 | } |
3190 | } |
3191 | else |
3192 | { |
3193 | switch (memOp->OperGet()) |
3194 | { |
3195 | case GT_LCL_FLD: |
3196 | case GT_STORE_LCL_FLD: |
3197 | { |
3198 | GenTreeLclFld* lclField = memOp->AsLclFld(); |
3199 | varNum = lclField->GetLclNum(); |
3200 | offset = lclField->gtLclFld.gtLclOffs; |
3201 | break; |
3202 | } |
3203 | |
3204 | case GT_LCL_VAR: |
3205 | { |
3206 | assert(memOp->IsRegOptional() || !emitComp->lvaTable[memOp->gtLclVar.gtLclNum].lvIsRegCandidate()); |
3207 | varNum = memOp->AsLclVar()->GetLclNum(); |
3208 | offset = 0; |
3209 | break; |
3210 | } |
3211 | |
3212 | default: |
3213 | unreached(); |
3214 | break; |
3215 | } |
3216 | } |
3217 | |
3218 | // Ensure we got a good varNum and offset. |
3219 | // We also need to check for `tmpDsc != nullptr` since spill temp numbers |
3220 | // are negative and start with -1, which also happens to be BAD_VAR_NUM. |
3221 | assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); |
3222 | assert(offset != (unsigned)-1); |
3223 | |
3224 | if (memOp == src) |
3225 | { |
3226 | assert(otherOp == dst); |
3227 | assert(cnsOp == nullptr); |
3228 | |
3229 | if (instrHasImplicitRegPairDest(ins)) |
3230 | { |
3231 | // src is a stack based local variable |
3232 | // dst is implicit - RDX:RAX |
3233 | emitIns_S(ins, attr, varNum, offset); |
3234 | } |
3235 | else |
3236 | { |
3237 | // src is a stack based local variable |
3238 | // dst is a register |
3239 | emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset); |
3240 | } |
3241 | } |
3242 | else |
3243 | { |
3244 | assert(memOp == dst); |
3245 | assert((dst->gtRegNum == REG_NA) || dst->IsRegOptional()); |
3246 | |
3247 | if (cnsOp != nullptr) |
3248 | { |
3249 | assert(cnsOp == src); |
3250 | assert(otherOp == nullptr); |
3251 | assert(src->IsCnsIntOrI()); |
3252 | |
                // src is a contained immediate
3254 | // dst is a stack based local variable |
3255 | emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue()); |
3256 | } |
3257 | else |
3258 | { |
3259 | assert(otherOp == src); |
3260 | assert(!src->isContained()); |
3261 | |
3262 | // src is a register |
3263 | // dst is a stack based local variable |
3264 | emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset); |
3265 | } |
3266 | } |
3267 | } |
3268 | else if (cnsOp != nullptr) // reg, immed |
3269 | { |
3270 | assert(cnsOp == src); |
3271 | assert(otherOp == dst); |
3272 | |
3273 | if (src->IsCnsIntOrI()) |
3274 | { |
3275 | assert(!dst->isContained()); |
3276 | GenTreeIntConCommon* intCns = src->AsIntConCommon(); |
3277 | emitIns_R_I(ins, attr, dst->gtRegNum, intCns->IconValue()); |
3278 | } |
3279 | else |
3280 | { |
3281 | assert(src->IsCnsFltOrDbl()); |
3282 | GenTreeDblCon* dblCns = src->AsDblCon(); |
3283 | |
3284 | CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns)); |
3285 | emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0); |
3286 | } |
3287 | } |
3288 | else // reg, reg |
3289 | { |
3290 | assert(otherOp == nullptr); |
3291 | assert(!src->isContained() && !dst->isContained()); |
3292 | |
3293 | if (instrHasImplicitRegPairDest(ins)) |
3294 | { |
3295 | emitIns_R(ins, attr, src->gtRegNum); |
3296 | } |
3297 | else |
3298 | { |
3299 | emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum); |
3300 | } |
3301 | } |
3302 | |
3303 | return dst->gtRegNum; |
3304 | } |
3305 | |
3306 | //------------------------------------------------------------------------ |
3307 | // emitInsRMW: Emit logic for Read-Modify-Write binary instructions. |
3308 | // |
3309 | // Responsible for emitting a single instruction that will perform an operation of the form: |
3310 | // *addr = *addr <BinOp> src |
3311 | // For example: |
3312 | // ADD [RAX], RCX |
3313 | // |
3314 | // Arguments: |
3315 | // ins - instruction to generate |
3316 | // attr - emitter attribute for instruction |
3317 | // storeInd - indir for RMW addressing mode |
3318 | // src - source operand of instruction |
3319 | // |
3320 | // Assumptions: |
3321 | // Lowering has taken care of recognizing the StoreInd pattern of: |
3322 | // StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) ) |
3323 | // The address to store is already sitting in a register. |
3324 | // |
3325 | // Notes: |
3326 | // This is a no-produce operation, meaning that no register output will |
3327 | // be produced for future use in the code stream. |
3328 | // |
3329 | void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src) |
3330 | { |
3331 | GenTree* addr = storeInd->Addr(); |
3332 | addr = addr->gtSkipReloadOrCopy(); |
3333 | assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA || |
3334 | addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT); |
3335 | |
3336 | instrDesc* id = nullptr; |
3337 | UNATIVE_OFFSET sz; |
3338 | |
3339 | ssize_t offset = 0; |
3340 | if (addr->OperGet() != GT_CLS_VAR_ADDR) |
3341 | { |
3342 | offset = storeInd->Offset(); |
3343 | } |
3344 | |
3345 | if (src->isContainedIntOrIImmed()) |
3346 | { |
3347 | GenTreeIntConCommon* intConst = src->AsIntConCommon(); |
3348 | int iconVal = (int)intConst->IconValue(); |
3349 | switch (ins) |
3350 | { |
3351 | case INS_rcl_N: |
3352 | case INS_rcr_N: |
3353 | case INS_rol_N: |
3354 | case INS_ror_N: |
3355 | case INS_shl_N: |
3356 | case INS_shr_N: |
3357 | case INS_sar_N: |
3358 | iconVal &= 0x7F; |
3359 | break; |
3360 | default: |
3361 | break; |
3362 | } |
3363 | |
3364 | id = emitNewInstrAmdCns(attr, offset, iconVal); |
3365 | emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins); |
3366 | id->idIns(ins); |
3367 | sz = emitInsSizeAM(id, insCodeMI(ins), iconVal); |
3368 | } |
3369 | else |
3370 | { |
3371 | assert(!src->isContained()); // there must be one non-contained src |
3372 | |
3373 | // ind, reg |
3374 | id = emitNewInstrAmd(attr, offset); |
3375 | emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins); |
3376 | id->idReg1(src->gtRegNum); |
3377 | id->idIns(ins); |
3378 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
3379 | } |
3380 | |
3381 | id->idCodeSize(sz); |
3382 | |
3383 | dispIns(id); |
3384 | emitCurIGsize += sz; |
3385 | } |
3386 | |
3387 | //------------------------------------------------------------------------ |
3388 | // emitInsRMW: Emit logic for Read-Modify-Write unary instructions. |
3389 | // |
3390 | // Responsible for emitting a single instruction that will perform an operation of the form: |
3391 | // *addr = UnaryOp *addr |
3392 | // For example: |
3393 | // NOT [RAX] |
3394 | // |
3395 | // Arguments: |
3396 | // ins - instruction to generate |
3397 | // attr - emitter attribute for instruction |
3398 | // storeInd - indir for RMW addressing mode |
3399 | // |
3400 | // Assumptions: |
3401 | // Lowering has taken care of recognizing the StoreInd pattern of: |
3402 | // StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) ) |
3403 | // The address to store is already sitting in a register. |
3404 | // |
3405 | // Notes: |
3406 | // This is a no-produce operation, meaning that no register output will |
3407 | // be produced for future use in the code stream. |
3408 | // |
3409 | void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd) |
3410 | { |
3411 | GenTree* addr = storeInd->Addr(); |
3412 | addr = addr->gtSkipReloadOrCopy(); |
3413 | assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR || |
3414 | addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT); |
3415 | |
3416 | ssize_t offset = 0; |
3417 | if (addr->OperGet() != GT_CLS_VAR_ADDR) |
3418 | { |
3419 | offset = storeInd->Offset(); |
3420 | } |
3421 | |
3422 | instrDesc* id = emitNewInstrAmd(attr, offset); |
3423 | emitHandleMemOp(storeInd, id, IF_ARW, ins); |
3424 | id->idIns(ins); |
3425 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); |
3426 | id->idCodeSize(sz); |
3427 | |
3428 | dispIns(id); |
3429 | emitCurIGsize += sz; |
3430 | } |
3431 | |
3432 | /***************************************************************************** |
3433 | * |
3434 | * Add an instruction referencing a single register. |
3435 | */ |
3436 | |
3437 | void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) |
3438 | { |
3439 | emitAttr size = EA_SIZE(attr); |
3440 | |
3441 | assert(size <= EA_PTRSIZE); |
3442 | noway_assert(emitVerifyEncodable(ins, size, reg)); |
3443 | |
3444 | UNATIVE_OFFSET sz; |
3445 | instrDesc* id = emitNewInstrSmall(attr); |
3446 | |
3447 | switch (ins) |
3448 | { |
3449 | case INS_inc: |
3450 | case INS_dec: |
3451 | #ifdef _TARGET_AMD64_ |
3452 | |
3453 | sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix) |
3454 | |
3455 | #else // !_TARGET_AMD64_ |
3456 | |
3457 | if (size == EA_1BYTE) |
3458 | sz = 2; // Use the long form as the small one has no 'w' bit |
3459 | else |
3460 | sz = 1; // Use short form |
3461 | |
3462 | #endif // !_TARGET_AMD64_ |
3463 | |
3464 | break; |
3465 | |
3466 | case INS_pop: |
3467 | case INS_pop_hide: |
3468 | case INS_push: |
3469 | case INS_push_hide: |
3470 | |
3471 | /* We don't currently push/pop small values */ |
3472 | |
3473 | assert(size == EA_PTRSIZE); |
3474 | |
3475 | sz = 1; |
3476 | break; |
3477 | |
3478 | default: |
3479 | |
            /* All sixteen INS_setCC instructions are contiguous. */
3481 | |
3482 | if (INS_seto <= ins && ins <= INS_setg) |
3483 | { |
3484 | // Rough check that we used the endpoints for the range check |
3485 | |
3486 | assert(INS_seto + 0xF == INS_setg); |
3487 | |
3488 | // The caller must specify EA_1BYTE for 'attr' |
3489 | |
3490 | assert(attr == EA_1BYTE); |
3491 | |
3492 | /* We expect this to always be a 'big' opcode */ |
3493 | |
3494 | assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000); |
3495 | |
3496 | size = attr; |
3497 | |
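                // setcc uses a two-byte opcode (0F 9x) plus a ModR/M byte, so 3 bytes in total.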
3498 | sz = 3; |
3499 | break; |
3500 | } |
3501 | else |
3502 | { |
3503 | sz = 2; |
3504 | break; |
3505 | } |
3506 | } |
3507 | insFormat fmt = emitInsModeFormat(ins, IF_RRD); |
3508 | |
3509 | id->idIns(ins); |
3510 | id->idInsFmt(fmt); |
3511 | id->idReg1(reg); |
3512 | |
    // 16-bit operand instructions need the 0x66 operand-size override prefix.
3515 | if (size == EA_2BYTE) |
3516 | { |
3517 | sz += 1; |
3518 | } |
3519 | |
3520 | // Vex bytes |
3521 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); |
3522 | |
3523 | // REX byte |
3524 | if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr)) |
3525 | { |
3526 | sz += emitGetRexPrefixSize(ins); |
3527 | } |
3528 | |
3529 | id->idCodeSize(sz); |
3530 | |
3531 | dispIns(id); |
3532 | emitCurIGsize += sz; |
3533 | |
3534 | emitAdjustStackDepthPushPop(ins); |
3535 | } |
3536 | |
3537 | /***************************************************************************** |
3538 | * |
3539 | * Add an instruction referencing a register and a constant. |
3540 | */ |
3541 | |
3542 | void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val) |
3543 | { |
3544 | emitAttr size = EA_SIZE(attr); |
3545 | |
3546 | // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE |
3547 | assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins)); |
3548 | |
3549 | noway_assert(emitVerifyEncodable(ins, size, reg)); |
3550 | |
3551 | #ifdef _TARGET_AMD64_ |
3552 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
3553 | // all other opcodes take a sign-extended 4-byte immediate |
3554 | noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr))); |
3555 | #endif |
3556 | |
3557 | UNATIVE_OFFSET sz; |
3558 | instrDesc* id; |
3559 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); |
3560 | bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); |
3561 | |
3562 | // BT reg,imm might be useful but it requires special handling of the immediate value |
3563 | // (it is always encoded in a byte). Let's not complicate things until this is needed. |
3564 | assert(ins != INS_bt); |
3565 | |
3566 | // Figure out the size of the instruction |
3567 | switch (ins) |
3568 | { |
3569 | case INS_mov: |
3570 | #ifdef _TARGET_AMD64_ |
3571 | // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0 |
3572 | // and this isn't a reloc constant. |
3573 | if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr)) |
3574 | { |
3575 | attr = size = EA_4BYTE; |
3576 | } |
3577 | |
3578 | if (size > EA_4BYTE) |
3579 | { |
3580 | sz = 9; // Really it is 10, but we'll add one more later |
3581 | break; |
3582 | } |
3583 | #endif // _TARGET_AMD64_ |
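            // mov reg, imm32: 1-byte opcode + 4-byte immediate.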
3584 | sz = 5; |
3585 | break; |
3586 | |
3587 | case INS_rcl_N: |
3588 | case INS_rcr_N: |
3589 | case INS_rol_N: |
3590 | case INS_ror_N: |
3591 | case INS_shl_N: |
3592 | case INS_shr_N: |
3593 | case INS_sar_N: |
3594 | assert(val != 1); |
3595 | fmt = IF_RRW_SHF; |
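            // Shift/rotate by an immediate count: opcode + ModR/M + imm8.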
3596 | sz = 3; |
3597 | val &= 0x7F; |
3598 | valInByte = true; // shift amount always placed in a byte |
3599 | break; |
3600 | |
3601 | default: |
3602 | |
3603 | if (EA_IS_CNS_RELOC(attr)) |
3604 | { |
3605 | valInByte = false; // relocs can't be placed in a byte |
3606 | } |
3607 | |
3608 | if (valInByte) |
3609 | { |
3610 | if (IsSSEOrAVXInstruction(ins)) |
3611 | { |
3612 | sz = 5; |
3613 | } |
3614 | else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins)) |
3615 | { |
3616 | sz = 2; |
3617 | } |
3618 | else |
3619 | { |
3620 | sz = 3; |
3621 | } |
3622 | } |
3623 | else |
3624 | { |
3625 | if (reg == REG_EAX && !instrIs3opImul(ins)) |
3626 | { |
3627 | sz = 1; |
3628 | } |
3629 | else |
3630 | { |
3631 | sz = 2; |
3632 | } |
3633 | |
3634 | #ifdef _TARGET_AMD64_ |
3635 | if (size > EA_4BYTE) |
3636 | { |
3637 | // We special-case anything that takes a full 8-byte constant. |
3638 | sz += 4; |
3639 | } |
3640 | else |
3641 | #endif // _TARGET_AMD64_ |
3642 | { |
3643 | sz += EA_SIZE_IN_BYTES(attr); |
3644 | } |
3645 | } |
3646 | break; |
3647 | } |
3648 | |
3649 | // Vex prefix size |
3650 | sz += emitGetVexPrefixSize(ins, attr); |
3651 | |
3652 | // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a |
3653 | // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target |
3654 | // register. So we also need to check if that built-in register is an extended register. |
3655 | if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins)) |
3656 | { |
3657 | sz += emitGetRexPrefixSize(ins); |
3658 | } |
3659 | |
3660 | id = emitNewInstrSC(attr, val); |
3661 | id->idIns(ins); |
3662 | id->idInsFmt(fmt); |
3663 | id->idReg1(reg); |
3664 | |
3665 | // 16-bit operand instructions will need a prefix |
3666 | if (size == EA_2BYTE) |
3667 | { |
3668 | sz += 1; |
3669 | } |
3670 | |
3671 | id->idCodeSize(sz); |
3672 | |
3673 | dispIns(id); |
3674 | emitCurIGsize += sz; |
3675 | |
3676 | if (reg == REG_ESP) |
3677 | { |
3678 | emitAdjustStackDepth(ins, val); |
3679 | } |
3680 | } |
3681 | |
3682 | /***************************************************************************** |
3683 | * |
3684 | * Add an instruction referencing an integer constant. |
3685 | */ |
3686 | |
3687 | void emitter::emitIns_I(instruction ins, emitAttr attr, int val) |
3688 | { |
3689 | UNATIVE_OFFSET sz; |
3690 | instrDesc* id; |
3691 | bool valInByte = ((signed char)val == val); |
3692 | |
3693 | #ifdef _TARGET_AMD64_ |
3694 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
3695 | // all other opcodes take a sign-extended 4-byte immediate |
3696 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
3697 | #endif |
3698 | |
3699 | if (EA_IS_CNS_RELOC(attr)) |
3700 | { |
3701 | valInByte = false; // relocs can't be placed in a byte |
3702 | } |
3703 | |
3704 | switch (ins) |
3705 | { |
3706 | case INS_loop: |
3707 | case INS_jge: |
3708 | sz = 2; |
3709 | break; |
3710 | |
3711 | case INS_ret: |
3712 | sz = 3; |
3713 | break; |
3714 | |
3715 | case INS_push_hide: |
3716 | case INS_push: |
3717 | sz = valInByte ? 2 : 5; |
3718 | break; |
3719 | |
3720 | default: |
            NO_WAY("unexpected instruction");
3722 | } |
3723 | |
3724 | id = emitNewInstrSC(attr, val); |
3725 | id->idIns(ins); |
3726 | id->idInsFmt(IF_CNS); |
3727 | id->idCodeSize(sz); |
3728 | |
3729 | dispIns(id); |
3730 | emitCurIGsize += sz; |
3731 | |
3732 | emitAdjustStackDepthPushPop(ins); |
3733 | } |
3734 | |
3735 | /***************************************************************************** |
3736 | * |
3737 | * Add a "jump through a table" instruction. |
3738 | */ |
3739 | |
3740 | void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base) |
3741 | { |
3742 | assert(EA_SIZE(attr) == EA_4BYTE); |
3743 | |
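    // Opcode + ModR/M + SIB (3 bytes) plus a 4-byte displacement for the jump-table base address.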
3744 | UNATIVE_OFFSET sz = 3 + 4; |
3745 | const instruction ins = INS_i_jmp; |
3746 | |
3747 | if (IsExtendedReg(reg, attr)) |
3748 | { |
3749 | sz += emitGetRexPrefixSize(ins); |
3750 | } |
3751 | |
3752 | instrDesc* id = emitNewInstrAmd(attr, base); |
3753 | |
3754 | id->idIns(ins); |
3755 | id->idInsFmt(IF_ARD); |
3756 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
3757 | id->idAddr()->iiaAddrMode.amIndxReg = reg; |
3758 | id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP; |
3759 | |
3760 | #ifdef DEBUG |
3761 | id->idDebugOnlyInfo()->idMemCookie = base; |
3762 | #endif |
3763 | |
3764 | id->idCodeSize(sz); |
3765 | |
3766 | dispIns(id); |
3767 | emitCurIGsize += sz; |
3768 | } |
3769 | |
3770 | /***************************************************************************** |
3771 | * |
3772 | * Add an instruction with a static data member operand. If 'size' is 0, the |
3773 | * instruction operates on the address of the static member instead of its |
3774 | * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). |
3775 | */ |
3776 | |
3777 | void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) |
3778 | { |
    // Statics always need relocs
3780 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
3781 | { |
3782 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
3783 | } |
3784 | |
3785 | UNATIVE_OFFSET sz; |
3786 | instrDesc* id; |
3787 | |
3788 | /* Are we pushing the offset of the class variable? */ |
3789 | |
3790 | if (EA_IS_OFFSET(attr)) |
3791 | { |
3792 | assert(ins == INS_push); |
3793 | sz = 1 + TARGET_POINTER_SIZE; |
3794 | |
3795 | id = emitNewInstrDsp(EA_1BYTE, offs); |
3796 | id->idIns(ins); |
3797 | id->idInsFmt(IF_MRD_OFF); |
3798 | } |
3799 | else |
3800 | { |
3801 | insFormat fmt = emitInsModeFormat(ins, IF_MRD); |
3802 | |
3803 | id = emitNewInstrDsp(attr, offs); |
3804 | id->idIns(ins); |
3805 | id->idInsFmt(fmt); |
3806 | sz = emitInsSizeCV(id, insCodeMR(ins)); |
3807 | } |
3808 | |
3809 | // Vex prefix size |
3810 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); |
3811 | |
3812 | if (TakesRexWPrefix(ins, attr)) |
3813 | { |
3814 | // REX.W prefix |
3815 | sz += emitGetRexPrefixSize(ins); |
3816 | } |
3817 | |
3818 | id->idAddr()->iiaFieldHnd = fldHnd; |
3819 | |
3820 | id->idCodeSize(sz); |
3821 | |
3822 | dispIns(id); |
3823 | emitCurIGsize += sz; |
3824 | |
3825 | emitAdjustStackDepthPushPop(ins); |
3826 | } |
3827 | |
3828 | /***************************************************************************** |
3829 | * |
3830 | * Add an instruction with two register operands. |
3831 | */ |
3832 | |
3833 | void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) |
3834 | { |
3835 | emitAttr size = EA_SIZE(attr); |
3836 | |
3837 | /* We don't want to generate any useless mov instructions! */ |
3838 | CLANG_FORMAT_COMMENT_ANCHOR; |
3839 | |
3840 | #ifdef _TARGET_AMD64_ |
3841 | // Same-reg 4-byte mov can be useful because it performs a |
3842 | // zero-extension to 8 bytes. |
3843 | assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE); |
3844 | #else |
3845 | assert(ins != INS_mov || reg1 != reg2); |
3846 | #endif // _TARGET_AMD64_ |
3847 | |
3848 | assert(size <= EA_32BYTE); |
3849 | noway_assert(emitVerifyEncodable(ins, size, reg1, reg2)); |
3850 | |
3851 | UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr); |
3852 | |
3853 | if (Is4ByteSSEInstruction(ins)) |
3854 | { |
3855 | // The 4-Byte SSE instructions require one additional byte |
3856 | sz += 1; |
3857 | } |
3858 | |
3859 | /* Special case: "XCHG" uses a different format */ |
3860 | insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD); |
3861 | |
3862 | instrDesc* id = emitNewInstrSmall(attr); |
3863 | id->idIns(ins); |
3864 | id->idInsFmt(fmt); |
3865 | id->idReg1(reg1); |
3866 | id->idReg2(reg2); |
3867 | id->idCodeSize(sz); |
3868 | |
3869 | dispIns(id); |
3870 | emitCurIGsize += sz; |
3871 | } |
3872 | |
3873 | /***************************************************************************** |
3874 | * |
3875 | * Add an instruction with two register operands and an integer constant. |
3876 | */ |
3877 | |
3878 | void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) |
3879 | { |
    // SSE forms with a 3-byte opcode require 5 bytes; 4-byte-opcode SSE forms and AVX forms require 6 bytes
3881 | UNATIVE_OFFSET sz = 4; |
3882 | if (IsSSEOrAVXInstruction(ins)) |
3883 | { |
        // AVX:                 3-byte VEX prefix + 1-byte opcode + 1-byte ModR/M + 1-byte immediate
        // SSE (4-byte opcode): 4-byte opcode + 1-byte ModR/M + 1-byte immediate
        // SSE (3-byte opcode): 3-byte opcode + 1-byte ModR/M + 1-byte immediate
3887 | sz = (UseVEXEncoding() || Is4ByteSSEInstruction(ins)) ? 6 : 5; |
3888 | } |
3889 | |
3890 | #ifdef _TARGET_AMD64_ |
3891 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
3892 | // all other opcodes take a sign-extended 4-byte immediate |
3893 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
3894 | #endif |
3895 | |
3896 | instrDesc* id = emitNewInstrSC(attr, ival); |
3897 | |
3898 | // REX prefix |
3899 | if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) |
3900 | { |
3901 | sz += emitGetRexPrefixSize(ins); |
3902 | } |
3903 | |
3904 | if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding()) |
3905 | { |
3906 | sz += 1; |
3907 | } |
3908 | |
3909 | id->idIns(ins); |
3910 | id->idInsFmt(IF_RRW_RRW_CNS); |
3911 | id->idReg1(reg1); |
3912 | id->idReg2(reg2); |
3913 | id->idCodeSize(sz); |
3914 | |
3915 | dispIns(id); |
3916 | emitCurIGsize += sz; |
3917 | } |
3918 | |
3919 | void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs) |
3920 | { |
3921 | assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta); |
3922 | |
3923 | instrDesc* id = emitNewInstrAmd(attr, offs); |
3924 | |
3925 | id->idIns(ins); |
3926 | |
3927 | id->idInsFmt(IF_ARD); |
3928 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
3929 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
3930 | |
3931 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); |
3932 | id->idCodeSize(sz); |
3933 | |
3934 | dispIns(id); |
3935 | emitCurIGsize += sz; |
3936 | } |
3937 | |
3938 | //------------------------------------------------------------------------ |
3939 | // emitIns_AR_R_R: emits the code for an instruction that takes a base memory register, two register operands |
3940 | // and that does not return a value |
3941 | // |
3942 | // Arguments: |
3943 | // ins -- The instruction being emitted |
3944 | // attr -- The emit attribute |
3946 | // op2Reg -- The register of the second operand |
3947 | // op3Reg -- The register of the third operand |
3948 | // base -- The base register used for the memory address (first operand) |
3949 | // offs -- The offset from base |
3950 | // |
3951 | void emitter::emitIns_AR_R_R( |
3952 | instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs) |
3953 | { |
3954 | assert(IsSSEOrAVXInstruction(ins)); |
3955 | assert(IsThreeOperandAVXInstruction(ins)); |
3956 | |
3957 | instrDesc* id = emitNewInstrAmd(attr, offs); |
3958 | |
3959 | id->idIns(ins); |
3960 | id->idReg1(op2Reg); |
3961 | id->idReg2(op3Reg); |
3962 | |
3963 | id->idInsFmt(IF_AWR_RRD_RRD); |
3964 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
3965 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
3966 | |
3967 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); |
3968 | id->idCodeSize(sz); |
3969 | |
3970 | dispIns(id); |
3971 | emitCurIGsize += sz; |
3972 | } |
3973 | |
3974 | void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir) |
3975 | { |
3976 | ssize_t offs = indir->Offset(); |
3977 | instrDesc* id = emitNewInstrAmd(attr, offs); |
3978 | |
3979 | id->idIns(ins); |
3980 | id->idReg1(reg1); |
3981 | |
3982 | emitHandleMemOp(indir, id, IF_RRW_ARD, ins); |
3983 | |
3984 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); |
3985 | id->idCodeSize(sz); |
3986 | |
3987 | dispIns(id); |
3988 | emitCurIGsize += sz; |
3989 | } |
3990 | |
3991 | void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival) |
3992 | { |
3993 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); |
3994 | assert(IsSSEOrAVXInstruction(ins)); |
3995 | |
3996 | ssize_t offs = indir->Offset(); |
3997 | instrDesc* id = emitNewInstrAmdCns(attr, offs, ival); |
3998 | |
3999 | id->idIns(ins); |
4000 | id->idReg1(reg1); |
4001 | |
4002 | emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins); |
4003 | |
4004 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); |
4005 | |
4006 | if (Is4ByteSSEInstruction(ins)) |
4007 | { |
4008 | // The 4-Byte SSE instructions require two additional bytes |
4009 | sz += 2; |
4010 | } |
4011 | |
4012 | id->idCodeSize(sz); |
4013 | |
4014 | dispIns(id); |
4015 | emitCurIGsize += sz; |
4016 | } |
4017 | |
4018 | void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival) |
4019 | { |
4020 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); |
4021 | assert(IsSSEOrAVXInstruction(ins)); |
4022 | |
4023 | instrDesc* id = emitNewInstrAmdCns(attr, offs, ival); |
4024 | |
4025 | id->idIns(ins); |
4026 | id->idReg1(reg1); |
4027 | |
4028 | id->idInsFmt(IF_RRW_ARD_CNS); |
4029 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
4030 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
4031 | |
4032 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); |
4033 | |
4034 | if (Is4ByteSSEInstruction(ins)) |
4035 | { |
4036 | // The 4-Byte SSE instructions require two additional bytes |
4037 | sz += 2; |
4038 | } |
4039 | |
4040 | id->idCodeSize(sz); |
4041 | |
4042 | dispIns(id); |
4043 | emitCurIGsize += sz; |
4044 | } |
4045 | |
4046 | void emitter::emitIns_R_C_I( |
4047 | instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival) |
4048 | { |
    // Statics always need relocs
4050 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
4051 | { |
4052 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
4053 | } |
4054 | |
4055 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); |
4056 | assert(IsSSEOrAVXInstruction(ins)); |
4057 | |
4058 | instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs); |
4059 | |
4060 | id->idIns(ins); |
4061 | id->idInsFmt(IF_RRW_MRD_CNS); |
4062 | id->idReg1(reg1); |
4063 | id->idAddr()->iiaFieldHnd = fldHnd; |
4064 | |
4065 | UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival); |
4066 | |
4067 | if (Is4ByteSSEInstruction(ins)) |
4068 | { |
4069 | // The 4-Byte SSE instructions require two additional bytes |
4070 | sz += 2; |
4071 | } |
4072 | |
4073 | id->idCodeSize(sz); |
4074 | |
4075 | dispIns(id); |
4076 | emitCurIGsize += sz; |
4077 | } |
4078 | |
4079 | void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival) |
4080 | { |
4081 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); |
4082 | assert(IsSSEOrAVXInstruction(ins)); |
4083 | |
4084 | instrDesc* id = emitNewInstrCns(attr, ival); |
4085 | |
4086 | id->idIns(ins); |
4087 | id->idInsFmt(IF_RRW_SRD_CNS); |
4088 | id->idReg1(reg1); |
4089 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
4090 | |
4091 | #ifdef DEBUG |
4092 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
4093 | #endif |
4094 | |
4095 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival); |
4096 | |
4097 | if (Is4ByteSSEInstruction(ins)) |
4098 | { |
4099 | // The 4-Byte SSE instructions require two additional bytes |
4100 | sz += 2; |
4101 | } |
4102 | |
4103 | id->idCodeSize(sz); |
4104 | |
4105 | dispIns(id); |
4106 | emitCurIGsize += sz; |
4107 | } |
4108 | |
4109 | void emitter::emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir) |
4110 | { |
4111 | assert(IsSSEOrAVXInstruction(ins)); |
4112 | assert(IsThreeOperandAVXInstruction(ins)); |
4113 | |
4114 | ssize_t offs = indir->Offset(); |
4115 | instrDesc* id = emitNewInstrAmd(attr, offs); |
4116 | |
4117 | id->idIns(ins); |
4118 | id->idReg1(reg1); |
4119 | id->idReg2(reg2); |
4120 | |
4121 | emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins); |
4122 | |
4123 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); |
4124 | id->idCodeSize(sz); |
4125 | |
4126 | dispIns(id); |
4127 | emitCurIGsize += sz; |
4128 | } |
4129 | |
4130 | void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs) |
4131 | { |
4132 | assert(IsSSEOrAVXInstruction(ins)); |
4133 | assert(IsThreeOperandAVXInstruction(ins)); |
4134 | |
4135 | instrDesc* id = emitNewInstrAmd(attr, offs); |
4136 | |
4137 | id->idIns(ins); |
4138 | id->idReg1(reg1); |
4139 | id->idReg2(reg2); |
4140 | |
4141 | id->idInsFmt(IF_RWR_RRD_ARD); |
4142 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
4143 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
4144 | |
4145 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); |
4146 | id->idCodeSize(sz); |
4147 | |
4148 | dispIns(id); |
4149 | emitCurIGsize += sz; |
4150 | } |
4151 | |
4152 | //------------------------------------------------------------------------ |
4153 | // IsAVX2GatherInstruction: return true if the instruction is AVX2 Gather |
4154 | // |
4155 | // Arguments: |
4156 | // ins - the instruction to check |
4157 | // Return Value: |
4158 | // true if the instruction is AVX2 Gather |
4159 | // |
4160 | bool IsAVX2GatherInstruction(instruction ins) |
4161 | { |
4162 | switch (ins) |
4163 | { |
4164 | case INS_vpgatherdd: |
4165 | case INS_vpgatherdq: |
4166 | case INS_vpgatherqd: |
4167 | case INS_vpgatherqq: |
4168 | case INS_vgatherdps: |
4169 | case INS_vgatherdpd: |
4170 | case INS_vgatherqps: |
4171 | case INS_vgatherqpd: |
4172 | return true; |
4173 | default: |
4174 | return false; |
4175 | } |
4176 | } |
4177 | |
4178 | //------------------------------------------------------------------------ |
// emitIns_R_AR_R: Emits an AVX2 Gather instruction
4180 | // |
4181 | // Arguments: |
4182 | // ins - the instruction to emit |
4183 | // attr - the instruction operand size |
4184 | // reg1 - the destination and first source operand |
4185 | // reg2 - the mask operand (encoded in VEX.vvvv) |
4186 | // base - the base register of address to load |
//    index - the index register of the VSIB encoding
//    scale - the scale factor of the VSIB encoding
4189 | // offs - the offset added to the memory address from base |
4190 | // |
4191 | void emitter::emitIns_R_AR_R(instruction ins, |
4192 | emitAttr attr, |
4193 | regNumber reg1, |
4194 | regNumber reg2, |
4195 | regNumber base, |
4196 | regNumber index, |
4197 | int scale, |
4198 | int offs) |
4199 | { |
4200 | assert(IsAVX2GatherInstruction(ins)); |
4201 | |
4202 | instrDesc* id = emitNewInstrAmd(attr, offs); |
4203 | |
4204 | id->idIns(ins); |
4205 | id->idReg1(reg1); |
4206 | id->idReg2(reg2); |
4207 | |
4208 | id->idInsFmt(IF_RWR_ARD_RRD); |
4209 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
4210 | id->idAddr()->iiaAddrMode.amIndxReg = index; |
4211 | id->idAddr()->iiaAddrMode.amScale = emitEncodeSize((emitAttr)scale); |
4212 | |
4213 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); |
4214 | id->idCodeSize(sz); |
4215 | |
4216 | dispIns(id); |
4217 | emitCurIGsize += sz; |
4218 | } |
4219 | |
4220 | void emitter::emitIns_R_R_C( |
4221 | instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs) |
4222 | { |
4223 | assert(IsSSEOrAVXInstruction(ins)); |
4224 | assert(IsThreeOperandAVXInstruction(ins)); |
4225 | |
    // Statics always need relocs
4227 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
4228 | { |
4229 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
4230 | } |
4231 | |
4232 | instrDesc* id = emitNewInstrDsp(attr, offs); |
4233 | |
4234 | id->idIns(ins); |
4235 | id->idInsFmt(IF_RWR_RRD_MRD); |
4236 | id->idReg1(reg1); |
4237 | id->idReg2(reg2); |
4238 | id->idAddr()->iiaFieldHnd = fldHnd; |
4239 | |
4240 | UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)); |
4241 | id->idCodeSize(sz); |
4242 | |
4243 | dispIns(id); |
4244 | emitCurIGsize += sz; |
4245 | } |
4246 | |
4247 | /***************************************************************************** |
4248 | * |
4249 | * Add an instruction with three register operands. |
4250 | */ |
4251 | |
4252 | void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2) |
4253 | { |
4254 | assert(IsSSEOrAVXInstruction(ins)); |
4255 | assert(IsThreeOperandAVXInstruction(ins)); |
    // Currently the VEX prefix is only emitted using the three-byte form.
    // size = VEX + opcode + ModR/M = 3 + 1 + 1 = 5
    // TODO-XArch-CQ: We should create a function that can calculate the size of all kinds of AVX instructions in the future
4259 | UNATIVE_OFFSET sz = 5; |
4260 | |
4261 | instrDesc* id = emitNewInstr(attr); |
4262 | id->idIns(ins); |
4263 | id->idInsFmt(IF_RWR_RRD_RRD); |
4264 | id->idReg1(targetReg); |
4265 | id->idReg2(reg1); |
4266 | id->idReg3(reg2); |
4267 | |
4268 | id->idCodeSize(sz); |
4269 | dispIns(id); |
4270 | emitCurIGsize += sz; |
4271 | } |
4272 | |
4273 | void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs) |
4274 | { |
4275 | assert(IsSSEOrAVXInstruction(ins)); |
4276 | assert(IsThreeOperandAVXInstruction(ins)); |
4277 | |
4278 | instrDesc* id = emitNewInstr(attr); |
4279 | |
4280 | id->idIns(ins); |
4281 | id->idInsFmt(IF_RWR_RRD_SRD); |
4282 | id->idReg1(reg1); |
4283 | id->idReg2(reg2); |
4284 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
4285 | |
4286 | #ifdef DEBUG |
4287 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
4288 | #endif |
4289 | |
4290 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs); |
4291 | id->idCodeSize(sz); |
4292 | |
4293 | dispIns(id); |
4294 | emitCurIGsize += sz; |
4295 | } |
4296 | |
4297 | void emitter::emitIns_R_R_A_I( |
4298 | instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt) |
4299 | { |
4300 | assert(IsSSEOrAVXInstruction(ins)); |
4301 | assert(IsThreeOperandAVXInstruction(ins)); |
4302 | |
4303 | ssize_t offs = indir->Offset(); |
4304 | instrDesc* id = emitNewInstrAmdCns(attr, offs, ival); |
4305 | |
4306 | id->idIns(ins); |
4307 | id->idReg1(reg1); |
4308 | id->idReg2(reg2); |
4309 | |
4310 | emitHandleMemOp(indir, id, fmt, ins); |
4311 | |
4312 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); |
4313 | id->idCodeSize(sz); |
4314 | |
4315 | dispIns(id); |
4316 | emitCurIGsize += sz; |
4317 | } |
4318 | |
4319 | void emitter::emitIns_R_R_AR_I( |
4320 | instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival) |
4321 | { |
4322 | assert(IsSSEOrAVXInstruction(ins)); |
4323 | assert(IsThreeOperandAVXInstruction(ins)); |
4324 | |
4325 | instrDesc* id = emitNewInstrAmdCns(attr, offs, ival); |
4326 | |
4327 | id->idIns(ins); |
4328 | id->idReg1(reg1); |
4329 | id->idReg2(reg2); |
4330 | |
4331 | id->idInsFmt(IF_RWR_RRD_ARD_CNS); |
4332 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
4333 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
4334 | |
4335 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); |
4336 | id->idCodeSize(sz); |
4337 | |
4338 | dispIns(id); |
4339 | emitCurIGsize += sz; |
4340 | } |
4341 | |
4342 | void emitter::emitIns_R_R_C_I( |
4343 | instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival) |
4344 | { |
4345 | assert(IsSSEOrAVXInstruction(ins)); |
4346 | assert(IsThreeOperandAVXInstruction(ins)); |
4347 | |
    // Statics always need relocs
4349 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
4350 | { |
4351 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
4352 | } |
4353 | |
4354 | instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs); |
4355 | |
4356 | id->idIns(ins); |
4357 | id->idInsFmt(IF_RWR_RRD_MRD_CNS); |
4358 | id->idReg1(reg1); |
4359 | id->idReg2(reg2); |
4360 | id->idAddr()->iiaFieldHnd = fldHnd; |
4361 | |
4362 | UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival); |
4363 | id->idCodeSize(sz); |
4364 | |
4365 | dispIns(id); |
4366 | emitCurIGsize += sz; |
4367 | } |
4368 | |
4369 | /********************************************************************************** |
4370 | * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate. |
4371 | * |
4372 | * Arguments: |
4373 | * ins - the instruction to add |
4374 | * attr - the emitter attribute for instruction |
4375 | * targetReg - the target (destination) register |
4376 | * reg1 - the first source register |
4377 | * reg2 - the second source register |
4378 | * ival - the immediate value |
4379 | */ |
4380 | |
4381 | void emitter::emitIns_R_R_R_I( |
4382 | instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival) |
4383 | { |
4384 | assert(IsSSEOrAVXInstruction(ins)); |
4385 | assert(IsThreeOperandAVXInstruction(ins)); |
    // Currently the VEX prefix is only emitted using the three-byte form.
    // size = VEX + opcode + ModR/M + 1-byte immediate = 3 + 1 + 1 + 1 = 6
    // TODO-XArch-CQ: We should create a function that can calculate the size of all kinds of AVX instructions in the future
4389 | UNATIVE_OFFSET sz = 6; |
4390 | |
4391 | instrDesc* id = emitNewInstrCns(attr, ival); |
4392 | id->idIns(ins); |
4393 | id->idInsFmt(IF_RWR_RRD_RRD_CNS); |
4394 | id->idReg1(targetReg); |
4395 | id->idReg2(reg1); |
4396 | id->idReg3(reg2); |
4397 | |
4398 | id->idCodeSize(sz); |
4399 | dispIns(id); |
4400 | emitCurIGsize += sz; |
4401 | } |
4402 | |
4403 | void emitter::emitIns_R_R_S_I( |
4404 | instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival) |
4405 | { |
4406 | assert(IsSSEOrAVXInstruction(ins)); |
4407 | assert(IsThreeOperandAVXInstruction(ins)); |
4408 | |
4409 | instrDesc* id = emitNewInstrCns(attr, ival); |
4410 | |
4411 | id->idIns(ins); |
4412 | id->idInsFmt(IF_RWR_RRD_SRD_CNS); |
4413 | id->idReg1(reg1); |
4414 | id->idReg2(reg2); |
4415 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
4416 | |
4417 | #ifdef DEBUG |
4418 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
4419 | #endif |
4420 | |
4421 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival); |
4422 | id->idCodeSize(sz); |
4423 | |
4424 | dispIns(id); |
4425 | emitCurIGsize += sz; |
4426 | } |
4427 | |
4428 | //------------------------------------------------------------------------ |
// encodeXmmRegAsIval: Encodes an XMM register into imm[7:4] for use by a SIMD instruction
4430 | // |
4431 | // Arguments |
4432 | // opReg -- The register being encoded |
4433 | // |
4434 | // Returns: |
4435 | // opReg encoded in imm[7:4] |
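//
// For example, XMM2 (register index 2) is encoded as (2 << 4) == 0x20.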
4436 | static int encodeXmmRegAsIval(regNumber opReg) |
4437 | { |
    // AVX/AVX2 support a 4-register format for vblendvps/vblendvpd/vpblendvb,
    // which encodes the fourth register into imm8[7:4].
4440 | assert(opReg >= XMMBASE); |
4441 | int ival = (opReg - XMMBASE) << 4; |
4442 | |
4443 | assert((ival >= 0) && (ival <= 255)); |
4444 | return (int8_t)ival; |
4445 | } |
4446 | |
4447 | //------------------------------------------------------------------------ |
// emitIns_R_R_A_R: emits the code for an instruction that takes a register operand, a GenTreeIndir address,
//                  another register operand, and that returns a value in a register
4450 | // |
4451 | // Arguments: |
4452 | // ins -- The instruction being emitted |
4453 | // attr -- The emit attribute |
4454 | // targetReg -- The target register |
4455 | // op1Reg -- The register of the first operand |
4456 | // op3Reg -- The register of the third operand |
4457 | // indir -- The GenTreeIndir used for the memory address |
4458 | // |
4459 | // Remarks: |
4460 | // op2 is built from indir |
4461 | // |
4462 | void emitter::emitIns_R_R_A_R( |
4463 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir) |
4464 | { |
4465 | assert(isAvxBlendv(ins)); |
4466 | assert(UseVEXEncoding()); |
4467 | |
4468 | int ival = encodeXmmRegAsIval(op3Reg); |
4469 | ssize_t offs = indir->Offset(); |
4470 | instrDesc* id = emitNewInstrAmdCns(attr, offs, ival); |
4471 | |
4472 | id->idIns(ins); |
4473 | id->idReg1(targetReg); |
4474 | id->idReg2(op1Reg); |
4475 | |
4476 | emitHandleMemOp(indir, id, IF_RWR_RRD_ARD_RRD, ins); |
4477 | |
4478 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); |
4479 | id->idCodeSize(sz); |
4480 | |
4481 | dispIns(id); |
4482 | emitCurIGsize += sz; |
4483 | } |
4484 | |
4485 | //------------------------------------------------------------------------ |
// emitIns_R_R_AR_R: emits the code for an instruction that takes a register operand, a base memory
//                   register, another register operand, and that returns a value in a register
4488 | // |
4489 | // Arguments: |
4490 | // ins -- The instruction being emitted |
4491 | // attr -- The emit attribute |
4492 | // targetReg -- The target register |
4493 | // op1Reg -- The register of the first operands |
4494 | // op3Reg -- The register of the third operand |
4495 | // base -- The base register used for the memory address |
4496 | // offs -- The offset added to the memory address from base |
4497 | // |
4498 | // Remarks: |
4499 | // op2 is built from base + offs |
4500 | // |
4501 | void emitter::emitIns_R_R_AR_R( |
4502 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base, int offs) |
4503 | { |
4504 | assert(isAvxBlendv(ins)); |
4505 | assert(UseVEXEncoding()); |
4506 | |
4507 | int ival = encodeXmmRegAsIval(op3Reg); |
4508 | instrDesc* id = emitNewInstrAmdCns(attr, offs, ival); |
4509 | |
4510 | id->idIns(ins); |
4511 | id->idReg1(targetReg); |
4512 | id->idReg2(op1Reg); |
4513 | |
4514 | id->idInsFmt(IF_RWR_RRD_ARD_RRD); |
4515 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
4516 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
4517 | |
4518 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); |
4519 | id->idCodeSize(sz); |
4520 | |
4521 | dispIns(id); |
4522 | emitCurIGsize += sz; |
4523 | } |
4524 | |
4525 | //------------------------------------------------------------------------ |
// emitIns_R_R_C_R: emits the code for an instruction that takes a register operand, a field handle +
//                  offset, another register operand, and that returns a value in a register
4528 | // |
4529 | // Arguments: |
4530 | // ins -- The instruction being emitted |
4531 | // attr -- The emit attribute |
4532 | // targetReg -- The target register |
4533 | // op1Reg -- The register of the first operand |
4534 | // op3Reg -- The register of the third operand |
4535 | // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address |
4536 | // offs -- The offset added to the memory address from fldHnd |
4537 | // |
4538 | // Remarks: |
4539 | // op2 is built from fldHnd + offs |
4540 | // |
4541 | void emitter::emitIns_R_R_C_R(instruction ins, |
4542 | emitAttr attr, |
4543 | regNumber targetReg, |
4544 | regNumber op1Reg, |
4545 | regNumber op3Reg, |
4546 | CORINFO_FIELD_HANDLE fldHnd, |
4547 | int offs) |
4548 | { |
4549 | assert(isAvxBlendv(ins)); |
4550 | assert(UseVEXEncoding()); |
4551 | |
    // Statics always need relocs
4553 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
4554 | { |
4555 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
4556 | } |
4557 | |
4558 | int ival = encodeXmmRegAsIval(op3Reg); |
4559 | instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs); |
4560 | |
4561 | id->idIns(ins); |
4562 | id->idReg1(targetReg); |
4563 | id->idReg2(op1Reg); |
4564 | |
4565 | id->idInsFmt(IF_RWR_RRD_MRD_RRD); |
4566 | id->idAddr()->iiaFieldHnd = fldHnd; |
4567 | |
4568 | UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival); |
4569 | id->idCodeSize(sz); |
4570 | |
4571 | dispIns(id); |
4572 | emitCurIGsize += sz; |
4573 | } |
4574 | |
4575 | //------------------------------------------------------------------------ |
// emitIns_R_R_S_R: emits the code for an instruction that takes a register operand, a variable index +
//                  offset, another register operand, and that returns a value in a register
4578 | // |
4579 | // Arguments: |
4580 | // ins -- The instruction being emitted |
4581 | // attr -- The emit attribute |
4582 | // targetReg -- The target register |
4583 | // op1Reg -- The register of the first operand |
4584 | // op3Reg -- The register of the third operand |
4585 | // varx -- The variable index used for the memory address |
4586 | // offs -- The offset added to the memory address from varx |
4587 | // |
4588 | // Remarks: |
4589 | // op2 is built from varx + offs |
4590 | // |
4591 | void emitter::emitIns_R_R_S_R( |
4592 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs) |
4593 | { |
4594 | assert(isAvxBlendv(ins)); |
4595 | assert(UseVEXEncoding()); |
4596 | |
4597 | int ival = encodeXmmRegAsIval(op3Reg); |
4598 | instrDesc* id = emitNewInstrCns(attr, ival); |
4599 | |
4600 | id->idIns(ins); |
4601 | id->idReg1(targetReg); |
4602 | id->idReg2(op1Reg); |
4603 | |
4604 | id->idInsFmt(IF_RWR_RRD_SRD_RRD); |
4605 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
4606 | |
4607 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival); |
4608 | id->idCodeSize(sz); |
4609 | |
4610 | dispIns(id); |
4611 | emitCurIGsize += sz; |
4612 | } |
4613 | |
4614 | void emitter::emitIns_R_R_R_R( |
4615 | instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3) |
4616 | { |
4617 | assert(isAvxBlendv(ins)); |
4618 | assert(UseVEXEncoding()); |
    // Currently the VEX prefix is only emitted using the three-byte form.
    // size = VEX + opcode + ModR/M + 1-byte immediate (encoding the fourth register) = 3 + 1 + 1 + 1 = 6
    // TODO-XArch-CQ: We should create a function that can calculate the size of all kinds of AVX instructions in the future
4622 | UNATIVE_OFFSET sz = 6; |
4623 | |
4624 | int ival = encodeXmmRegAsIval(reg3); |
4625 | instrDesc* id = emitNewInstrCns(attr, ival); |
4626 | |
4627 | id->idIns(ins); |
4628 | id->idInsFmt(IF_RWR_RRD_RRD_RRD); |
4629 | id->idReg1(targetReg); |
4630 | id->idReg2(reg1); |
4631 | id->idReg3(reg2); |
4632 | id->idReg4(reg3); |
4633 | |
4634 | id->idCodeSize(sz); |
4635 | dispIns(id); |
4636 | emitCurIGsize += sz; |
4637 | } |
4638 | |
4639 | /***************************************************************************** |
4640 | * |
4641 | * Add an instruction with a register + static member operands. |
4642 | */ |
4643 | void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs) |
4644 | { |
    // Statics always need relocs
4646 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
4647 | { |
4648 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
4649 | } |
4650 | |
4651 | emitAttr size = EA_SIZE(attr); |
4652 | |
4653 | assert(size <= EA_32BYTE); |
4654 | noway_assert(emitVerifyEncodable(ins, size, reg)); |
4655 | |
4656 | UNATIVE_OFFSET sz; |
4657 | instrDesc* id; |
4658 | |
4659 | // Are we MOV'ing the offset of the class variable into EAX? |
4660 | if (EA_IS_OFFSET(attr)) |
4661 | { |
4662 | id = emitNewInstrDsp(EA_1BYTE, offs); |
4663 | id->idIns(ins); |
4664 | id->idInsFmt(IF_RWR_MRD_OFF); |
4665 | |
4666 | assert(ins == INS_mov && reg == REG_EAX); |
4667 | |
4668 | // Special case: "mov eax, [addr]" is smaller |
4669 | sz = 1 + TARGET_POINTER_SIZE; |
4670 | } |
4671 | else |
4672 | { |
4673 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD); |
4674 | |
4675 | id = emitNewInstrDsp(attr, offs); |
4676 | id->idIns(ins); |
4677 | id->idInsFmt(fmt); |
4678 | |
4679 | #ifdef _TARGET_X86_ |
4680 | // Special case: "mov eax, [addr]" is smaller. |
4681 | // This case is not enabled for amd64 as it always uses RIP relative addressing |
4682 | // and it results in smaller instruction size than encoding 64-bit addr in the |
4683 | // instruction. |
4684 | if (ins == INS_mov && reg == REG_EAX) |
4685 | { |
4686 | sz = 1 + TARGET_POINTER_SIZE; |
4687 | if (size == EA_2BYTE) |
4688 | sz += 1; |
4689 | } |
4690 | else |
4691 | #endif //_TARGET_X86_ |
4692 | { |
4693 | sz = emitInsSizeCV(id, insCodeRM(ins)); |
4694 | } |
4695 | |
4696 | // Special case: mov reg, fs:[ddd] |
4697 | if (fldHnd == FLD_GLOBAL_FS) |
4698 | { |
4699 | sz += 1; |
4700 | } |
4701 | } |
4702 | |
4703 | // VEX prefix |
4704 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); |
4705 | |
4706 | // REX prefix |
4707 | if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) |
4708 | { |
4709 | sz += emitGetRexPrefixSize(ins); |
4710 | } |
4711 | |
4712 | id->idReg1(reg); |
4713 | id->idCodeSize(sz); |
4714 | |
4715 | id->idAddr()->iiaFieldHnd = fldHnd; |
4716 | |
4717 | dispIns(id); |
4718 | emitCurIGsize += sz; |
4719 | } |
4720 | |
4721 | /***************************************************************************** |
4722 | * |
4723 | * Add an instruction with a static member + register operands. |
4724 | */ |
4725 | |
4726 | void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) |
4727 | { |
    // Statics always need relocs
4729 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
4730 | { |
4731 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
4732 | } |
4733 | |
4734 | emitAttr size = EA_SIZE(attr); |
4735 | |
4736 | #if defined(_TARGET_X86_) |
    // On x86 it is valid to storeind a double-sized operand from an xmm register to memory
4738 | assert(size <= EA_8BYTE); |
4739 | #else |
4740 | assert(size <= EA_PTRSIZE); |
4741 | #endif |
4742 | |
4743 | noway_assert(emitVerifyEncodable(ins, size, reg)); |
4744 | |
4745 | instrDesc* id = emitNewInstrDsp(attr, offs); |
4746 | insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD); |
4747 | |
4748 | id->idIns(ins); |
4749 | id->idInsFmt(fmt); |
4750 | |
4751 | UNATIVE_OFFSET sz; |
4752 | |
4753 | #ifdef _TARGET_X86_ |
4754 | // Special case: "mov [addr], EAX" is smaller. |
    // This case is not enabled for amd64 as it always uses RIP relative addressing
4756 | // and it will result in smaller instruction size than encoding 64-bit addr in |
4757 | // the instruction. |
4758 | if (ins == INS_mov && reg == REG_EAX) |
4759 | { |
4760 | sz = 1 + TARGET_POINTER_SIZE; |
4761 | if (size == EA_2BYTE) |
4762 | sz += 1; |
4763 | } |
4764 | else |
4765 | #endif //_TARGET_X86_ |
4766 | { |
4767 | sz = emitInsSizeCV(id, insCodeMR(ins)); |
4768 | } |
4769 | |
    // Special case: mov fs:[ddd], reg
4771 | if (fldHnd == FLD_GLOBAL_FS) |
4772 | { |
4773 | sz += 1; |
4774 | } |
4775 | |
4776 | // VEX prefix |
4777 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); |
4778 | |
4779 | // REX prefix |
4780 | if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) |
4781 | { |
4782 | sz += emitGetRexPrefixSize(ins); |
4783 | } |
4784 | |
4785 | id->idReg1(reg); |
4786 | id->idCodeSize(sz); |
4787 | |
4788 | id->idAddr()->iiaFieldHnd = fldHnd; |
4789 | |
4790 | dispIns(id); |
4791 | emitCurIGsize += sz; |
4792 | } |
4793 | |
4794 | /***************************************************************************** |
4795 | * |
4796 | * Add an instruction with a static member + constant. |
4797 | */ |
4798 | |
4799 | void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val) |
4800 | { |
    // Statics always need relocs
4802 | if (!jitStaticFldIsGlobAddr(fldHnd)) |
4803 | { |
4804 | attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); |
4805 | } |
4806 | |
4807 | insFormat fmt; |
4808 | |
4809 | switch (ins) |
4810 | { |
4811 | case INS_rcl_N: |
4812 | case INS_rcr_N: |
4813 | case INS_rol_N: |
4814 | case INS_ror_N: |
4815 | case INS_shl_N: |
4816 | case INS_shr_N: |
4817 | case INS_sar_N: |
4818 | assert(val != 1); |
4819 | fmt = IF_MRW_SHF; |
4820 | val &= 0x7F; |
4821 | break; |
4822 | |
4823 | default: |
4824 | fmt = emitInsModeFormat(ins, IF_MRD_CNS); |
4825 | break; |
4826 | } |
4827 | |
4828 | instrDesc* id = emitNewInstrCnsDsp(attr, val, offs); |
4829 | id->idIns(ins); |
4830 | id->idInsFmt(fmt); |
4831 | |
4832 | code_t code = insCodeMI(ins); |
4833 | UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val); |
4834 | |
4835 | // Vex prefix |
4836 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); |
4837 | |
4838 | // REX prefix, if not already included in "code" |
4839 | if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code)) |
4840 | { |
4841 | sz += emitGetRexPrefixSize(ins); |
4842 | } |
4843 | |
4844 | id->idAddr()->iiaFieldHnd = fldHnd; |
4845 | id->idCodeSize(sz); |
4846 | |
4847 | dispIns(id); |
4848 | emitCurIGsize += sz; |
4849 | } |
4850 | |
4851 | void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs) |
4852 | { |
4853 | assert(ins == INS_mov); |
4854 | assert(dst->bbFlags & BBF_JMP_TARGET); |
4855 | |
4856 | instrDescLbl* id = emitNewInstrLbl(); |
4857 | |
4858 | id->idIns(ins); |
4859 | id->idInsFmt(IF_SWR_LABEL); |
4860 | id->idAddr()->iiaBBlabel = dst; |
4861 | |
4862 | /* The label reference is always long */ |
4863 | |
4864 | id->idjShort = 0; |
4865 | id->idjKeepLong = 1; |
4866 | |
4867 | /* Record the current IG and offset within it */ |
4868 | |
4869 | id->idjIG = emitCurIG; |
4870 | id->idjOffs = emitCurIGsize; |
4871 | |
4872 | /* Append this instruction to this IG's jump list */ |
4873 | |
4874 | id->idjNext = emitCurIGjmpList; |
4875 | emitCurIGjmpList = id; |
4876 | |
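    // The label's address is stored as a 32-bit immediate, hence the extra sizeof(INT32) in the size estimate.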
4877 | UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(id, insCodeMI(ins), varx, offs); |
4878 | id->dstLclVar.initLclVarAddr(varx, offs); |
4879 | #ifdef DEBUG |
4880 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
4881 | #endif |
4882 | |
4883 | #if EMITTER_STATS |
4884 | emitTotalIGjmps++; |
4885 | #endif |
4886 | |
4887 | #ifndef _TARGET_AMD64_ |
    // Storing the address of a basicBlock will need a reloc
    // as the instruction uses the absolute address,
    // not a relative address.
    //
    // On Amd64, absolute code addresses should always go through a reloc
    // to be encoded as a RIP-relative rel32 offset.
4894 | if (emitComp->opts.compReloc) |
4895 | #endif |
4896 | { |
4897 | id->idSetIsDspReloc(); |
4898 | } |
4899 | |
4900 | id->idCodeSize(sz); |
4901 | |
4902 | dispIns(id); |
4903 | emitCurIGsize += sz; |
4904 | } |
4905 | |
4906 | /***************************************************************************** |
4907 | * |
4908 | * Add a label instruction. |
4909 | */ |
4910 | void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) |
4911 | { |
4912 | assert(ins == INS_lea); |
4913 | assert(dst->bbFlags & BBF_JMP_TARGET); |
4914 | |
4915 | instrDescJmp* id = emitNewInstrJmp(); |
4916 | |
4917 | id->idIns(ins); |
4918 | id->idReg1(reg); |
4919 | id->idInsFmt(IF_RWR_LABEL); |
4920 | id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE |
4921 | id->idAddr()->iiaBBlabel = dst; |
4922 | |
4923 | /* The label reference is always long */ |
4924 | |
4925 | id->idjShort = 0; |
4926 | id->idjKeepLong = 1; |
4927 | |
4928 | /* Record the current IG and offset within it */ |
4929 | |
4930 | id->idjIG = emitCurIG; |
4931 | id->idjOffs = emitCurIGsize; |
4932 | |
4933 | /* Append this instruction to this IG's jump list */ |
4934 | |
4935 | id->idjNext = emitCurIGjmpList; |
4936 | emitCurIGjmpList = id; |
4937 | |
4938 | #ifdef DEBUG |
4939 | // Mark the catch return |
4940 | if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET) |
4941 | { |
4942 | id->idDebugOnlyInfo()->idCatchRet = true; |
4943 | } |
4944 | #endif // DEBUG |
4945 | |
4946 | #if EMITTER_STATS |
4947 | emitTotalIGjmps++; |
4948 | #endif |
4949 | |
    // Set the relocation flags - these give the zapper a hint to perform
    // relocation of the specified 32-bit address.
4952 | // |
4953 | // Note the relocation flags influence the size estimate. |
4954 | id->idSetRelocFlags(attr); |
4955 | |
4956 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); |
4957 | id->idCodeSize(sz); |
4958 | |
4959 | dispIns(id); |
4960 | emitCurIGsize += sz; |
4961 | } |
4962 | |
4963 | /***************************************************************************** |
4964 | * |
4965 | * The following adds instructions referencing address modes. |
4966 | */ |
4967 | |
4968 | void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp) |
4969 | { |
4970 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
4971 | |
4972 | #ifdef _TARGET_AMD64_ |
4973 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
4974 | // all other opcodes take a sign-extended 4-byte immediate |
4975 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
4976 | #endif |
4977 | |
4978 | insFormat fmt; |
4979 | |
4980 | switch (ins) |
4981 | { |
4982 | case INS_rcl_N: |
4983 | case INS_rcr_N: |
4984 | case INS_rol_N: |
4985 | case INS_ror_N: |
4986 | case INS_shl_N: |
4987 | case INS_shr_N: |
4988 | case INS_sar_N: |
4989 | assert(val != 1); |
4990 | fmt = IF_ARW_SHF; |
4991 | val &= 0x7F; |
4992 | break; |
4993 | |
4994 | default: |
4995 | fmt = emitInsModeFormat(ins, IF_ARD_CNS); |
4996 | break; |
4997 | } |
4998 | |
4999 | /* |
5000 | Useful if you want to trap moves with 0 constant |
5001 | if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE) |
5002 | { |
5003 | printf("MOV 0\n"); |
5004 | } |
5005 | */ |
5006 | |
5007 | UNATIVE_OFFSET sz; |
5008 | instrDesc* id = emitNewInstrAmdCns(attr, disp, val); |
5009 | id->idIns(ins); |
5010 | id->idInsFmt(fmt); |
5011 | |
5012 | id->idAddr()->iiaAddrMode.amBaseReg = reg; |
5013 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
5014 | |
5015 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5016 | |
5017 | sz = emitInsSizeAM(id, insCodeMI(ins), val); |
5018 | id->idCodeSize(sz); |
5019 | |
5020 | dispIns(id); |
5021 | emitCurIGsize += sz; |
5022 | } |
5023 | |
5024 | void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp) |
5025 | { |
5026 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5027 | |
5028 | #ifdef _TARGET_AMD64_ |
5029 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
5030 | // all other opcodes take a sign-extended 4-byte immediate |
5031 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
5032 | #endif |
5033 | |
5034 | insFormat fmt; |
5035 | |
5036 | switch (ins) |
5037 | { |
5038 | case INS_rcl_N: |
5039 | case INS_rcr_N: |
5040 | case INS_rol_N: |
5041 | case INS_ror_N: |
5042 | case INS_shl_N: |
5043 | case INS_shr_N: |
5044 | case INS_sar_N: |
5045 | assert(val != 1); |
5046 | fmt = IF_ARW_SHF; |
5047 | val &= 0x7F; |
5048 | break; |
5049 | |
5050 | default: |
5051 | fmt = emitInsModeFormat(ins, IF_ARD_CNS); |
5052 | break; |
5053 | } |
5054 | |
5055 | /* |
5056 | Useful if you want to trap moves with 0 constant |
5057 | if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE) |
5058 | { |
5059 | printf("MOV 0\n"); |
5060 | } |
5061 | */ |
5062 | |
5063 | UNATIVE_OFFSET sz; |
5064 | instrDesc* id = emitNewInstrAmdCns(attr, disp, val); |
5065 | id->idIns(ins); |
5066 | id->idInsFmt(fmt); |
5067 | |
5068 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
5069 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
5070 | |
5071 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5072 | |
5073 | sz = emitInsSizeAM(id, insCodeMI(ins), val); |
5074 | id->idCodeSize(sz); |
5075 | |
5076 | dispIns(id); |
5077 | emitCurIGsize += sz; |
5078 | } |
5079 | |
5080 | void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp) |
5081 | { |
5082 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA)); |
5083 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5084 | |
5085 | if (ins == INS_lea) |
5086 | { |
5087 | if (ireg == base && disp == 0) |
5088 | { |
            // The emitter may not be the obvious place for this optimization, but it is a better choke
            // point than having to analyze every emitIns(ins, tree) call site individually.
            //
5092 | return; |
5093 | } |
5094 | } |
5095 | |
5096 | UNATIVE_OFFSET sz; |
5097 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5098 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); |
5099 | |
5100 | id->idIns(ins); |
5101 | id->idInsFmt(fmt); |
5102 | id->idReg1(ireg); |
5103 | |
5104 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
5105 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
5106 | |
5107 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5108 | |
5109 | sz = emitInsSizeAM(id, insCodeRM(ins)); |
5110 | |
5111 | if (Is4ByteSSEInstruction(ins)) |
5112 | { |
5113 | // The 4-Byte SSE instructions require two additional bytes |
5114 | sz += 2; |
5115 | } |
5116 | |
5117 | id->idCodeSize(sz); |
5118 | |
5119 | dispIns(id); |
5120 | emitCurIGsize += sz; |
5121 | } |
5122 | |
5123 | void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) |
5124 | { |
5125 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); |
5126 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5127 | |
5128 | UNATIVE_OFFSET sz; |
5129 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5130 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); |
5131 | |
5132 | id->idIns(ins); |
5133 | id->idInsFmt(fmt); |
5134 | id->idReg1(ireg); |
5135 | |
5136 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
5137 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
5138 | |
5139 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5140 | |
5141 | sz = emitInsSizeAM(id, insCodeRM(ins)); |
5142 | id->idCodeSize(sz); |
5143 | |
5144 | dispIns(id); |
5145 | emitCurIGsize += sz; |
5146 | } |
5147 | |
5148 | void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp) |
5149 | { |
5150 | UNATIVE_OFFSET sz; |
5151 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5152 | insFormat fmt; |
5153 | |
5154 | if (ireg == REG_NA) |
5155 | { |
5156 | fmt = emitInsModeFormat(ins, IF_ARD); |
5157 | } |
5158 | else |
5159 | { |
5160 | fmt = emitInsModeFormat(ins, IF_ARD_RRD); |
5161 | |
5162 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE)); |
5163 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5164 | |
5165 | id->idReg1(ireg); |
5166 | } |
5167 | |
5168 | id->idIns(ins); |
5169 | id->idInsFmt(fmt); |
5170 | |
5171 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
5172 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
5173 | |
5174 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5175 | |
5176 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
5177 | id->idCodeSize(sz); |
5178 | |
5179 | dispIns(id); |
5180 | emitCurIGsize += sz; |
5181 | |
5182 | emitAdjustStackDepthPushPop(ins); |
5183 | } |
5184 | |
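//------------------------------------------------------------------------
// emitIns_AR_R_I: emits the code for vextracti128/vextractf128, which store a
//                 128-bit lane (selected by an immediate) of a YMM register
//                 to [base + disp].
//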
5185 | void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival) |
5186 | { |
5187 | assert(ins == INS_vextracti128 || ins == INS_vextractf128); |
5188 | assert(base != REG_NA); |
5189 | assert(ireg != REG_NA); |
5190 | instrDesc* id = emitNewInstrAmdCns(attr, disp, ival); |
5191 | |
5192 | id->idIns(ins); |
5193 | id->idInsFmt(IF_AWR_RRD_CNS); |
5194 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
5195 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
5196 | id->idReg1(ireg); |
5197 | |
5198 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5199 | |
5200 | UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins), ival); |
5201 | id->idCodeSize(sz); |
5202 | |
5203 | dispIns(id); |
5204 | emitCurIGsize += sz; |
5205 | } |
5206 | |
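//------------------------------------------------------------------------
// emitIns_AI_R: emits the code for an instruction that takes a memory operand at an
//               absolute address and a register operand; ireg may be REG_NA for
//               single-operand memory forms.
//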
5207 | void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) |
5208 | { |
5209 | UNATIVE_OFFSET sz; |
5210 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5211 | insFormat fmt; |
5212 | |
5213 | if (ireg == REG_NA) |
5214 | { |
5215 | fmt = emitInsModeFormat(ins, IF_ARD); |
5216 | } |
5217 | else |
5218 | { |
5219 | fmt = emitInsModeFormat(ins, IF_ARD_RRD); |
5220 | |
5221 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5222 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5223 | |
5224 | id->idReg1(ireg); |
5225 | } |
5226 | |
5227 | id->idIns(ins); |
5228 | id->idInsFmt(fmt); |
5229 | |
5230 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
5231 | id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
5232 | |
5233 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5234 | |
5235 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
5236 | id->idCodeSize(sz); |
5237 | |
5238 | dispIns(id); |
5239 | emitCurIGsize += sz; |
5240 | |
5241 | emitAdjustStackDepthPushPop(ins); |
5242 | } |
5243 | |
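//------------------------------------------------------------------------
// emitIns_I_ARR: emits the code for an instruction that takes an immediate operand
//                and a [reg + rg2 + disp] address mode operand (scale 1).
//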
5244 | void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp) |
5245 | { |
5246 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5247 | |
5248 | #ifdef _TARGET_AMD64_ |
5249 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
5250 | // all other opcodes take a sign-extended 4-byte immediate |
5251 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
5252 | #endif |
5253 | |
5254 | insFormat fmt; |
5255 | |
5256 | switch (ins) |
5257 | { |
5258 | case INS_rcl_N: |
5259 | case INS_rcr_N: |
5260 | case INS_rol_N: |
5261 | case INS_ror_N: |
5262 | case INS_shl_N: |
5263 | case INS_shr_N: |
5264 | case INS_sar_N: |
5265 | assert(val != 1); |
5266 | fmt = IF_ARW_SHF; |
5267 | val &= 0x7F; |
5268 | break; |
5269 | |
5270 | default: |
5271 | fmt = emitInsModeFormat(ins, IF_ARD_CNS); |
5272 | break; |
5273 | } |
5274 | |
5275 | UNATIVE_OFFSET sz; |
5276 | instrDesc* id = emitNewInstrAmdCns(attr, disp, val); |
5277 | id->idIns(ins); |
5278 | id->idInsFmt(fmt); |
5279 | |
5280 | id->idAddr()->iiaAddrMode.amBaseReg = reg; |
5281 | id->idAddr()->iiaAddrMode.amIndxReg = rg2; |
5282 | id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; |
5283 | |
5284 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5285 | |
5286 | sz = emitInsSizeAM(id, insCodeMI(ins), val); |
5287 | id->idCodeSize(sz); |
5288 | |
5289 | dispIns(id); |
5290 | emitCurIGsize += sz; |
5291 | } |
5292 | |
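//------------------------------------------------------------------------
// emitIns_R_ARR: emits the code for an instruction that takes a register operand
//                and a [base + index + disp] address mode operand (scale 1).
//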
5293 | void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp) |
5294 | { |
5295 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); |
5296 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5297 | |
5298 | UNATIVE_OFFSET sz; |
5299 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5300 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); |
5301 | |
5302 | id->idIns(ins); |
5303 | id->idInsFmt(fmt); |
5304 | id->idReg1(ireg); |
5305 | |
5306 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
5307 | id->idAddr()->iiaAddrMode.amIndxReg = index; |
5308 | id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; |
5309 | |
5310 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5311 | |
5312 | sz = emitInsSizeAM(id, insCodeRM(ins)); |
5313 | id->idCodeSize(sz); |
5314 | |
5315 | dispIns(id); |
5316 | emitCurIGsize += sz; |
5317 | } |
5318 | |
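//------------------------------------------------------------------------
// emitIns_ARR_R: emits the code for an instruction that takes a [base + index + disp]
//                address mode operand (scale 1) and a register operand; ireg may be
//                REG_NA for single-operand memory forms.
//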
5319 | void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp) |
5320 | { |
5321 | UNATIVE_OFFSET sz; |
5322 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5323 | insFormat fmt; |
5324 | |
5325 | if (ireg == REG_NA) |
5326 | { |
5327 | fmt = emitInsModeFormat(ins, IF_ARD); |
5328 | } |
5329 | else |
5330 | { |
5331 | fmt = emitInsModeFormat(ins, IF_ARD_RRD); |
5332 | |
5333 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5334 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5335 | |
5336 | id->idReg1(ireg); |
5337 | } |
5338 | |
5339 | id->idIns(ins); |
5340 | id->idInsFmt(fmt); |
5341 | |
5342 | id->idAddr()->iiaAddrMode.amBaseReg = reg; |
5343 | id->idAddr()->iiaAddrMode.amIndxReg = index; |
5344 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1); |
5345 | |
5346 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5347 | |
5348 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
5349 | id->idCodeSize(sz); |
5350 | |
5351 | dispIns(id); |
5352 | emitCurIGsize += sz; |
5353 | |
5354 | emitAdjustStackDepthPushPop(ins); |
5355 | } |
5356 | |
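//------------------------------------------------------------------------
// emitIns_I_ARX: emits the code for an instruction that takes an immediate operand
//                and a [reg + rg2*mul + disp] address mode operand.
//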
5357 | void emitter::emitIns_I_ARX( |
5358 | instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp) |
5359 | { |
5360 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5361 | |
5362 | #ifdef _TARGET_AMD64_ |
5363 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
5364 | // all other opcodes take a sign-extended 4-byte immediate |
5365 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
5366 | #endif |
5367 | |
5368 | insFormat fmt; |
5369 | |
5370 | switch (ins) |
5371 | { |
5372 | case INS_rcl_N: |
5373 | case INS_rcr_N: |
5374 | case INS_rol_N: |
5375 | case INS_ror_N: |
5376 | case INS_shl_N: |
5377 | case INS_shr_N: |
5378 | case INS_sar_N: |
5379 | assert(val != 1); |
5380 | fmt = IF_ARW_SHF; |
5381 | val &= 0x7F; |
5382 | break; |
5383 | |
5384 | default: |
5385 | fmt = emitInsModeFormat(ins, IF_ARD_CNS); |
5386 | break; |
5387 | } |
5388 | |
5389 | UNATIVE_OFFSET sz; |
5390 | instrDesc* id = emitNewInstrAmdCns(attr, disp, val); |
5391 | |
5392 | id->idIns(ins); |
5393 | id->idInsFmt(fmt); |
5394 | |
5395 | id->idAddr()->iiaAddrMode.amBaseReg = reg; |
5396 | id->idAddr()->iiaAddrMode.amIndxReg = rg2; |
5397 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); |
5398 | |
5399 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5400 | |
5401 | sz = emitInsSizeAM(id, insCodeMI(ins), val); |
5402 | id->idCodeSize(sz); |
5403 | |
5404 | dispIns(id); |
5405 | emitCurIGsize += sz; |
5406 | } |
5407 | |
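//------------------------------------------------------------------------
// emitIns_R_ARX: emits the code for an instruction that takes a register operand
//                and a [base + index*mul + disp] address mode operand.
//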
5408 | void emitter::emitIns_R_ARX( |
5409 | instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp) |
5410 | { |
5411 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); |
5412 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5413 | |
5414 | UNATIVE_OFFSET sz; |
5415 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5416 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); |
5417 | |
5418 | id->idIns(ins); |
5419 | id->idInsFmt(fmt); |
5420 | id->idReg1(ireg); |
5421 | |
5422 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
5423 | id->idAddr()->iiaAddrMode.amIndxReg = index; |
5424 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); |
5425 | |
5426 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5427 | |
5428 | sz = emitInsSizeAM(id, insCodeRM(ins)); |
5429 | id->idCodeSize(sz); |
5430 | |
5431 | dispIns(id); |
5432 | emitCurIGsize += sz; |
5433 | } |
5434 | |
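//------------------------------------------------------------------------
// emitIns_ARX_R: emits the code for an instruction that takes a [base + index*mul + disp]
//                address mode operand and a register operand; ireg may be REG_NA for
//                single-operand memory forms.
//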
5435 | void emitter::emitIns_ARX_R( |
5436 | instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp) |
5437 | { |
5438 | UNATIVE_OFFSET sz; |
5439 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5440 | insFormat fmt; |
5441 | |
5442 | if (ireg == REG_NA) |
5443 | { |
5444 | fmt = emitInsModeFormat(ins, IF_ARD); |
5445 | } |
5446 | else |
5447 | { |
5448 | fmt = emitInsModeFormat(ins, IF_ARD_RRD); |
5449 | |
5450 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5451 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5452 | |
5453 | id->idReg1(ireg); |
5454 | } |
5455 | |
5456 | id->idIns(ins); |
5457 | id->idInsFmt(fmt); |
5458 | |
5459 | id->idAddr()->iiaAddrMode.amBaseReg = base; |
5460 | id->idAddr()->iiaAddrMode.amIndxReg = index; |
5461 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); |
5462 | |
5463 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5464 | |
5465 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
5466 | id->idCodeSize(sz); |
5467 | |
5468 | dispIns(id); |
5469 | emitCurIGsize += sz; |
5470 | |
5471 | emitAdjustStackDepthPushPop(ins); |
5472 | } |
5473 | |
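//------------------------------------------------------------------------
// emitIns_I_AX: emits the code for an instruction that takes an immediate operand
//               and a [reg*mul + disp] address mode operand (no base register).
//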
5474 | void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp) |
5475 | { |
5476 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5477 | |
5478 | #ifdef _TARGET_AMD64_ |
5479 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
5480 | // all other opcodes take a sign-extended 4-byte immediate |
5481 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
5482 | #endif |
5483 | |
5484 | insFormat fmt; |
5485 | |
5486 | switch (ins) |
5487 | { |
5488 | case INS_rcl_N: |
5489 | case INS_rcr_N: |
5490 | case INS_rol_N: |
5491 | case INS_ror_N: |
5492 | case INS_shl_N: |
5493 | case INS_shr_N: |
5494 | case INS_sar_N: |
5495 | assert(val != 1); |
5496 | fmt = IF_ARW_SHF; |
5497 | val &= 0x7F; |
5498 | break; |
5499 | |
5500 | default: |
5501 | fmt = emitInsModeFormat(ins, IF_ARD_CNS); |
5502 | break; |
5503 | } |
5504 | |
5505 | UNATIVE_OFFSET sz; |
5506 | instrDesc* id = emitNewInstrAmdCns(attr, disp, val); |
5507 | id->idIns(ins); |
5508 | id->idInsFmt(fmt); |
5509 | |
5510 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
5511 | id->idAddr()->iiaAddrMode.amIndxReg = reg; |
5512 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); |
5513 | |
5514 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5515 | |
5516 | sz = emitInsSizeAM(id, insCodeMI(ins), val); |
5517 | id->idCodeSize(sz); |
5518 | |
5519 | dispIns(id); |
5520 | emitCurIGsize += sz; |
5521 | } |
5522 | |
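//------------------------------------------------------------------------
// emitIns_R_AX: emits the code for an instruction that takes a register operand
//               and a [reg*mul + disp] address mode operand (no base register).
//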
5523 | void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp) |
5524 | { |
5525 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); |
5526 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5527 | |
5528 | UNATIVE_OFFSET sz; |
5529 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5530 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); |
5531 | |
5532 | id->idIns(ins); |
5533 | id->idInsFmt(fmt); |
5534 | id->idReg1(ireg); |
5535 | |
5536 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
5537 | id->idAddr()->iiaAddrMode.amIndxReg = reg; |
5538 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); |
5539 | |
5540 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5541 | |
5542 | sz = emitInsSizeAM(id, insCodeRM(ins)); |
5543 | id->idCodeSize(sz); |
5544 | |
5545 | dispIns(id); |
5546 | emitCurIGsize += sz; |
5547 | } |
5548 | |
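//------------------------------------------------------------------------
// emitIns_AX_R: emits the code for an instruction that takes a [reg*mul + disp]
//               address mode operand (no base register) and a register operand;
//               ireg may be REG_NA for single-operand memory forms.
//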
5549 | void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp) |
5550 | { |
5551 | UNATIVE_OFFSET sz; |
5552 | instrDesc* id = emitNewInstrAmd(attr, disp); |
5553 | insFormat fmt; |
5554 | |
5555 | if (ireg == REG_NA) |
5556 | { |
5557 | fmt = emitInsModeFormat(ins, IF_ARD); |
5558 | } |
5559 | else |
5560 | { |
5561 | fmt = emitInsModeFormat(ins, IF_ARD_RRD); |
5562 | noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); |
5563 | assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); |
5564 | |
5565 | id->idReg1(ireg); |
5566 | } |
5567 | |
5568 | id->idIns(ins); |
5569 | id->idInsFmt(fmt); |
5570 | |
5571 | id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
5572 | id->idAddr()->iiaAddrMode.amIndxReg = reg; |
5573 | id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); |
5574 | |
5575 | assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly |
5576 | |
5577 | sz = emitInsSizeAM(id, insCodeMR(ins)); |
5578 | id->idCodeSize(sz); |
5579 | |
5580 | dispIns(id); |
5581 | emitCurIGsize += sz; |
5582 | |
5583 | emitAdjustStackDepthPushPop(ins); |
5584 | } |
5585 | |
5586 | #ifdef FEATURE_HW_INTRINSICS |
5587 | //------------------------------------------------------------------------ |
5588 | // emitIns_SIMD_R_R_I: emits the code for a SIMD instruction that takes a register operand, an immediate operand |
5589 | // and that returns a value in register |
5590 | // |
5591 | // Arguments: |
5592 | // ins -- The instruction being emitted |
5593 | // attr -- The emit attribute |
5594 | // targetReg -- The target register |
5595 | // op1Reg -- The register of the first operand |
5596 | // ival -- The immediate value |
5597 | // |
5598 | void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival) |
5599 | { |
5600 | if (UseVEXEncoding() || IsDstSrcImmAvxInstruction(ins)) |
5601 | { |
5602 | emitIns_R_R_I(ins, attr, targetReg, op1Reg, ival); |
5603 | } |
5604 | else |
5605 | { |
5606 | if (op1Reg != targetReg) |
5607 | { |
5608 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5609 | } |
5610 | emitIns_R_I(ins, attr, targetReg, ival); |
5611 | } |
5612 | } |
5613 | |
5614 | //------------------------------------------------------------------------ |
5615 | // emitIns_SIMD_R_R_A: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address, |
5616 | // and that returns a value in register |
5617 | // |
5618 | // Arguments: |
5619 | // ins -- The instruction being emitted |
5620 | // attr -- The emit attribute |
5621 | // targetReg -- The target register |
5622 | // op1Reg -- The register of the first operand |
5623 | // indir -- The GenTreeIndir used for the memory address |
5624 | // |
5625 | void emitter::emitIns_SIMD_R_R_A( |
5626 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir) |
5627 | { |
5628 | if (UseVEXEncoding()) |
5629 | { |
5630 | emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir); |
5631 | } |
5632 | else |
5633 | { |
5634 | if (op1Reg != targetReg) |
5635 | { |
5636 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5637 | } |
5638 | emitIns_R_A(ins, attr, targetReg, indir); |
5639 | } |
5640 | } |
5641 | |
5642 | //------------------------------------------------------------------------ |
5643 | // emitIns_SIMD_R_R_AR: emits the code for a SIMD instruction that takes a register operand, a base memory register, |
5644 | // and that returns a value in register |
5645 | // |
5646 | // Arguments: |
5647 | // ins -- The instruction being emitted |
5648 | // attr -- The emit attribute |
5649 | // targetReg -- The target register |
5650 | // op1Reg -- The register of the first operand |
5651 | // base -- The base register used for the memory address |
5652 | // |
5653 | void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base) |
5654 | { |
5655 | if (UseVEXEncoding()) |
5656 | { |
5657 | emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, 0); |
5658 | } |
5659 | else |
5660 | { |
5661 | if (op1Reg != targetReg) |
5662 | { |
5663 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5664 | } |
5665 | emitIns_R_AR(ins, attr, targetReg, base, 0); |
5666 | } |
5667 | } |
5668 | |
5669 | //------------------------------------------------------------------------ |
5670 | // emitIns_SIMD_R_R_C: emits the code for a SIMD instruction that takes a register operand, a field handle + offset, |
5671 | // and that returns a value in register |
5672 | // |
5673 | // Arguments: |
5674 | // ins -- The instruction being emitted |
5675 | // attr -- The emit attribute |
5676 | // targetReg -- The target register |
5677 | // op1Reg -- The register of the first operand |
5678 | // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address |
5679 | // offs -- The offset added to the memory address from fldHnd |
5680 | // |
5681 | void emitter::emitIns_SIMD_R_R_C( |
5682 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs) |
5683 | { |
5684 | if (UseVEXEncoding()) |
5685 | { |
5686 | emitIns_R_R_C(ins, attr, targetReg, op1Reg, fldHnd, offs); |
5687 | } |
5688 | else |
5689 | { |
5690 | if (op1Reg != targetReg) |
5691 | { |
5692 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5693 | } |
5694 | emitIns_R_C(ins, attr, targetReg, fldHnd, offs); |
5695 | } |
5696 | } |
5697 | |
5698 | //------------------------------------------------------------------------ |
5699 | // emitIns_SIMD_R_R_R: emits the code for a SIMD instruction that takes two register operands, and that returns a |
5700 | // value in register |
5701 | // |
5702 | // Arguments: |
5703 | // ins -- The instruction being emitted |
5704 | // attr -- The emit attribute |
5705 | // targetReg -- The target register |
5706 | // op1Reg -- The register of the first operand |
5707 | // op2Reg -- The register of the second operand |
5708 | // |
5709 | void emitter::emitIns_SIMD_R_R_R( |
5710 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg) |
5711 | { |
5712 | if (UseVEXEncoding()) |
5713 | { |
5714 | emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg); |
5715 | } |
5716 | else |
5717 | { |
5718 | if (op1Reg != targetReg) |
5719 | { |
5720 | // Ensure we aren't overwriting op2 |
5721 | assert(op2Reg != targetReg); |
5722 | |
5723 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5724 | } |
5725 | emitIns_R_R(ins, attr, targetReg, op2Reg); |
5726 | } |
5727 | } |
5728 | |
5729 | //------------------------------------------------------------------------ |
5730 | // emitIns_SIMD_R_R_S: emits the code for a SIMD instruction that takes a register operand, a variable index + offset, |
5731 | // and that returns a value in register |
5732 | // |
5733 | // Arguments: |
5734 | // ins -- The instruction being emitted |
5735 | // attr -- The emit attribute |
5736 | // targetReg -- The target register |
5737 | // op1Reg -- The register of the first operand |
5738 | // varx -- The variable index used for the memory address |
5739 | // offs -- The offset added to the memory address from varx |
5740 | // |
5741 | void emitter::emitIns_SIMD_R_R_S( |
5742 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs) |
5743 | { |
5744 | if (UseVEXEncoding()) |
5745 | { |
5746 | emitIns_R_R_S(ins, attr, targetReg, op1Reg, varx, offs); |
5747 | } |
5748 | else |
5749 | { |
5750 | if (op1Reg != targetReg) |
5751 | { |
5752 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5753 | } |
5754 | emitIns_R_S(ins, attr, targetReg, varx, offs); |
5755 | } |
5756 | } |
5757 | |
5758 | //------------------------------------------------------------------------ |
5759 | // emitIns_SIMD_R_R_A_I: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address, |
5760 | // an immediate operand, and that returns a value in register |
5761 | // |
5762 | // Arguments: |
5763 | // ins -- The instruction being emitted |
5764 | // attr -- The emit attribute |
5765 | // targetReg -- The target register |
5766 | // op1Reg -- The register of the first operand |
5767 | // indir -- The GenTreeIndir used for the memory address |
5768 | // ival -- The immediate value |
5769 | // |
5770 | void emitter::emitIns_SIMD_R_R_A_I( |
5771 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival) |
5772 | { |
5773 | if (UseVEXEncoding()) |
5774 | { |
5775 | emitIns_R_R_A_I(ins, attr, targetReg, op1Reg, indir, ival, IF_RWR_RRD_ARD_CNS); |
5776 | } |
5777 | else |
5778 | { |
5779 | if (op1Reg != targetReg) |
5780 | { |
5781 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5782 | } |
5783 | emitIns_R_A_I(ins, attr, targetReg, indir, ival); |
5784 | } |
5785 | } |
5786 | |
5787 | //------------------------------------------------------------------------ |
5788 | // emitIns_SIMD_R_R_AR_I: emits the code for a SIMD instruction that takes a register operand, a base memory register, |
5789 | // an immediate operand, and that returns a value in register |
5790 | // |
5791 | // Arguments: |
5792 | // ins -- The instruction being emitted |
5793 | // attr -- The emit attribute |
5794 | // targetReg -- The target register |
5795 | // op1Reg -- The register of the first operand |
5796 | // base -- The base register used for the memory address |
5797 | // ival -- The immediate value |
5798 | // |
5799 | void emitter::emitIns_SIMD_R_R_AR_I( |
5800 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int ival) |
5801 | { |
5802 | if (UseVEXEncoding()) |
5803 | { |
5804 | emitIns_R_R_AR_I(ins, attr, targetReg, op1Reg, base, 0, ival); |
5805 | } |
5806 | else |
5807 | { |
5808 | if (op1Reg != targetReg) |
5809 | { |
5810 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5811 | } |
5812 | emitIns_R_AR_I(ins, attr, targetReg, base, 0, ival); |
5813 | } |
5814 | } |
5815 | |
5816 | //------------------------------------------------------------------------ |
5817 | // emitIns_SIMD_R_R_C_I: emits the code for a SIMD instruction that takes a register operand, a field handle + offset, |
5818 | // an immediate operand, and that returns a value in register |
5819 | // |
5820 | // Arguments: |
5821 | // ins -- The instruction being emitted |
5822 | // attr -- The emit attribute |
5823 | // targetReg -- The target register |
5824 | // op1Reg -- The register of the first operand |
5825 | // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address |
5826 | // offs -- The offset added to the memory address from fldHnd |
5827 | // ival -- The immediate value |
5828 | // |
5829 | void emitter::emitIns_SIMD_R_R_C_I(instruction ins, |
5830 | emitAttr attr, |
5831 | regNumber targetReg, |
5832 | regNumber op1Reg, |
5833 | CORINFO_FIELD_HANDLE fldHnd, |
5834 | int offs, |
5835 | int ival) |
5836 | { |
5837 | if (UseVEXEncoding()) |
5838 | { |
5839 | emitIns_R_R_C_I(ins, attr, targetReg, op1Reg, fldHnd, offs, ival); |
5840 | } |
5841 | else |
5842 | { |
5843 | if (op1Reg != targetReg) |
5844 | { |
5845 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5846 | } |
5847 | emitIns_R_C_I(ins, attr, targetReg, fldHnd, offs, ival); |
5848 | } |
5849 | } |
5850 | |
5851 | //------------------------------------------------------------------------ |
5852 | // emitIns_SIMD_R_R_R_I: emits the code for a SIMD instruction that takes two register operands, an immediate operand, |
5853 | // and that returns a value in register |
5854 | // |
5855 | // Arguments: |
5856 | // ins -- The instruction being emitted |
5857 | // attr -- The emit attribute |
5858 | // targetReg -- The target register |
5859 | // op1Reg -- The register of the first operand |
5860 | // op2Reg -- The register of the second operand |
5861 | // ival -- The immediate value |
5862 | // |
5863 | void emitter::emitIns_SIMD_R_R_R_I( |
5864 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int ival) |
5865 | { |
5866 | if (UseVEXEncoding()) |
5867 | { |
5868 | emitIns_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, ival); |
5869 | } |
5870 | else |
5871 | { |
5872 | if (op1Reg != targetReg) |
5873 | { |
5874 | // Ensure we aren't overwriting op2 |
5875 | assert(op2Reg != targetReg); |
5876 | |
5877 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5878 | } |
5879 | emitIns_R_R_I(ins, attr, targetReg, op2Reg, ival); |
5880 | } |
5881 | } |
5882 | |
5883 | //------------------------------------------------------------------------ |
5884 | // emitIns_SIMD_R_R_S_I: emits the code for a SIMD instruction that takes a register operand, a variable index + offset, |
5885 | //                       an immediate operand, and that returns a value in register
5886 | // |
5887 | // Arguments: |
5888 | // ins -- The instruction being emitted |
5889 | // attr -- The emit attribute |
5890 | // targetReg -- The target register |
5891 | // op1Reg -- The register of the first operand |
5892 | // varx -- The variable index used for the memory address |
5893 | // offs -- The offset added to the memory address from varx |
5894 | // ival -- The immediate value |
5895 | // |
5896 | void emitter::emitIns_SIMD_R_R_S_I( |
5897 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, int ival) |
5898 | { |
5899 | if (UseVEXEncoding()) |
5900 | { |
5901 | emitIns_R_R_S_I(ins, attr, targetReg, op1Reg, varx, offs, ival); |
5902 | } |
5903 | else |
5904 | { |
5905 | if (op1Reg != targetReg) |
5906 | { |
5907 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5908 | } |
5909 | emitIns_R_S_I(ins, attr, targetReg, varx, offs, ival); |
5910 | } |
5911 | } |
5912 | |
5913 | //------------------------------------------------------------------------ |
5914 | // emitIns_SIMD_R_R_R_A: emits the code for a SIMD instruction that takes two register operands, a GenTreeIndir address, |
5915 | // and that returns a value in register |
5916 | // |
5917 | // Arguments: |
5918 | // ins -- The instruction being emitted |
5919 | // attr -- The emit attribute |
5920 | // targetReg -- The target register |
5921 | // op1Reg -- The register of the first operand |
5922 | // op2Reg -- The register of the second operand |
5923 | // indir -- The GenTreeIndir used for the memory address |
5924 | // |
5925 | void emitter::emitIns_SIMD_R_R_R_A( |
5926 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir) |
5927 | { |
5928 | assert(IsFMAInstruction(ins)); |
5929 | assert(UseVEXEncoding()); |
5930 | |
5931 | if (op1Reg != targetReg) |
5932 | { |
5933 | // Ensure we aren't overwriting op2 |
5934 | assert(op2Reg != targetReg); |
5935 | |
5936 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5937 | } |
5938 | |
5939 | emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir); |
5940 | } |
5941 | |
5942 | //------------------------------------------------------------------------ |
5943 | // emitIns_SIMD_R_R_R_AR: emits the code for a SIMD instruction that takes two register operands, a base memory |
5944 | // register, and that returns a value in register |
5945 | // |
5946 | // Arguments: |
5947 | // ins -- The instruction being emitted |
5948 | // attr -- The emit attribute |
5949 | // targetReg -- The target register |
5950 | //    op1Reg    -- The register of the first operand
5951 | // op2Reg -- The register of the second operand |
5952 | // base -- The base register used for the memory address |
5953 | // |
5954 | void emitter::emitIns_SIMD_R_R_R_AR( |
5955 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base) |
5956 | { |
5957 | assert(IsFMAInstruction(ins)); |
5958 | assert(UseVEXEncoding()); |
5959 | |
5960 | if (op1Reg != targetReg) |
5961 | { |
5962 | // Ensure we aren't overwriting op2 |
5963 | assert(op2Reg != targetReg); |
5964 | |
5965 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
5966 | } |
5967 | |
5968 | emitIns_R_R_AR(ins, attr, targetReg, op2Reg, base, 0); |
5969 | } |
5970 | |
5971 | //------------------------------------------------------------------------ |
5972 | // emitIns_SIMD_R_R_R_C: emits the code for a SIMD instruction that takes two register operands, a field handle + |
5973 | // offset, and that returns a value in register |
5974 | // |
5975 | // Arguments: |
5976 | // ins -- The instruction being emitted |
5977 | // attr -- The emit attribute |
5978 | // targetReg -- The target register |
5979 | // op1Reg -- The register of the first operand |
5980 | // op2Reg -- The register of the second operand |
5981 | // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address |
5982 | // offs -- The offset added to the memory address from fldHnd |
5983 | // |
5984 | void emitter::emitIns_SIMD_R_R_R_C(instruction ins, |
5985 | emitAttr attr, |
5986 | regNumber targetReg, |
5987 | regNumber op1Reg, |
5988 | regNumber op2Reg, |
5989 | CORINFO_FIELD_HANDLE fldHnd, |
5990 | int offs) |
5991 | { |
5992 | assert(IsFMAInstruction(ins)); |
5993 | assert(UseVEXEncoding()); |
5994 | |
5995 | if (op1Reg != targetReg) |
5996 | { |
5997 | // Ensure we aren't overwriting op2 |
5998 | assert(op2Reg != targetReg); |
5999 | |
6000 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6001 | } |
6002 | |
6003 | emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs); |
6004 | } |
6005 | |
6006 | //------------------------------------------------------------------------ |
6007 | // emitIns_SIMD_R_R_R_R: emits the code for a SIMD instruction that takes three register operands, and that returns a |
6008 | // value in register |
6009 | // |
6010 | // Arguments: |
6011 | // ins -- The instruction being emitted |
6012 | // attr -- The emit attribute |
6013 | // targetReg -- The target register |
6014 | // op1Reg -- The register of the first operand |
6015 | // op2Reg -- The register of the second operand |
6016 | //    op3Reg    -- The register of the third operand
6017 | // |
6018 | void emitter::emitIns_SIMD_R_R_R_R( |
6019 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg) |
6020 | { |
6021 | if (IsFMAInstruction(ins)) |
6022 | { |
6023 | assert(UseVEXEncoding()); |
6024 | |
6025 | if (op1Reg != targetReg) |
6026 | { |
6027 | // Ensure we aren't overwriting op2 or op3 |
6028 | |
6029 | assert(op2Reg != targetReg); |
6030 | assert(op3Reg != targetReg); |
6031 | |
6032 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6033 | } |
6034 | |
6035 | emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg); |
6036 | } |
6037 | else if (UseVEXEncoding()) |
6038 | { |
6039 | assert(isAvxBlendv(ins) || isSse41Blendv(ins)); |
6040 | |
6041 | // convert SSE encoding of SSE4.1 instructions to VEX encoding |
6042 | switch (ins) |
6043 | { |
6044 | case INS_blendvps: |
6045 | ins = INS_vblendvps; |
6046 | break; |
6047 | case INS_blendvpd: |
6048 | ins = INS_vblendvpd; |
6049 | break; |
6050 | case INS_pblendvb: |
6051 | ins = INS_vpblendvb; |
6052 | break; |
6053 | default: |
6054 | break; |
6055 | } |
6056 | emitIns_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Reg); |
6057 | } |
6058 | else |
6059 | { |
6060 | assert(isSse41Blendv(ins)); |
6061 | // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 |
6062 | if (op3Reg != REG_XMM0) |
6063 | { |
6064 | // Ensure we aren't overwriting op1 or op2 |
6065 | assert(op1Reg != REG_XMM0); |
6066 | assert(op2Reg != REG_XMM0); |
6067 | |
6068 | emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); |
6069 | } |
6070 | if (op1Reg != targetReg) |
6071 | { |
6072 | // Ensure we aren't overwriting op2 or op3 (which should be REG_XMM0)
6073 | assert(op2Reg != targetReg); |
6074 | assert(targetReg != REG_XMM0); |
6075 | |
6076 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6077 | } |
6078 | emitIns_R_R(ins, attr, targetReg, op2Reg); |
6079 | } |
6080 | } |
6081 | |
6082 | //------------------------------------------------------------------------ |
6083 | // emitIns_SIMD_R_R_R_S: emits the code for a SIMD instruction that takes two register operands, a variable index + |
6084 | // offset, and that returns a value in register |
6085 | // |
6086 | // Arguments: |
6087 | // ins -- The instruction being emitted |
6088 | // attr -- The emit attribute |
6089 | // targetReg -- The target register |
6090 | // op1Reg -- The register of the first operand |
6091 | // op2Reg -- The register of the second operand |
6092 | // varx -- The variable index used for the memory address |
6093 | // offs -- The offset added to the memory address from varx |
6094 | // |
6095 | void emitter::emitIns_SIMD_R_R_R_S( |
6096 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs) |
6097 | { |
6098 | assert(IsFMAInstruction(ins)); |
6099 | assert(UseVEXEncoding()); |
6100 | |
6101 | if (op1Reg != targetReg) |
6102 | { |
6103 | // Ensure we aren't overwriting op2 |
6104 | assert(op2Reg != targetReg); |
6105 | |
6106 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6107 | } |
6108 | |
6109 | emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs); |
6110 | } |
6111 | |
6112 | //------------------------------------------------------------------------ |
6113 | // emitIns_SIMD_R_R_A_R: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address, |
6114 | // another register operand, and that returns a value in register |
6115 | // |
6116 | // Arguments: |
6117 | // ins -- The instruction being emitted |
6118 | // attr -- The emit attribute |
6119 | // targetReg -- The target register |
6120 | // op1Reg -- The register of the first operand |
6121 | // op3Reg -- The register of the third operand |
6122 | // indir -- The GenTreeIndir used for the memory address |
6123 | // |
6124 | void emitter::emitIns_SIMD_R_R_A_R( |
6125 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir) |
6126 | { |
6127 | if (UseVEXEncoding()) |
6128 | { |
6129 | assert(isAvxBlendv(ins) || isSse41Blendv(ins)); |
6130 | |
6131 | // convert SSE encoding of SSE4.1 instructions to VEX encoding |
6132 | switch (ins) |
6133 | { |
6134 | case INS_blendvps: |
6135 | { |
6136 | ins = INS_vblendvps; |
6137 | break; |
6138 | } |
6139 | |
6140 | case INS_blendvpd: |
6141 | { |
6142 | ins = INS_vblendvpd; |
6143 | break; |
6144 | } |
6145 | |
6146 | case INS_pblendvb: |
6147 | { |
6148 | ins = INS_vpblendvb; |
6149 | break; |
6150 | } |
6151 | |
6152 | default: |
6153 | { |
6154 | break; |
6155 | } |
6156 | } |
6157 | |
6158 | emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir); |
6159 | } |
6160 | else |
6161 | { |
6162 | assert(isSse41Blendv(ins)); |
6163 | |
6164 | // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 |
6165 | if (op3Reg != REG_XMM0) |
6166 | { |
6167 | // Ensure we aren't overwriting op1 |
6168 | assert(op1Reg != REG_XMM0); |
6169 | |
6170 | emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); |
6171 | } |
6172 | if (op1Reg != targetReg) |
6173 | { |
6174 | // Ensure we aren't overwriting op3 (which should be REG_XMM0) |
6175 | assert(targetReg != REG_XMM0); |
6176 | |
6177 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6178 | } |
6179 | |
6180 | emitIns_R_A(ins, attr, targetReg, indir); |
6181 | } |
6182 | } |
6183 | |
6184 | //------------------------------------------------------------------------ |
6185 | // emitIns_SIMD_R_R_AR_R: emits the code for a SIMD instruction that takes a register operand, a base memory |
6186 | // register, another register operand, and that returns a value in register |
6187 | // |
6188 | // Arguments: |
6189 | // ins -- The instruction being emitted |
6190 | // attr -- The emit attribute |
6191 | // targetReg -- The target register |
6192 | //    op1Reg    -- The register of the first operand
6193 | // op3Reg -- The register of the third operand |
6194 | // base -- The base register used for the memory address |
6195 | // |
6196 | void emitter::emitIns_SIMD_R_R_AR_R( |
6197 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base) |
6198 | { |
6199 | if (UseVEXEncoding()) |
6200 | { |
6201 | assert(isAvxBlendv(ins) || isSse41Blendv(ins)); |
6202 | |
6203 | // convert SSE encoding of SSE4.1 instructions to VEX encoding |
6204 | switch (ins) |
6205 | { |
6206 | case INS_blendvps: |
6207 | { |
6208 | ins = INS_vblendvps; |
6209 | break; |
6210 | } |
6211 | |
6212 | case INS_blendvpd: |
6213 | { |
6214 | ins = INS_vblendvpd; |
6215 | break; |
6216 | } |
6217 | |
6218 | case INS_pblendvb: |
6219 | { |
6220 | ins = INS_vpblendvb; |
6221 | break; |
6222 | } |
6223 | |
6224 | default: |
6225 | { |
6226 | break; |
6227 | } |
6228 | } |
6229 | |
6230 | emitIns_R_R_AR_R(ins, attr, targetReg, op1Reg, op3Reg, base, 0); |
6231 | } |
6232 | else |
6233 | { |
6234 | assert(isSse41Blendv(ins)); |
6235 | |
6236 | // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 |
6237 | if (op3Reg != REG_XMM0) |
6238 | { |
6239 | // Ensure we aren't overwriting op1 |
6240 | assert(op1Reg != REG_XMM0); |
6241 | |
6242 | emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); |
6243 | } |
6244 | if (op1Reg != targetReg) |
6245 | { |
6246 | // Ensure we aren't overwriting op3 (which should be REG_XMM0) |
6247 | assert(targetReg != REG_XMM0); |
6248 | |
6249 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6250 | } |
6251 | |
6252 | emitIns_R_AR(ins, attr, targetReg, base, 0); |
6253 | } |
6254 | } |
6255 | |
6256 | //------------------------------------------------------------------------ |
6257 | // emitIns_SIMD_R_R_C_R: emits the code for a SIMD instruction that takes a register operand, a field handle + |
6258 | // offset, another register operand, and that returns a value in register |
6259 | // |
6260 | // Arguments: |
6261 | // ins -- The instruction being emitted |
6262 | // attr -- The emit attribute |
6263 | // targetReg -- The target register |
6264 | // op1Reg -- The register of the first operand |
6265 | // op3Reg -- The register of the third operand |
6266 | // fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address |
6267 | // offs -- The offset added to the memory address from fldHnd |
6268 | // |
6269 | void emitter::emitIns_SIMD_R_R_C_R(instruction ins, |
6270 | emitAttr attr, |
6271 | regNumber targetReg, |
6272 | regNumber op1Reg, |
6273 | regNumber op3Reg, |
6274 | CORINFO_FIELD_HANDLE fldHnd, |
6275 | int offs) |
6276 | { |
6277 | if (UseVEXEncoding()) |
6278 | { |
6279 | assert(isAvxBlendv(ins) || isSse41Blendv(ins)); |
6280 | |
6281 | // convert SSE encoding of SSE4.1 instructions to VEX encoding |
6282 | switch (ins) |
6283 | { |
6284 | case INS_blendvps: |
6285 | { |
6286 | ins = INS_vblendvps; |
6287 | break; |
6288 | } |
6289 | |
6290 | case INS_blendvpd: |
6291 | { |
6292 | ins = INS_vblendvpd; |
6293 | break; |
6294 | } |
6295 | |
6296 | case INS_pblendvb: |
6297 | { |
6298 | ins = INS_vpblendvb; |
6299 | break; |
6300 | } |
6301 | |
6302 | default: |
6303 | { |
6304 | break; |
6305 | } |
6306 | } |
6307 | |
6308 | emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs); |
6309 | } |
6310 | else |
6311 | { |
6312 | assert(isSse41Blendv(ins)); |
6313 | |
6314 | // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 |
6315 | if (op3Reg != REG_XMM0) |
6316 | { |
6317 | // Ensure we aren't overwriting op1 |
6318 | assert(op1Reg != REG_XMM0); |
6319 | |
6320 | emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); |
6321 | } |
6322 | if (op1Reg != targetReg) |
6323 | { |
6324 | // Ensure we aren't overwriting op3 (which should be REG_XMM0) |
6325 | assert(targetReg != REG_XMM0); |
6326 | |
6327 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6328 | } |
6329 | |
6330 | emitIns_R_C(ins, attr, targetReg, fldHnd, offs); |
6331 | } |
6332 | } |
6333 | |
6334 | //------------------------------------------------------------------------ |
6335 | // emitIns_SIMD_R_R_S_R: emits the code for a SIMD instruction that takes a register operand, a variable index + |
6336 | // offset, another register operand, and that returns a value in register |
6337 | // |
6338 | // Arguments: |
6339 | // ins -- The instruction being emitted |
6340 | // attr -- The emit attribute |
6341 | // targetReg -- The target register |
6342 | // op1Reg -- The register of the first operand |
6343 | // op3Reg -- The register of the third operand |
6344 | // varx -- The variable index used for the memory address |
6345 | // offs -- The offset added to the memory address from varx |
6346 | // |
6347 | void emitter::emitIns_SIMD_R_R_S_R( |
6348 | instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs) |
6349 | { |
6350 | if (UseVEXEncoding()) |
6351 | { |
6352 | assert(isAvxBlendv(ins) || isSse41Blendv(ins)); |
6353 | |
6354 | // convert SSE encoding of SSE4.1 instructions to VEX encoding |
6355 | switch (ins) |
6356 | { |
6357 | case INS_blendvps: |
6358 | { |
6359 | ins = INS_vblendvps; |
6360 | break; |
6361 | } |
6362 | |
6363 | case INS_blendvpd: |
6364 | { |
6365 | ins = INS_vblendvpd; |
6366 | break; |
6367 | } |
6368 | |
6369 | case INS_pblendvb: |
6370 | { |
6371 | ins = INS_vpblendvb; |
6372 | break; |
6373 | } |
6374 | |
6375 | default: |
6376 | { |
6377 | break; |
6378 | } |
6379 | } |
6380 | |
6381 | emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs); |
6382 | } |
6383 | else |
6384 | { |
6385 | assert(isSse41Blendv(ins)); |
6386 | |
6387 | // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 |
6388 | if (op3Reg != REG_XMM0) |
6389 | { |
6390 | // Ensure we aren't overwriting op1 |
6391 | assert(op1Reg != REG_XMM0); |
6392 | |
6393 | emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg); |
6394 | } |
6395 | if (op1Reg != targetReg) |
6396 | { |
6397 | // Ensure we aren't overwriting op3 (which should be REG_XMM0) |
6398 | assert(targetReg != REG_XMM0); |
6399 | |
6400 | emitIns_R_R(INS_movaps, attr, targetReg, op1Reg); |
6401 | } |
6402 | |
6403 | emitIns_R_S(ins, attr, targetReg, varx, offs); |
6404 | } |
6405 | } |
6406 | #endif // FEATURE_HW_INTRINSICS |
6407 | |
6408 | /***************************************************************************** |
6409 | * |
6410 | * The following add instructions referencing stack-based local variables. |
6411 | */ |
6412 | |
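//------------------------------------------------------------------------
// emitIns_S: emits the code for an instruction with a single stack-based local
//            variable operand (varx + offs).
//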
6413 | void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) |
6414 | { |
6415 | instrDesc* id = emitNewInstr(attr); |
6416 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs); |
6417 | insFormat fmt = emitInsModeFormat(ins, IF_SRD); |
6418 | |
6419 | // 16-bit operand instructions will need a prefix |
6420 | if (EA_SIZE(attr) == EA_2BYTE) |
6421 | { |
6422 | sz += 1; |
6423 | } |
6424 | |
6425 | // VEX prefix |
6426 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); |
6427 | |
6428 | // 64-bit operand instructions will need a REX.W prefix |
6429 | if (TakesRexWPrefix(ins, attr)) |
6430 | { |
6431 | sz += emitGetRexPrefixSize(ins); |
6432 | } |
6433 | |
6434 | id->idIns(ins); |
6435 | id->idInsFmt(fmt); |
6436 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
6437 | id->idCodeSize(sz); |
6438 | |
6439 | #ifdef DEBUG |
6440 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
6441 | #endif |
6442 | dispIns(id); |
6443 | emitCurIGsize += sz; |
6444 | |
6445 | emitAdjustStackDepthPushPop(ins); |
6446 | } |
6447 | |
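//------------------------------------------------------------------------
// emitIns_S_R: emits the code for an instruction that takes a stack-based local
//              variable operand (varx + offs) and a register operand.
//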
6448 | void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) |
6449 | { |
6450 | instrDesc* id = emitNewInstr(attr); |
6451 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs); |
6452 | insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD); |
6453 | |
6454 | #ifdef _TARGET_X86_ |
6455 | if (attr == EA_1BYTE) |
6456 | { |
6457 | assert(isByteReg(ireg)); |
6458 | } |
6459 | #endif |
6460 | // 16-bit operand instructions will need a prefix |
6461 | if (EA_SIZE(attr) == EA_2BYTE) |
6462 | { |
6463 | sz++; |
6464 | } |
6465 | |
6466 | // VEX prefix |
6467 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); |
6468 | |
6469 | // 64-bit operand instructions will need a REX.W prefix |
6470 | if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) |
6471 | { |
6472 | sz += emitGetRexPrefixSize(ins); |
6473 | } |
6474 | |
6475 | id->idIns(ins); |
6476 | id->idInsFmt(fmt); |
6477 | id->idReg1(ireg); |
6478 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
6479 | id->idCodeSize(sz); |
6480 | #ifdef DEBUG |
6481 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
6482 | #endif |
6483 | dispIns(id); |
6484 | emitCurIGsize += sz; |
6485 | } |
6486 | |
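//------------------------------------------------------------------------
// emitIns_R_S: emits the code for an instruction that takes a register operand
//              and a stack-based local variable operand (varx + offs).
//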
6487 | void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) |
6488 | { |
6489 | emitAttr size = EA_SIZE(attr); |
6490 | noway_assert(emitVerifyEncodable(ins, size, ireg)); |
6491 | |
6492 | instrDesc* id = emitNewInstr(attr); |
6493 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs); |
6494 | insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD); |
6495 | |
6496 | // Most 16-bit operand instructions need a prefix |
6497 | if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) |
6498 | { |
6499 | sz++; |
6500 | } |
6501 | |
6502 | // VEX prefix |
6503 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); |
6504 | |
6505 | // 64-bit operand instructions will need a REX.W prefix |
6506 | if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) |
6507 | { |
6508 | sz += emitGetRexPrefixSize(ins); |
6509 | } |
6510 | |
6511 | if (ins == INS_crc32) |
6512 | { |
6513 | sz += 1; |
6514 | } |
6515 | |
6516 | id->idIns(ins); |
6517 | id->idInsFmt(fmt); |
6518 | id->idReg1(ireg); |
6519 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
6520 | id->idCodeSize(sz); |
6521 | #ifdef DEBUG |
6522 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
6523 | #endif |
6524 | dispIns(id); |
6525 | emitCurIGsize += sz; |
6526 | } |
6527 | |
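//------------------------------------------------------------------------
// emitIns_S_I: emits the code for an instruction that takes a stack-based local
//              variable operand (varx + offs) and an immediate operand.
//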
6528 | void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) |
6529 | { |
6530 | #ifdef _TARGET_AMD64_ |
6531 | // mov reg, imm64 is the only opcode which takes a full 8 byte immediate |
6532 | // all other opcodes take a sign-extended 4-byte immediate |
6533 | noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); |
6534 | #endif |
6535 | |
6536 | insFormat fmt; |
6537 | |
6538 | switch (ins) |
6539 | { |
6540 | case INS_rcl_N: |
6541 | case INS_rcr_N: |
6542 | case INS_rol_N: |
6543 | case INS_ror_N: |
6544 | case INS_shl_N: |
6545 | case INS_shr_N: |
6546 | case INS_sar_N: |
6547 | assert(val != 1); |
6548 | fmt = IF_SRW_SHF; |
6549 | val &= 0x7F; |
6550 | break; |
6551 | |
6552 | default: |
6553 | fmt = emitInsModeFormat(ins, IF_SRD_CNS); |
6554 | break; |
6555 | } |
6556 | |
6557 | instrDesc* id = emitNewInstrCns(attr, val); |
6558 | id->idIns(ins); |
6559 | id->idInsFmt(fmt); |
6560 | UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val); |
6561 | |
6562 | // VEX prefix |
6563 | sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); |
6564 | |
6565 | // 64-bit operand instructions will need a REX.W prefix |
6566 | if (TakesRexWPrefix(ins, attr)) |
6567 | { |
6568 | sz += emitGetRexPrefixSize(ins); |
6569 | } |
6570 | |
6571 | id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); |
6572 | id->idCodeSize(sz); |
6573 | #ifdef DEBUG |
6574 | id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; |
6575 | #endif |
6576 | dispIns(id); |
6577 | emitCurIGsize += sz; |
6578 | } |
6579 | |
6580 | /***************************************************************************** |
6581 | * |
6582 | * Record that a jump instruction uses the short encoding |
6583 | * |
6584 | */ |
6585 | void emitter::emitSetShortJump(instrDescJmp* id) |
6586 | { |
6587 | if (id->idjKeepLong) |
6588 | { |
6589 | return; |
6590 | } |
6591 | |
6592 | id->idjShort = true; |
6593 | } |
6594 | |
6595 | /***************************************************************************** |
6596 | * |
6597 | * Add a jmp instruction. |
6598 | */ |
6599 | |
6600 | void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */) |
6601 | { |
6602 | UNATIVE_OFFSET sz; |
6603 | instrDescJmp* id = emitNewInstrJmp(); |
6604 | |
6605 | assert(dst->bbFlags & BBF_JMP_TARGET); |
6606 | |
6607 | id->idIns(ins); |
6608 | id->idInsFmt(IF_LABEL); |
6609 | id->idAddr()->iiaBBlabel = dst; |
6610 | |
6611 | #ifdef DEBUG |
6612 | // Mark the finally call |
6613 | if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY) |
6614 | { |
6615 | id->idDebugOnlyInfo()->idFinallyCall = true; |
6616 | } |
6617 | #endif // DEBUG |
6618 | |
6619 | /* Assume the jump will be long */ |
6620 | |
6621 | id->idjShort = 0; |
6622 | id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); |
6623 | |
6624 | /* Record the jump's IG and offset within it */ |
6625 | |
6626 | id->idjIG = emitCurIG; |
6627 | id->idjOffs = emitCurIGsize; |
6628 | |
6629 | /* Append this jump to this IG's jump list */ |
6630 | |
6631 | id->idjNext = emitCurIGjmpList; |
6632 | emitCurIGjmpList = id; |
6633 | |
6634 | #if EMITTER_STATS |
6635 | emitTotalIGjmps++; |
6636 | #endif |
6637 | |
6638 | /* Figure out the max. size of the jump/call instruction */ |
6639 | |
6640 | if (ins == INS_call) |
6641 | { |
6642 | sz = CALL_INST_SIZE; |
6643 | } |
6644 | else if (ins == INS_push || ins == INS_push_hide) |
6645 | { |
6646 | // Pushing the address of a basicBlock will need a reloc |
6647 | // as the instruction uses the absolute address, |
6648 | // not a relative address |
6649 | if (emitComp->opts.compReloc) |
6650 | { |
6651 | id->idSetIsDspReloc(); |
6652 | } |
6653 | sz = PUSH_INST_SIZE; |
6654 | } |
6655 | else |
6656 | { |
6657 | insGroup* tgt; |
6658 | |
6659 | /* This is a jump - assume the worst */ |
6660 | |
6661 | sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE; |
6662 | |
6663 | /* Can we guess at the jump distance? */ |
6664 | |
6665 | tgt = (insGroup*)emitCodeGetCookie(dst); |
6666 | |
6667 | if (tgt) |
6668 | { |
6669 | int extra;
6670 | UNATIVE_OFFSET srcOffs; |
6671 | int jmpDist; |
6672 | |
6673 | assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL); |
6674 | |
6675 | /* This is a backward jump - figure out the distance */ |
6676 | |
6677 | srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL; |
6678 | |
6679 | /* Compute the distance estimate */ |
6680 | |
6681 | jmpDist = srcOffs - tgt->igOffs; |
6682 | assert((int)jmpDist > 0); |
6683 | |
6684 | /* How much beyond the max. short distance does the jump go? */ |
6685 | |
6686 | extra = jmpDist + JMP_DIST_SMALL_MAX_NEG; |
6687 | |
6688 | #if DEBUG_EMIT |
6689 | if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) |
6690 | { |
6691 | if (INTERESTING_JUMP_NUM == 0) |
6692 | { |
6693 | printf("[0] Jump %u:\n" , id->idDebugOnlyInfo()->idNum); |
6694 | } |
6695 | printf("[0] Jump source is at %08X\n" , srcOffs); |
6696 | printf("[0] Label block is at %08X\n" , tgt->igOffs); |
6697 | printf("[0] Jump distance - %04X\n" , jmpDist); |
6698 | if (extra > 0) |
6699 | { |
6700 | printf("[0] Distance excess = %d \n" , extra); |
6701 | } |
6702 | } |
6703 | #endif |
6704 | |
6705 | if (extra <= 0 && !id->idjKeepLong) |
6706 | { |
6707 | /* Wonderful - this jump surely will be short */ |
6708 | |
6709 | emitSetShortJump(id); |
6710 | sz = JMP_SIZE_SMALL; |
6711 | } |
6712 | } |
6713 | #if DEBUG_EMIT |
6714 | else |
6715 | { |
6716 | if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) |
6717 | { |
6718 | if (INTERESTING_JUMP_NUM == 0) |
6719 | { |
6720 | printf("[0] Jump %u:\n" , id->idDebugOnlyInfo()->idNum); |
6721 | } |
6722 | printf("[0] Jump source is at %04X/%08X\n" , emitCurIGsize, |
6723 | emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL); |
6724 | printf("[0] Label block is unknown\n" ); |
6725 | } |
6726 | } |
6727 | #endif |
6728 | } |
6729 | |
6730 | id->idCodeSize(sz); |
6731 | |
6732 | dispIns(id); |
6733 | emitCurIGsize += sz; |
6734 | |
6735 | emitAdjustStackDepthPushPop(ins); |
6736 | } |
6737 | |
6738 | #if !FEATURE_FIXED_OUT_ARGS |
6739 | |
6740 | //------------------------------------------------------------------------ |
6741 | // emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth. |
6742 | // |
6743 | // Arguments: |
6744 | // ins - the instruction. Only INS_push and INS_pop adjust the stack depth. |
6745 | // |
6746 | // Notes: |
6747 | // 1. Alters emitCurStackLvl and possibly emitMaxStackDepth. |
6748 | // 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere) |
6749 | // |
6750 | void emitter::emitAdjustStackDepthPushPop(instruction ins) |
6751 | { |
6752 | if (ins == INS_push) |
6753 | { |
6754 | emitCurStackLvl += emitCntStackDepth; |
6755 | |
6756 | if (emitMaxStackDepth < emitCurStackLvl) |
6757 | { |
6758 | JITDUMP("Upping emitMaxStackDepth from %d to %d\n" , emitMaxStackDepth, emitCurStackLvl); |
6759 | emitMaxStackDepth = emitCurStackLvl; |
6760 | } |
6761 | } |
6762 | else if (ins == INS_pop) |
6763 | { |
6764 | emitCurStackLvl -= emitCntStackDepth; |
6765 | assert((int)emitCurStackLvl >= 0); |
6766 | } |
6767 | } |
6768 | |
6769 | //------------------------------------------------------------------------ |
6770 | // emitAdjustStackDepth: Adjust the current and maximum stack depth. |
6771 | // |
6772 | // Arguments: |
6773 | // ins - the instruction. Only INS_add and INS_sub adjust the stack depth. |
6774 | // It is assumed that the add/sub is on the stack pointer. |
6775 | // val - the number of bytes to add to or subtract from the stack pointer. |
6776 | // |
6777 | // Notes: |
6778 | // 1. Alters emitCurStackLvl and possibly emitMaxStackDepth. |
6779 | // 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere) |
6780 | // |
6781 | void emitter::emitAdjustStackDepth(instruction ins, ssize_t val) |
6782 | { |
6783 | // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return. |
6784 | if (emitCntStackDepth == 0) |
6785 | return; |
6786 | |
6787 | if (ins == INS_sub) |
6788 | { |
6789 | S_UINT32 newStackLvl(emitCurStackLvl); |
6790 | newStackLvl += S_UINT32(val); |
6791 | noway_assert(!newStackLvl.IsOverflow()); |
6792 | |
6793 | emitCurStackLvl = newStackLvl.Value(); |
6794 | |
6795 | if (emitMaxStackDepth < emitCurStackLvl) |
6796 | { |
6797 | JITDUMP("Upping emitMaxStackDepth from %d to %d\n" , emitMaxStackDepth, emitCurStackLvl); |
6798 | emitMaxStackDepth = emitCurStackLvl; |
6799 | } |
6800 | } |
6801 | else if (ins == INS_add) |
6802 | { |
6803 | S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val); |
6804 | noway_assert(!newStackLvl.IsOverflow()); |
6805 | |
6806 | emitCurStackLvl = newStackLvl.Value(); |
6807 | } |
6808 | } |
6809 | |
6810 | #endif // !FEATURE_FIXED_OUT_ARGS
6811 | |
6812 | /***************************************************************************** |
6813 | * |
6814 | * Add a call instruction (direct or indirect). |
6815 | * argSize<0 means that the caller will pop the arguments |
6816 | * |
6817 | * The other arguments are interpreted depending on callType as shown: |
6818 | * Unless otherwise specified, ireg,xreg,xmul,disp should have default values. |
6819 | * |
6820 | * EC_FUNC_TOKEN : addr is the method address |
6821 | * EC_FUNC_TOKEN_INDIR : addr is the indirect method address |
6822 | * EC_FUNC_ADDR : addr is the absolute address of the function |
6823 | * EC_FUNC_VIRTUAL : "call [ireg+disp]" |
6824 | * |
6825 | * If callType is one of these emitCallTypes, addr has to be NULL. |
6826 | * EC_INDIR_R : "call ireg". |
6827 | * EC_INDIR_SR : "call lcl<disp>" (eg. call [ebp-8]). |
6828 | * EC_INDIR_C : "call clsVar<disp>" (eg. call [clsVarAddr]) |
6829 | * EC_INDIR_ARD : "call [ireg+xreg*xmul+disp]" |
6830 | * |
6831 | */ |
6832 | |
6833 | // clang-format off |
6834 | void emitter::emitIns_Call(EmitCallType callType, |
6835 | CORINFO_METHOD_HANDLE methHnd, |
6836 | INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE |
6837 | void* addr, |
6838 | ssize_t argSize, |
6839 | emitAttr retSize |
6840 | MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), |
6841 | VARSET_VALARG_TP ptrVars, |
6842 | regMaskTP gcrefRegs, |
6843 | regMaskTP byrefRegs, |
6844 | IL_OFFSETX ilOffset, // = BAD_IL_OFFSET |
6845 | regNumber ireg, // = REG_NA |
6846 | regNumber xreg, // = REG_NA |
6847 | unsigned xmul, // = 0 |
6848 | ssize_t disp, // = 0 |
6849 | bool isJump) // = false |
6850 | // clang-format on |
6851 | { |
6852 | /* Sanity check the arguments depending on callType */ |
6853 | |
6854 | assert(callType < EC_COUNT); |
6855 | assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) || |
6856 | (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); |
6857 | assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0)); |
6858 | assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr); |
6859 | assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); |
6860 | assert(callType != EC_INDIR_SR || |
6861 | (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount)); |
6862 | assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0)); |
6863 | |
    // Our stack level should always be at least as large as the number of argument bytes
    // we push. Just a sanity check.
6866 | assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel); |
6867 | |
6868 | #if STACK_PROBES |
6869 | if (emitComp->opts.compNeedStackProbes) |
6870 | { |
        // If we've pushed more than JIT_RESERVED_STACK allows, do an additional stack probe.
        // Otherwise, just make sure the prolog does a probe for us. The invariant we're trying
        // to maintain is that at any point we go out to unmanaged code, there are at least
        // CORINFO_STACKPROBE_DEPTH bytes of stack available.
        //
        // The reason we don't do a single probe for the max size in the prolog is that we
        // don't have the max depth precomputed (it can depend on codegen), and we need it at
        // the time we generate locallocs.
        //
        // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes into
        // account everything except the arguments of a callee.
6882 | // |
6883 | // |
6884 | // |
6885 | if ((TARGET_POINTER_SIZE + // return address for call |
6886 | emitComp->genStackLevel + |
             // Current stack level. This gets reset on every
6888 | // localloc and on the prolog (invariant is that |
6889 | // genStackLevel is 0 on basic block entry and exit and |
6890 | // after any alloca). genStackLevel will include any arguments |
6891 | // to the call, so we will insert an additional probe if |
6892 | // we've consumed more than JIT_RESERVED_STACK bytes |
6893 | // of stack, which is what the prolog probe covers (in |
6894 | // addition to the EE requested size) |
6895 | (emitComp->compHndBBtabCount * TARGET_POINTER_SIZE) |
6896 | // Hidden slots for calling finallys |
6897 | ) >= JIT_RESERVED_STACK) |
6898 | { |
            // This happens when you have a call with a lot of arguments or a call is done
            // when there's a lot of stuff pushed on the stack (for example a call whose return
            // value is an argument of another call that has pushed stuff on the stack).
            // This shouldn't be very frequent.
6903 | // For different values of JIT_RESERVED_STACK |
6904 | // |
6905 | // For mscorlib (109605 calls) |
6906 | // |
6907 | // 14190 probes in prologs (56760 bytes of code) |
6908 | // |
6909 | // JIT_RESERVED_STACK = 16 : 5452 extra probes |
6910 | // JIT_RESERVED_STACK = 32 : 1084 extra probes |
            //      JIT_RESERVED_STACK = 64 :    1 extra probe
6912 | // JIT_RESERVED_STACK = 96 : 0 extra probes |
6913 | emitComp->genGenerateStackProbe(); |
6914 | } |
6915 | else |
6916 | { |
6917 | if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog) |
6918 | { |
6919 | if (emitComp->compStackProbePrologDone) |
6920 | { |
6921 | // We already generated a probe and this call is not happening |
6922 | // at a depth >= JIT_RESERVED_STACK, so nothing to do here |
6923 | } |
6924 | else |
6925 | { |
                    // 3 possible ways to get here:
                    // - We are in an epilog and haven't generated a probe in the prolog.
                    //   This shouldn't happen as we don't generate any calls in epilogs.
                    // - We are in the prolog, but doing a call before generating the probe.
                    //   This shouldn't happen at all.
                    // - We are in the prolog, did not generate a probe, but now we need one
                    //   because we have to emit a call (e.g. the profiler hook).
6934 | // |
6935 | // In any case, we need a probe |
6936 | |
6937 | // Ignore the profiler callback for now. |
6938 | if (!emitComp->compIsProfilerHookNeeded()) |
6939 | { |
6940 | assert(!"We do not expect to get here" ); |
6941 | emitComp->genGenerateStackProbe(); |
6942 | } |
6943 | } |
6944 | } |
6945 | else |
6946 | { |
6947 | // We will need a probe and will generate it in the prolog |
6948 | emitComp->genNeedPrologStackProbe = true; |
6949 | } |
6950 | } |
6951 | } |
6952 | #endif // STACK_PROBES |
6953 | |
6954 | // Trim out any callee-trashed registers from the live set. |
6955 | regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); |
6956 | gcrefRegs &= savedSet; |
6957 | byrefRegs &= savedSet; |
6958 | |
6959 | #ifdef DEBUG |
6960 | if (EMIT_GC_VERBOSE) |
6961 | { |
6962 | printf("\t\t\t\t\t\t\tCall: GCvars=%s " , VarSetOps::ToString(emitComp, ptrVars)); |
6963 | dumpConvertedVarSet(emitComp, ptrVars); |
6964 | printf(", gcrefRegs=" ); |
6965 | printRegMaskInt(gcrefRegs); |
6966 | emitDispRegSet(gcrefRegs); |
6967 | printf(", byrefRegs=" ); |
6968 | printRegMaskInt(byrefRegs); |
6969 | emitDispRegSet(byrefRegs); |
6970 | printf("\n" ); |
6971 | } |
6972 | #endif |
6973 | |
6974 | /* Managed RetVal: emit sequence point for the call */ |
6975 | if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET) |
6976 | { |
6977 | codeGen->genIPmappingAdd(ilOffset, false); |
6978 | } |
6979 | |
6980 | /* |
6981 | We need to allocate the appropriate instruction descriptor based |
6982 | on whether this is a direct/indirect call, and whether we need to |
6983 | record an updated set of live GC variables. |
6984 | |
6985 | The stats for a ton of classes is as follows: |
6986 | |
6987 | Direct call w/o GC vars 220,216 |
6988 | Indir. call w/o GC vars 144,781 |
6989 | |
6990 | Direct call with GC vars 9,440 |
6991 | Indir. call with GC vars 5,768 |
6992 | */ |
6993 | |
6994 | instrDesc* id; |
6995 | |
6996 | assert(argSize % REGSIZE_BYTES == 0); |
6997 | int argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide |
6998 | |
6999 | if (callType >= EC_FUNC_VIRTUAL) |
7000 | { |
7001 | /* Indirect call, virtual calls */ |
7002 | |
7003 | assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR || |
7004 | callType == EC_INDIR_C || callType == EC_INDIR_ARD); |
7005 | |
7006 | id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, |
7007 | retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize)); |
7008 | } |
7009 | else |
7010 | { |
7011 | // Helper/static/nonvirtual/function calls (direct or through handle), |
7012 | // and calls to an absolute addr. |
7013 | |
7014 | assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR); |
7015 | |
7016 | id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, |
7017 | retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize)); |
7018 | } |
7019 | |
7020 | /* Update the emitter's live GC ref sets */ |
7021 | |
7022 | VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); |
7023 | emitThisGCrefRegs = gcrefRegs; |
7024 | emitThisByrefRegs = byrefRegs; |
7025 | |
7026 | /* Set the instruction - special case jumping a function */ |
7027 | instruction ins = INS_call; |
7028 | |
7029 | if (isJump) |
7030 | { |
7031 | assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR); |
7032 | if (callType == EC_FUNC_TOKEN) |
7033 | { |
7034 | ins = INS_l_jmp; |
7035 | } |
7036 | else |
7037 | { |
7038 | ins = INS_i_jmp; |
7039 | } |
7040 | } |
7041 | id->idIns(ins); |
7042 | |
7043 | id->idSetIsNoGC(emitNoGChelper(methHnd)); |
7044 | |
7045 | UNATIVE_OFFSET sz; |
7046 | |
7047 | // Record the address: method, indirection, or funcptr |
7048 | if (callType >= EC_FUNC_VIRTUAL) |
7049 | { |
7050 | // This is an indirect call (either a virtual call or func ptr call) |
7051 | |
7052 | switch (callType) |
7053 | { |
7054 | case EC_INDIR_C: |
7055 | // Indirect call using an absolute code address. |
7056 | // Must be marked as relocatable and is done at the |
7057 | // branch target location. |
7058 | goto CALL_ADDR_MODE; |
7059 | |
7060 | case EC_INDIR_R: // the address is in a register |
7061 | |
7062 | id->idSetIsCallRegPtr(); |
7063 | |
7064 | __fallthrough; |
7065 | |
7066 | case EC_INDIR_ARD: // the address is an indirection |
7067 | |
7068 | goto CALL_ADDR_MODE; |
7069 | |
7070 | case EC_INDIR_SR: // the address is in a lcl var |
7071 | |
7072 | id->idInsFmt(IF_SRD); |
7073 | // disp is really a lclVarNum |
7074 | noway_assert((unsigned)disp == (size_t)disp); |
7075 | id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0); |
7076 | sz = emitInsSizeSV(id, insCodeMR(INS_call), (unsigned)disp, 0); |
7077 | |
7078 | break; |
7079 | |
7080 | case EC_FUNC_VIRTUAL: |
7081 | |
7082 | CALL_ADDR_MODE: |
7083 | |
7084 | // fall-through |
7085 | |
7086 | // The function is "ireg" if id->idIsCallRegPtr(), |
7087 | // else [ireg+xmul*xreg+disp] |
7088 | |
7089 | id->idInsFmt(IF_ARD); |
7090 | |
7091 | id->idAddr()->iiaAddrMode.amBaseReg = ireg; |
7092 | id->idAddr()->iiaAddrMode.amIndxReg = xreg; |
7093 | id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1; |
7094 | |
7095 | sz = emitInsSizeAM(id, insCodeMR(INS_call)); |
7096 | |
7097 | if (ireg == REG_NA && xreg == REG_NA) |
7098 | { |
7099 | if (codeGen->genCodeIndirAddrNeedsReloc(disp)) |
7100 | { |
7101 | id->idSetIsDspReloc(); |
7102 | } |
7103 | #ifdef _TARGET_AMD64_ |
7104 | else |
7105 | { |
7106 | // An absolute indir address that doesn't need reloc should fit within 32-bits |
7107 | // to be encoded as offset relative to zero. This addr mode requires an extra |
7108 | // SIB byte |
7109 | noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr); |
7110 | sz++; |
7111 | } |
7112 | #endif //_TARGET_AMD64_ |
7113 | } |
7114 | |
7115 | break; |
7116 | |
7117 | default: |
7118 | NO_WAY("unexpected instruction" ); |
7119 | break; |
7120 | } |
7121 | } |
7122 | else if (callType == EC_FUNC_TOKEN_INDIR) |
7123 | { |
7124 | /* "call [method_addr]" */ |
7125 | |
7126 | assert(addr != nullptr); |
7127 | |
7128 | id->idInsFmt(IF_METHPTR); |
7129 | id->idAddr()->iiaAddr = (BYTE*)addr; |
7130 | sz = 6; |
7131 | |
7132 | // Since this is an indirect call through a pointer and we don't |
7133 | // currently pass in emitAttr into this function, we query codegen |
7134 | // whether addr needs a reloc. |
7135 | if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr)) |
7136 | { |
7137 | id->idSetIsDspReloc(); |
7138 | } |
7139 | #ifdef _TARGET_AMD64_ |
7140 | else |
7141 | { |
7142 | // An absolute indir address that doesn't need reloc should fit within 32-bits |
7143 | // to be encoded as offset relative to zero. This addr mode requires an extra |
7144 | // SIB byte |
7145 | noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr); |
7146 | sz++; |
7147 | } |
7148 | #endif //_TARGET_AMD64_ |
7149 | } |
7150 | else |
7151 | { |
7152 | /* This is a simple direct call: "call helper/method/addr" */ |
7153 | |
7154 | assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR); |
7155 | |
7156 | assert(addr != nullptr); |
7157 | |
7158 | id->idInsFmt(IF_METHOD); |
7159 | sz = 5; |
7160 | |
7161 | id->idAddr()->iiaAddr = (BYTE*)addr; |
7162 | |
7163 | if (callType == EC_FUNC_ADDR) |
7164 | { |
7165 | id->idSetIsCallAddr(); |
7166 | } |
7167 | |
7168 | // Direct call to a method and no addr indirection is needed. |
7169 | if (codeGen->genCodeAddrNeedsReloc((size_t)addr)) |
7170 | { |
7171 | id->idSetIsDspReloc(); |
7172 | } |
7173 | } |
7174 | |
7175 | #ifdef DEBUG |
7176 | if (emitComp->verbose && 0) |
7177 | { |
7178 | if (id->idIsLargeCall()) |
7179 | { |
7180 | if (callType >= EC_FUNC_VIRTUAL) |
7181 | { |
7182 | printf("[%02u] Rec call GC vars = %s\n" , id->idDebugOnlyInfo()->idNum, |
7183 | VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars)); |
7184 | } |
7185 | else |
7186 | { |
7187 | printf("[%02u] Rec call GC vars = %s\n" , id->idDebugOnlyInfo()->idNum, |
7188 | VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars)); |
7189 | } |
7190 | } |
7191 | } |
7192 | |
7193 | id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token |
7194 | id->idDebugOnlyInfo()->idCallSig = sigInfo; |
7195 | #endif // DEBUG |
7196 | |
7197 | #ifdef LATE_DISASM |
7198 | if (addr != nullptr) |
7199 | { |
7200 | codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); |
7201 | } |
7202 | #endif // LATE_DISASM |
7203 | |
7204 | id->idCodeSize(sz); |
7205 | |
7206 | dispIns(id); |
7207 | emitCurIGsize += sz; |
7208 | |
7209 | #if !FEATURE_FIXED_OUT_ARGS |
7210 | |
7211 | /* The call will pop the arguments */ |
7212 | |
7213 | if (emitCntStackDepth && argSize > 0) |
7214 | { |
7215 | noway_assert((ssize_t)emitCurStackLvl >= argSize); |
7216 | emitCurStackLvl -= (int)argSize; |
7217 | assert((int)emitCurStackLvl >= 0); |
7218 | } |
7219 | |
7220 | #endif // !FEATURE_FIXED_OUT_ARGS |
7221 | } |
7222 | |
7223 | #ifdef DEBUG |
7224 | /***************************************************************************** |
7225 | * |
7226 | * The following called for each recorded instruction -- use for debugging. |
7227 | */ |
7228 | void emitter::emitInsSanityCheck(instrDesc* id) |
7229 | { |
7230 | // make certain you only try to put relocs on things that can have them. |
7231 | ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()]; |
7232 | if ((idOp == ID_OP_SCNS) && id->idIsLargeCns()) |
7233 | { |
7234 | idOp = ID_OP_CNS; |
7235 | } |
7236 | |
7237 | if (id->idIsDspReloc()) |
7238 | { |
7239 | assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS || |
7240 | idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP || |
7241 | idOp == ID_OP_LBL); |
7242 | } |
7243 | |
7244 | if (id->idIsCnsReloc()) |
7245 | { |
7246 | assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC || |
7247 | idOp == ID_OP_CALL || idOp == ID_OP_JMP); |
7248 | } |
7249 | } |
7250 | #endif |
7251 | |
7252 | /***************************************************************************** |
7253 | * |
7254 | * Return the allocated size (in bytes) of the given instruction descriptor. |
7255 | */ |
7256 | |
7257 | size_t emitter::emitSizeOfInsDsc(instrDesc* id) |
7258 | { |
7259 | if (emitIsScnsInsDsc(id)) |
7260 | { |
7261 | return SMALL_IDSC_SIZE; |
7262 | } |
7263 | |
7264 | assert((unsigned)id->idInsFmt() < emitFmtCount); |
7265 | |
7266 | ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()]; |
7267 | |
    // An INS_call instruction may use a "fat" direct/indirect call descriptor,
    // except for a local call to a label (i.e. a call to a finally).
    // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
    // INS_call instruction always uses one of these idOps.
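    //
    // For example (illustrative): a call that must record a large set of live GC variables or
    // registers is allocated as a "fat" instrDescCGCA, while a plain "call [reg]" with nothing
    // extra to record fits in the base instrDesc size returned at the bottom of this function.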
7272 | |
7273 | if (id->idIns() == INS_call) |
7274 | { |
7275 | assert(idOp == ID_OP_CALL || // is a direct call |
               idOp == ID_OP_SPEC ||   // is an indirect call
7277 | idOp == ID_OP_JMP); // is a local call to finally clause |
7278 | } |
7279 | |
7280 | switch (idOp) |
7281 | { |
7282 | case ID_OP_NONE: |
7283 | break; |
7284 | |
7285 | case ID_OP_LBL: |
7286 | return sizeof(instrDescLbl); |
7287 | |
7288 | case ID_OP_JMP: |
7289 | return sizeof(instrDescJmp); |
7290 | |
7291 | case ID_OP_CALL: |
7292 | case ID_OP_SPEC: |
7293 | if (id->idIsLargeCall()) |
7294 | { |
7295 | /* Must be a "fat" indirect call descriptor */ |
7296 | return sizeof(instrDescCGCA); |
7297 | } |
7298 | |
7299 | __fallthrough; |
7300 | |
7301 | case ID_OP_SCNS: |
7302 | case ID_OP_CNS: |
7303 | case ID_OP_DSP: |
7304 | case ID_OP_DSP_CNS: |
7305 | case ID_OP_AMD: |
7306 | case ID_OP_AMD_CNS: |
7307 | if (id->idIsLargeCns()) |
7308 | { |
7309 | if (id->idIsLargeDsp()) |
7310 | { |
7311 | return sizeof(instrDescCnsDsp); |
7312 | } |
7313 | else |
7314 | { |
7315 | return sizeof(instrDescCns); |
7316 | } |
7317 | } |
7318 | else |
7319 | { |
7320 | if (id->idIsLargeDsp()) |
7321 | { |
7322 | return sizeof(instrDescDsp); |
7323 | } |
7324 | else |
7325 | { |
7326 | return sizeof(instrDesc); |
7327 | } |
7328 | } |
7329 | |
7330 | default: |
7331 | NO_WAY("unexpected instruction descriptor format" ); |
7332 | break; |
7333 | } |
7334 | |
7335 | return sizeof(instrDesc); |
7336 | } |
7337 | |
7338 | /*****************************************************************************/ |
7339 | #ifdef DEBUG |
7340 | /***************************************************************************** |
7341 | * |
7342 | * Return a string that represents the given register. |
7343 | */ |
7344 | |
7345 | const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) |
7346 | { |
7347 | static char rb[2][128]; |
7348 | static unsigned char rbc = 0; |
7349 | |
7350 | const char* rn = emitComp->compRegVarName(reg, varName); |
7351 | |
7352 | #ifdef _TARGET_AMD64_ |
7353 | char suffix = '\0'; |
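
    // For sizes smaller than 8 bytes the 64-bit register name is rewritten below: legacy
    // registers are renamed by prefixing 'e' or trimming characters (e.g. rax -> eax, ax, al),
    // while r8-r15 get a size suffix appended (e.g. r10 -> r10d / r10w / r10b). The rewritten
    // name is built in the rotating rb[] buffers.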
7354 | |
7355 | switch (EA_SIZE(attr)) |
7356 | { |
7357 | case EA_32BYTE: |
7358 | return emitYMMregName(reg); |
7359 | |
7360 | case EA_16BYTE: |
7361 | return emitXMMregName(reg); |
7362 | |
7363 | case EA_8BYTE: |
7364 | if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) |
7365 | { |
7366 | return emitXMMregName(reg); |
7367 | } |
7368 | break; |
7369 | |
7370 | case EA_4BYTE: |
7371 | if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) |
7372 | { |
7373 | return emitXMMregName(reg); |
7374 | } |
7375 | |
7376 | if (reg > REG_R15) |
7377 | { |
7378 | break; |
7379 | } |
7380 | |
7381 | if (reg > REG_RDI) |
7382 | { |
7383 | suffix = 'd'; |
7384 | goto APPEND_SUFFIX; |
7385 | } |
7386 | rbc = (rbc + 1) % 2; |
7387 | rb[rbc][0] = 'e'; |
7388 | rb[rbc][1] = rn[1]; |
7389 | rb[rbc][2] = rn[2]; |
7390 | rb[rbc][3] = 0; |
7391 | rn = rb[rbc]; |
7392 | break; |
7393 | |
7394 | case EA_2BYTE: |
7395 | if (reg > REG_RDI) |
7396 | { |
7397 | suffix = 'w'; |
7398 | goto APPEND_SUFFIX; |
7399 | } |
7400 | rn++; |
7401 | break; |
7402 | |
7403 | case EA_1BYTE: |
7404 | if (reg > REG_RDI) |
7405 | { |
7406 | suffix = 'b'; |
7407 | APPEND_SUFFIX: |
7408 | rbc = (rbc + 1) % 2; |
7409 | rb[rbc][0] = rn[0]; |
7410 | rb[rbc][1] = rn[1]; |
7411 | if (rn[2]) |
7412 | { |
7413 | assert(rn[3] == 0); |
7414 | rb[rbc][2] = rn[2]; |
7415 | rb[rbc][3] = suffix; |
7416 | rb[rbc][4] = 0; |
7417 | } |
7418 | else |
7419 | { |
7420 | rb[rbc][2] = suffix; |
7421 | rb[rbc][3] = 0; |
7422 | } |
7423 | } |
7424 | else |
7425 | { |
7426 | rbc = (rbc + 1) % 2; |
7427 | rb[rbc][0] = rn[1]; |
7428 | if (reg < 4) |
7429 | { |
7430 | rb[rbc][1] = 'l'; |
7431 | rb[rbc][2] = 0; |
7432 | } |
7433 | else |
7434 | { |
7435 | rb[rbc][1] = rn[2]; |
7436 | rb[rbc][2] = 'l'; |
7437 | rb[rbc][3] = 0; |
7438 | } |
7439 | } |
7440 | |
7441 | rn = rb[rbc]; |
7442 | break; |
7443 | |
7444 | default: |
7445 | break; |
7446 | } |
7447 | #endif // _TARGET_AMD64_ |
7448 | |
7449 | #ifdef _TARGET_X86_ |
7450 | assert(strlen(rn) >= 3); |
7451 | |
7452 | switch (EA_SIZE(attr)) |
7453 | { |
7454 | case EA_32BYTE: |
7455 | return emitYMMregName(reg); |
7456 | |
7457 | case EA_16BYTE: |
7458 | return emitXMMregName(reg); |
7459 | |
7460 | case EA_8BYTE: |
7461 | if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) |
7462 | { |
7463 | return emitXMMregName(reg); |
7464 | } |
7465 | break; |
7466 | |
7467 | case EA_4BYTE: |
7468 | if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) |
7469 | { |
7470 | return emitXMMregName(reg); |
7471 | } |
7472 | break; |
7473 | |
7474 | case EA_2BYTE: |
7475 | rn++; |
7476 | break; |
7477 | |
7478 | case EA_1BYTE: |
7479 | rbc = (rbc + 1) % 2; |
7480 | rb[rbc][0] = rn[1]; |
7481 | rb[rbc][1] = 'l'; |
7482 | strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3); |
7483 | |
7484 | rn = rb[rbc]; |
7485 | break; |
7486 | |
7487 | default: |
7488 | break; |
7489 | } |
7490 | #endif // _TARGET_X86_ |
7491 | |
7492 | #if 0 |
    // The following is useful if you want register names to be tagged with * or ^, representing
    // gcref or byref, respectively; however, it's possibly not interesting most of the time.
7495 | if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr)) |
7496 | { |
7497 | if (rn != rb[rbc]) |
7498 | { |
7499 | rbc = (rbc+1)%2; |
7500 | strcpy_s(rb[rbc], sizeof(rb[rbc]), rn); |
7501 | rn = rb[rbc]; |
7502 | } |
7503 | |
7504 | if (EA_IS_GCREF(attr)) |
7505 | { |
7506 | strcat_s(rb[rbc], sizeof(rb[rbc]), "*" ); |
7507 | } |
7508 | else if (EA_IS_BYREF(attr)) |
7509 | { |
7510 | strcat_s(rb[rbc], sizeof(rb[rbc]), "^" ); |
7511 | } |
7512 | } |
7513 | #endif // 0 |
7514 | |
7515 | return rn; |
7516 | } |
7517 | |
7518 | /***************************************************************************** |
7519 | * |
7520 | * Return a string that represents the given FP register. |
7521 | */ |
7522 | |
7523 | const char* emitter::emitFPregName(unsigned reg, bool varName) |
7524 | { |
7525 | assert(reg < REG_COUNT); |
7526 | |
7527 | return emitComp->compFPregVarName((regNumber)(reg), varName); |
7528 | } |
7529 | |
7530 | /***************************************************************************** |
7531 | * |
7532 | * Return a string that represents the given XMM register. |
7533 | */ |
7534 | |
7535 | const char* emitter::emitXMMregName(unsigned reg) |
7536 | { |
7537 | static const char* const regNames[] = { |
7538 | #define REGDEF(name, rnum, mask, sname) "x" sname, |
7539 | #include "register.h" |
7540 | }; |
7541 | |
7542 | assert(reg < REG_COUNT); |
7543 | assert(reg < _countof(regNames)); |
7544 | |
7545 | return regNames[reg]; |
7546 | } |
7547 | |
7548 | /***************************************************************************** |
7549 | * |
7550 | * Return a string that represents the given YMM register. |
7551 | */ |
7552 | |
7553 | const char* emitter::emitYMMregName(unsigned reg) |
7554 | { |
7555 | static const char* const regNames[] = { |
7556 | #define REGDEF(name, rnum, mask, sname) "y" sname, |
7557 | #include "register.h" |
7558 | }; |
7559 | |
7560 | assert(reg < REG_COUNT); |
7561 | assert(reg < _countof(regNames)); |
7562 | |
7563 | return regNames[reg]; |
7564 | } |
7565 | |
7566 | /***************************************************************************** |
7567 | * |
7568 | * Display a static data member reference. |
7569 | */ |
7570 | |
7571 | void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */) |
7572 | { |
7573 | int doffs; |
7574 | |
7575 | /* Filter out the special case of fs:[offs] */ |
7576 | |
7577 | // Munge any pointers if we want diff-able disassembly |
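    // (Any offset whose bits above bit 19 are not all zeros or all ones -- i.e. roughly outside
    // the range (-1MB, +1MB) -- is assumed to be a pointer and replaced with a fixed sentinel.)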
7578 | if (emitComp->opts.disDiffable) |
7579 | { |
7580 | ssize_t top12bits = (offs >> 20); |
7581 | if ((top12bits != 0) && (top12bits != -1)) |
7582 | { |
7583 | offs = 0xD1FFAB1E; |
7584 | } |
7585 | } |
7586 | |
7587 | if (fldHnd == FLD_GLOBAL_FS) |
7588 | { |
7589 | printf("FS:[0x%04X]" , offs); |
7590 | return; |
7591 | } |
7592 | |
7593 | if (fldHnd == FLD_GLOBAL_DS) |
7594 | { |
7595 | printf("[0x%04X]" , offs); |
7596 | return; |
7597 | } |
7598 | |
7599 | printf("[" ); |
7600 | |
7601 | doffs = Compiler::eeGetJitDataOffs(fldHnd); |
7602 | |
7603 | if (reloc) |
7604 | { |
7605 | printf("reloc " ); |
7606 | } |
7607 | |
7608 | if (doffs >= 0) |
7609 | { |
7610 | if (doffs & 1) |
7611 | { |
7612 | printf("@CNS%02u" , doffs - 1); |
7613 | } |
7614 | else |
7615 | { |
7616 | printf("@RWD%02u" , doffs); |
7617 | } |
7618 | |
7619 | if (offs) |
7620 | { |
7621 | printf("%+Id" , offs); |
7622 | } |
7623 | } |
7624 | else |
7625 | { |
7626 | printf("classVar[%#x]" , emitComp->dspPtr(fldHnd)); |
7627 | |
7628 | if (offs) |
7629 | { |
7630 | printf("%+Id" , offs); |
7631 | } |
7632 | } |
7633 | |
7634 | printf("]" ); |
7635 | |
7636 | if (emitComp->opts.varNames && offs < 0) |
7637 | { |
7638 | printf("'%s" , emitComp->eeGetFieldName(fldHnd)); |
7639 | if (offs) |
7640 | { |
7641 | printf("%+Id" , offs); |
7642 | } |
7643 | printf("'" ); |
7644 | } |
7645 | } |
7646 | |
7647 | /***************************************************************************** |
7648 | * |
7649 | * Display a stack frame reference. |
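 *
 *  (For example, once the final frame layout is known, a local V02 that lives 8 bytes below
 *  the frame pointer is displayed roughly as "[V02 ebp-08H]".)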
7650 | */ |
7651 | |
7652 | void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) |
7653 | { |
7654 | int addr; |
7655 | bool bEBP; |
7656 | |
7657 | printf("[" ); |
7658 | |
7659 | if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT) |
7660 | { |
7661 | if (varx < 0) |
7662 | { |
7663 | printf("TEMP_%02u" , -varx); |
7664 | } |
7665 | else |
7666 | { |
7667 | printf("V%02u" , +varx); |
7668 | } |
7669 | |
7670 | if (disp < 0) |
7671 | { |
7672 | printf("-0x%X" , -disp); |
7673 | } |
7674 | else if (disp > 0) |
7675 | { |
7676 | printf("+0x%X" , +disp); |
7677 | } |
7678 | } |
7679 | |
7680 | if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT) |
7681 | { |
7682 | if (!asmfm) |
7683 | { |
7684 | printf(" " ); |
7685 | } |
7686 | |
7687 | addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp; |
7688 | |
7689 | if (bEBP) |
7690 | { |
7691 | printf(STR_FPBASE); |
7692 | |
7693 | if (addr < 0) |
7694 | { |
7695 | printf("-%02XH" , -addr); |
7696 | } |
7697 | else if (addr > 0) |
7698 | { |
7699 | printf("+%02XH" , addr); |
7700 | } |
7701 | } |
7702 | else |
7703 | { |
7704 | /* Adjust the offset by amount currently pushed on the stack */ |
7705 | |
7706 | printf(STR_SPBASE); |
7707 | |
7708 | if (addr < 0) |
7709 | { |
7710 | printf("-%02XH" , -addr); |
7711 | } |
7712 | else if (addr > 0) |
7713 | { |
7714 | printf("+%02XH" , addr); |
7715 | } |
7716 | |
7717 | #if !FEATURE_FIXED_OUT_ARGS |
7718 | |
7719 | if (emitCurStackLvl) |
7720 | printf("+%02XH" , emitCurStackLvl); |
7721 | |
7722 | #endif // !FEATURE_FIXED_OUT_ARGS |
7723 | } |
7724 | } |
7725 | |
7726 | printf("]" ); |
7727 | |
7728 | if (varx >= 0 && emitComp->opts.varNames) |
7729 | { |
7730 | LclVarDsc* varDsc; |
7731 | const char* varName; |
7732 | |
7733 | assert((unsigned)varx < emitComp->lvaCount); |
7734 | varDsc = emitComp->lvaTable + varx; |
7735 | varName = emitComp->compLocalVarName(varx, offs); |
7736 | |
7737 | if (varName) |
7738 | { |
7739 | printf("'%s" , varName); |
7740 | |
7741 | if (disp < 0) |
7742 | { |
7743 | printf("-%d" , -disp); |
7744 | } |
7745 | else if (disp > 0) |
7746 | { |
7747 | printf("+%d" , +disp); |
7748 | } |
7749 | |
7750 | printf("'" ); |
7751 | } |
7752 | } |
7753 | } |
7754 | |
7755 | /***************************************************************************** |
7756 | * |
 *  Display a reloc value.
 *  If we are formatting for an assembly listing, don't print the hex value
 *  since it will prevent us from doing assembly diffs.
7760 | */ |
7761 | void emitter::emitDispReloc(ssize_t value) |
7762 | { |
7763 | if (emitComp->opts.disAsm) |
7764 | { |
7765 | printf("(reloc)" ); |
7766 | } |
7767 | else |
7768 | { |
7769 | printf("(reloc 0x%Ix)" , emitComp->dspPtr(value)); |
7770 | } |
7771 | } |
7772 | |
7773 | /***************************************************************************** |
7774 | * |
7775 | * Display an address mode. |
7776 | */ |
7777 | |
7778 | void emitter::emitDispAddrMode(instrDesc* id, bool noDetail) |
7779 | { |
7780 | bool nsep = false; |
7781 | ssize_t disp; |
7782 | |
7783 | unsigned jtno = 0; |
7784 | dataSection* jdsc = nullptr; |
7785 | |
7786 | /* The displacement field is in an unusual place for calls */ |
7787 | |
7788 | disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id); |
7789 | |
7790 | /* Display a jump table label if this is a switch table jump */ |
7791 | |
7792 | if (id->idIns() == INS_i_jmp) |
7793 | { |
7794 | UNATIVE_OFFSET offs = 0; |
7795 | |
7796 | /* Find the appropriate entry in the data section list */ |
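        //
        // A data section that holds a label table is tagged with an odd dsSize; the loop below
        // strips that flag bit, counts the tables it passes, and stops at the table whose
        // starting offset matches the cookie recorded on this jump's debug info.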
7797 | |
7798 | for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext) |
7799 | { |
7800 | UNATIVE_OFFSET size = jdsc->dsSize; |
7801 | |
7802 | /* Is this a label table? */ |
7803 | |
7804 | if (size & 1) |
7805 | { |
7806 | size--; |
7807 | jtno++; |
7808 | |
7809 | if (offs == id->idDebugOnlyInfo()->idMemCookie) |
7810 | { |
7811 | break; |
7812 | } |
7813 | } |
7814 | |
7815 | offs += size; |
7816 | } |
7817 | |
        /* If we've found a matching entry then this is a table jump */
7819 | |
7820 | if (jdsc) |
7821 | { |
7822 | if (id->idIsDspReloc()) |
7823 | { |
7824 | printf("reloc " ); |
7825 | } |
7826 | printf("J_M%03u_DS%02u" , Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie); |
7827 | } |
7828 | |
7829 | disp -= id->idDebugOnlyInfo()->idMemCookie; |
7830 | } |
7831 | |
7832 | bool frameRef = false; |
7833 | |
7834 | printf("[" ); |
7835 | |
7836 | if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA) |
7837 | { |
7838 | printf("%s" , emitRegName(id->idAddr()->iiaAddrMode.amBaseReg)); |
7839 | nsep = true; |
7840 | if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP) |
7841 | { |
7842 | frameRef = true; |
7843 | } |
7844 | else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP) |
7845 | { |
7846 | frameRef = true; |
7847 | } |
7848 | } |
7849 | |
7850 | if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA) |
7851 | { |
7852 | size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale); |
7853 | |
7854 | if (nsep) |
7855 | { |
7856 | printf("+" ); |
7857 | } |
7858 | if (scale > 1) |
7859 | { |
7860 | printf("%u*" , scale); |
7861 | } |
7862 | printf("%s" , emitRegName(id->idAddr()->iiaAddrMode.amIndxReg)); |
7863 | nsep = true; |
7864 | } |
7865 | |
7866 | if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp)) |
7867 | { |
7868 | if (nsep) |
7869 | { |
7870 | printf("+" ); |
7871 | } |
7872 | emitDispReloc(disp); |
7873 | } |
7874 | else |
7875 | { |
7876 | // Munge any pointers if we want diff-able disassembly |
7877 | // It's assumed to be a pointer when disp is outside of the range (-1M, +1M); top bits are not 0 or -1 |
7878 | if (!frameRef && emitComp->opts.disDiffable && (static_cast<size_t>((disp >> 20) + 1) > 1)) |
7879 | { |
7880 | if (nsep) |
7881 | { |
7882 | printf("+" ); |
7883 | } |
7884 | printf("D1FFAB1EH" ); |
7885 | } |
7886 | else if (disp > 0) |
7887 | { |
7888 | if (nsep) |
7889 | { |
7890 | printf("+" ); |
7891 | } |
7892 | if (frameRef) |
7893 | { |
7894 | printf("%02XH" , disp); |
7895 | } |
7896 | else if (disp < 1000) |
7897 | { |
7898 | printf("%d" , disp); |
7899 | } |
7900 | else if (disp <= 0xFFFF) |
7901 | { |
7902 | printf("%04XH" , disp); |
7903 | } |
7904 | else |
7905 | { |
7906 | printf("%08XH" , disp); |
7907 | } |
7908 | } |
7909 | else if (disp < 0) |
7910 | { |
7911 | if (frameRef) |
7912 | { |
7913 | printf("-%02XH" , -disp); |
7914 | } |
7915 | else if (disp > -1000) |
7916 | { |
7917 | printf("-%d" , -disp); |
7918 | } |
7919 | else if (disp >= -0xFFFF) |
7920 | { |
7921 | printf("-%04XH" , -disp); |
7922 | } |
7923 | else if (disp < -0xFFFFFF) |
7924 | { |
7925 | if (nsep) |
7926 | { |
7927 | printf("+" ); |
7928 | } |
7929 | printf("%08XH" , disp); |
7930 | } |
7931 | else |
7932 | { |
7933 | printf("-%08XH" , -disp); |
7934 | } |
7935 | } |
7936 | else if (!nsep) |
7937 | { |
7938 | printf("%04XH" , disp); |
7939 | } |
7940 | } |
7941 | |
7942 | printf("]" ); |
7943 | |
7944 | // pretty print string if it looks like one |
7945 | if ((id->idGCref() == GCT_GCREF) && (id->idIns() == INS_mov) && (id->idAddr()->iiaAddrMode.amBaseReg == REG_NA)) |
7946 | { |
7947 | const wchar_t* str = emitComp->eeGetCPString(disp); |
7948 | if (str != nullptr) |
7949 | { |
7950 | printf(" '%S'" , str); |
7951 | } |
7952 | } |
7953 | |
7954 | if (jdsc && !noDetail) |
7955 | { |
7956 | unsigned cnt = (jdsc->dsSize - 1) / TARGET_POINTER_SIZE; |
7957 | BasicBlock** bbp = (BasicBlock**)jdsc->dsCont; |
7958 | |
7959 | #ifdef _TARGET_AMD64_ |
7960 | #define SIZE_LETTER "Q" |
7961 | #else |
7962 | #define SIZE_LETTER "D" |
7963 | #endif |
7964 | printf("\n\n J_M%03u_DS%02u LABEL " SIZE_LETTER "WORD" , Compiler::s_compMethodsCount, jtno); |
7965 | |
7966 | /* Display the label table (it's stored as "BasicBlock*" values) */ |
7967 | |
7968 | do |
7969 | { |
7970 | insGroup* lab; |
7971 | |
7972 | /* Convert the BasicBlock* value to an IG address */ |
7973 | |
7974 | lab = (insGroup*)emitCodeGetCookie(*bbp++); |
7975 | assert(lab); |
7976 | |
7977 | printf("\n D" SIZE_LETTER " G_M%03u_IG%02u" , Compiler::s_compMethodsCount, lab->igNum); |
7978 | } while (--cnt); |
7979 | } |
7980 | } |
7981 | |
7982 | /***************************************************************************** |
7983 | * |
7984 | * If the given instruction is a shift, display the 2nd operand. |
7985 | */ |
7986 | |
7987 | void emitter::emitDispShift(instruction ins, int cnt) |
7988 | { |
7989 | switch (ins) |
7990 | { |
7991 | case INS_rcl_1: |
7992 | case INS_rcr_1: |
7993 | case INS_rol_1: |
7994 | case INS_ror_1: |
7995 | case INS_shl_1: |
7996 | case INS_shr_1: |
7997 | case INS_sar_1: |
7998 | printf(", 1" ); |
7999 | break; |
8000 | |
8001 | case INS_rcl: |
8002 | case INS_rcr: |
8003 | case INS_rol: |
8004 | case INS_ror: |
8005 | case INS_shl: |
8006 | case INS_shr: |
8007 | case INS_sar: |
8008 | printf(", cl" ); |
8009 | break; |
8010 | |
8011 | case INS_rcl_N: |
8012 | case INS_rcr_N: |
8013 | case INS_rol_N: |
8014 | case INS_ror_N: |
8015 | case INS_shl_N: |
8016 | case INS_shr_N: |
8017 | case INS_sar_N: |
8018 | printf(", %d" , cnt); |
8019 | break; |
8020 | |
8021 | default: |
8022 | break; |
8023 | } |
8024 | } |
8025 | |
8026 | /***************************************************************************** |
8027 | * |
8028 | * Display (optionally) the bytes for the instruction encoding in hex |
8029 | */ |
8030 | |
8031 | void emitter::emitDispInsHex(BYTE* code, size_t sz) |
8032 | { |
8033 | // We do not display the instruction hex if we want diff-able disassembly |
8034 | if (!emitComp->opts.disDiffable) |
8035 | { |
8036 | #ifdef _TARGET_AMD64_ |
8037 | // how many bytes per instruction we format for |
8038 | const size_t digits = 10; |
8039 | #else // _TARGET_X86 |
8040 | const size_t digits = 6; |
8041 | #endif |
8042 | printf(" " ); |
8043 | for (unsigned i = 0; i < sz; i++) |
8044 | { |
8045 | printf("%02X" , (*((BYTE*)(code + i)))); |
8046 | } |
8047 | |
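        // Each byte above was printed as two hex characters, so pad shorter encodings with
        // 2 * (digits - sz) spaces to keep the disassembly text aligned in a fixed column.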
8048 | if (sz < digits) |
8049 | { |
8050 | printf("%.*s" , 2 * (digits - sz), " " ); |
8051 | } |
8052 | } |
8053 | } |
8054 | |
8055 | /***************************************************************************** |
8056 | * |
8057 | * Display the given instruction. |
8058 | */ |
8059 | |
8060 | void emitter::emitDispIns( |
8061 | instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig) |
8062 | { |
8063 | emitAttr attr; |
8064 | const char* sstr; |
8065 | |
8066 | instruction ins = id->idIns(); |
8067 | |
8068 | if (emitComp->verbose) |
8069 | { |
8070 | unsigned idNum = id->idDebugOnlyInfo()->idNum; |
8071 | printf("IN%04x: " , idNum); |
8072 | } |
8073 | |
8074 | #define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc())) |
8075 | |
8076 | /* Display a constant value if the instruction references one */ |
8077 | |
8078 | if (!isNew) |
8079 | { |
8080 | switch (id->idInsFmt()) |
8081 | { |
8082 | int offs; |
8083 | |
8084 | case IF_MRD_RRD: |
8085 | case IF_MWR_RRD: |
8086 | case IF_MRW_RRD: |
8087 | |
8088 | case IF_RRD_MRD: |
8089 | case IF_RWR_MRD: |
8090 | case IF_RRW_MRD: |
8091 | |
8092 | case IF_MRD_CNS: |
8093 | case IF_MWR_CNS: |
8094 | case IF_MRW_CNS: |
8095 | case IF_MRW_SHF: |
8096 | |
8097 | case IF_MRD: |
8098 | case IF_MWR: |
8099 | case IF_MRW: |
8100 | |
8101 | case IF_MRD_OFF: |
8102 | |
8103 | /* Is this actually a reference to a data section? */ |
8104 | |
8105 | offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd); |
8106 | |
8107 | if (offs >= 0) |
8108 | { |
8109 | void* addr; |
8110 | |
8111 | /* Display a data section reference */ |
8112 | |
8113 | assert((unsigned)offs < emitConsDsc.dsdOffs); |
8114 | addr = emitConsBlock ? emitConsBlock + offs : nullptr; |
8115 | |
8116 | #if 0 |
8117 | // TODO-XArch-Cleanup: Fix or remove this code. |
8118 | /* Is the operand an integer or floating-point value? */ |
8119 | |
8120 | bool isFP = false; |
8121 | |
8122 | if (CodeGen::instIsFP(id->idIns())) |
8123 | { |
8124 | switch (id->idIns()) |
8125 | { |
8126 | case INS_fild: |
8127 | case INS_fildl: |
8128 | break; |
8129 | |
8130 | default: |
8131 | isFP = true; |
8132 | break; |
8133 | } |
8134 | } |
8135 | |
8136 | if (offs & 1) |
8137 | printf("@CNS%02u" , offs); |
8138 | else |
8139 | printf("@RWD%02u" , offs); |
8140 | |
8141 | printf(" " ); |
8142 | |
8143 | if (addr) |
8144 | { |
8145 | addr = 0; |
8146 | // TODO-XArch-Bug?: |
8147 | // This was busted by switching the order |
8148 | // in which we output the code block vs. |
8149 | // the data blocks -- when we get here, |
8150 | // the data block has not been filled in |
8151 | // yet, so we'll display garbage. |
8152 | |
8153 | if (isFP) |
8154 | { |
8155 | if (id->idOpSize() == EA_4BYTE) |
8156 | printf("DF %f \n" , addr ? *(float *)addr : 0); |
8157 | else |
8158 | printf("DQ %lf\n" , addr ? *(double *)addr : 0); |
8159 | } |
8160 | else |
8161 | { |
8162 | if (id->idOpSize() <= EA_4BYTE) |
8163 | printf("DD %d \n" , addr ? *(int *)addr : 0); |
8164 | else |
8165 | printf("DQ %D \n" , addr ? *(__int64 *)addr : 0); |
8166 | } |
8167 | } |
8168 | #endif |
8169 | } |
8170 | break; |
8171 | |
8172 | default: |
8173 | break; |
8174 | } |
8175 | } |
8176 | |
8177 | // printf("[F=%s] " , emitIfName(id->idInsFmt())); |
8178 | // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum); |
8179 | // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth); |
8180 | // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32)); |
8181 | // printf("[A=%08X] " , emitSimpleStkMask); |
8182 | // printf("[A=%08X] " , emitSimpleByrefStkMask); |
8183 | // printf("[L=%02u] " , id->idCodeSize()); |
8184 | |
8185 | if (!emitComp->opts.dspEmit && !isNew && !asmfm) |
8186 | { |
8187 | doffs = true; |
8188 | } |
8189 | |
8190 | /* Display the instruction offset */ |
8191 | |
8192 | emitDispInsOffs(offset, doffs); |
8193 | |
8194 | if (code != nullptr) |
8195 | { |
8196 | /* Display the instruction hex code */ |
8197 | |
8198 | emitDispInsHex(code, sz); |
8199 | } |
8200 | |
8201 | /* Display the instruction name */ |
8202 | |
8203 | sstr = codeGen->genInsName(ins); |
8204 | |
8205 | if (IsAVXInstruction(ins) && !IsBMIInstruction(ins)) |
8206 | { |
8207 | printf(" v%-8s" , sstr); |
8208 | } |
8209 | else |
8210 | { |
8211 | printf(" %-9s" , sstr); |
8212 | } |
8213 | #ifndef FEATURE_PAL |
8214 | if (strnlen_s(sstr, 10) >= 8) |
8215 | #else // FEATURE_PAL |
8216 | if (strnlen(sstr, 10) >= 8) |
8217 | #endif // FEATURE_PAL |
8218 | { |
8219 | printf(" " ); |
8220 | } |
8221 | |
8222 | /* By now the size better be set to something */ |
8223 | |
8224 | assert(emitInstCodeSz(id) || emitInstHasNoCode(ins)); |
8225 | |
8226 | /* Figure out the operand size */ |
8227 | |
8228 | if (id->idGCref() == GCT_GCREF) |
8229 | { |
8230 | attr = EA_GCREF; |
8231 | sstr = "gword ptr " ; |
8232 | } |
8233 | else if (id->idGCref() == GCT_BYREF) |
8234 | { |
8235 | attr = EA_BYREF; |
8236 | sstr = "bword ptr " ; |
8237 | } |
8238 | else |
8239 | { |
8240 | attr = id->idOpSize(); |
8241 | sstr = codeGen->genSizeStr(attr); |
8242 | |
8243 | if (ins == INS_lea) |
8244 | { |
8245 | #ifdef _TARGET_AMD64_ |
8246 | assert((attr == EA_4BYTE) || (attr == EA_8BYTE)); |
8247 | #else |
8248 | assert(attr == EA_4BYTE); |
8249 | #endif |
8250 | sstr = "" ; |
8251 | } |
8252 | } |
8253 | |
8254 | /* Now see what instruction format we've got */ |
8255 | |
8256 | // First print the implicit register usage |
8257 | if (instrHasImplicitRegPairDest(ins)) |
8258 | { |
8259 | printf("%s:%s, " , emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize())); |
8260 | } |
8261 | else if (instrIs3opImul(ins)) |
8262 | { |
8263 | regNumber tgtReg = inst3opImulReg(ins); |
8264 | printf("%s, " , emitRegName(tgtReg, id->idOpSize())); |
8265 | } |
8266 | |
8267 | switch (id->idInsFmt()) |
8268 | { |
8269 | ssize_t val; |
8270 | ssize_t offs; |
8271 | CnsVal cnsVal; |
8272 | const char* methodName; |
8273 | |
8274 | case IF_CNS: |
8275 | val = emitGetInsSC(id); |
8276 | #ifdef _TARGET_AMD64_ |
8277 | // no 8-byte immediates allowed here! |
8278 | assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); |
8279 | #endif |
8280 | if (id->idIsCnsReloc()) |
8281 | { |
8282 | emitDispReloc(val); |
8283 | } |
8284 | else |
8285 | { |
8286 | PRINT_CONSTANT: |
8287 | // Munge any pointers if we want diff-able disassembly |
8288 | if (emitComp->opts.disDiffable) |
8289 | { |
8290 | ssize_t top14bits = (val >> 18); |
8291 | if ((top14bits != 0) && (top14bits != -1)) |
8292 | { |
8293 | val = 0xD1FFAB1E; |
8294 | } |
8295 | } |
8296 | if ((val > -1000) && (val < 1000)) |
8297 | { |
8298 | printf("%d" , val); |
8299 | } |
8300 | else if ((val > 0) || (val < -0xFFFFFF)) |
8301 | { |
8302 | printf("0x%IX" , val); |
8303 | } |
8304 | else |
8305 | { // (val < 0) |
8306 | printf("-0x%IX" , -val); |
8307 | } |
8308 | } |
8309 | break; |
8310 | |
8311 | case IF_ARD: |
8312 | case IF_AWR: |
8313 | case IF_ARW: |
8314 | |
8315 | if (ins == INS_call && id->idIsCallRegPtr()) |
8316 | { |
8317 | printf("%s" , emitRegName(id->idAddr()->iiaAddrMode.amBaseReg)); |
8318 | break; |
8319 | } |
8320 | |
8321 | printf("%s" , sstr); |
8322 | emitDispAddrMode(id, isNew); |
8323 | emitDispShift(ins); |
8324 | |
8325 | if (ins == INS_call) |
8326 | { |
8327 | assert(id->idInsFmt() == IF_ARD); |
8328 | |
8329 | /* Ignore indirect calls */ |
8330 | |
8331 | if (id->idDebugOnlyInfo()->idMemCookie == 0) |
8332 | { |
8333 | break; |
8334 | } |
8335 | |
8336 | assert(id->idDebugOnlyInfo()->idMemCookie); |
8337 | |
8338 | /* This is a virtual call */ |
8339 | |
8340 | methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); |
8341 | printf("%s" , methodName); |
8342 | } |
8343 | break; |
8344 | |
8345 | case IF_RRD_ARD: |
8346 | case IF_RWR_ARD: |
8347 | case IF_RRW_ARD: |
8348 | #ifdef _TARGET_AMD64_ |
8349 | if (ins == INS_movsxd) |
8350 | { |
8351 | printf("%s, %s" , emitRegName(id->idReg1(), EA_8BYTE), sstr); |
8352 | } |
8353 | else |
8354 | #endif |
8355 | if (ins == INS_movsx || ins == INS_movzx) |
8356 | { |
8357 | printf("%s, %s" , emitRegName(id->idReg1(), EA_PTRSIZE), sstr); |
8358 | } |
            else if ((ins == INS_crc32) && (attr != EA_8BYTE))
            {
                // The destination register of crc32 is always 4 bytes unless the operand size
                // is 8 bytes; the source operand here is memory, so print its size string.
                printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), sstr);
            }
8365 | else |
8366 | { |
8367 | printf("%s, %s" , emitRegName(id->idReg1(), attr), sstr); |
8368 | } |
8369 | emitDispAddrMode(id); |
8370 | break; |
8371 | |
8372 | case IF_RRW_ARD_CNS: |
8373 | case IF_RWR_ARD_CNS: |
8374 | { |
8375 | printf("%s, %s" , emitRegName(id->idReg1(), attr), sstr); |
8376 | emitDispAddrMode(id); |
8377 | emitGetInsAmdCns(id, &cnsVal); |
8378 | |
8379 | val = cnsVal.cnsVal; |
8380 | printf(", " ); |
8381 | |
8382 | if (cnsVal.cnsReloc) |
8383 | { |
8384 | emitDispReloc(val); |
8385 | } |
8386 | else |
8387 | { |
8388 | goto PRINT_CONSTANT; |
8389 | } |
8390 | |
8391 | break; |
8392 | } |
8393 | |
8394 | case IF_AWR_RRD_CNS: |
8395 | { |
8396 | assert(ins == INS_vextracti128 || ins == INS_vextractf128); |
8397 | // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr" |
8398 | sstr = codeGen->genSizeStr(EA_ATTR(16)); |
8399 | printf(sstr); |
8400 | emitDispAddrMode(id); |
8401 | printf(", %s" , emitRegName(id->idReg1(), attr)); |
8402 | |
8403 | emitGetInsAmdCns(id, &cnsVal); |
8404 | |
8405 | val = cnsVal.cnsVal; |
8406 | printf(", " ); |
8407 | |
8408 | if (cnsVal.cnsReloc) |
8409 | { |
8410 | emitDispReloc(val); |
8411 | } |
8412 | else |
8413 | { |
8414 | goto PRINT_CONSTANT; |
8415 | } |
8416 | |
8417 | break; |
8418 | } |
8419 | |
8420 | case IF_RWR_RRD_ARD: |
8421 | printf("%s, %s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); |
8422 | emitDispAddrMode(id); |
8423 | break; |
8424 | |
8425 | case IF_RWR_ARD_RRD: |
8426 | if (ins == INS_vpgatherqd || ins == INS_vgatherqps) |
8427 | { |
8428 | attr = EA_16BYTE; |
8429 | } |
8430 | sstr = codeGen->genSizeStr(EA_ATTR(4)); |
8431 | printf("%s, %s" , emitRegName(id->idReg1(), attr), sstr); |
8432 | emitDispAddrMode(id); |
8433 | printf(", %s" , emitRegName(id->idReg2(), attr)); |
8434 | break; |
8435 | |
8436 | case IF_RWR_RRD_ARD_CNS: |
8437 | { |
8438 | printf("%s, %s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); |
8439 | emitDispAddrMode(id); |
8440 | emitGetInsAmdCns(id, &cnsVal); |
8441 | |
8442 | val = cnsVal.cnsVal; |
8443 | printf(", " ); |
8444 | |
8445 | if (cnsVal.cnsReloc) |
8446 | { |
8447 | emitDispReloc(val); |
8448 | } |
8449 | else |
8450 | { |
8451 | goto PRINT_CONSTANT; |
8452 | } |
8453 | |
8454 | break; |
8455 | } |
8456 | |
8457 | case IF_RWR_RRD_ARD_RRD: |
8458 | { |
8459 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
8460 | printf("%s, " , emitRegName(id->idReg2(), attr)); |
8461 | emitDispAddrMode(id); |
8462 | |
8463 | emitGetInsAmdCns(id, &cnsVal); |
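            // The extra register operand is encoded in the upper four bits of the immediate
            // (the VEX /is4 byte); recover the XMM register number from it for display.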
8464 | val = (cnsVal.cnsVal >> 4) + XMMBASE; |
8465 | printf(", %s" , emitRegName((regNumber)val, attr)); |
8466 | break; |
8467 | } |
8468 | |
8469 | case IF_ARD_RRD: |
8470 | case IF_AWR_RRD: |
8471 | case IF_ARW_RRD: |
8472 | |
8473 | printf("%s" , sstr); |
8474 | emitDispAddrMode(id); |
8475 | printf(", %s" , emitRegName(id->idReg1(), attr)); |
8476 | break; |
8477 | |
8478 | case IF_AWR_RRD_RRD: |
8479 | { |
8480 | printf("%s" , sstr); |
8481 | emitDispAddrMode(id); |
8482 | printf(", %s" , emitRegName(id->idReg1(), attr)); |
8483 | printf(", %s" , emitRegName(id->idReg2(), attr)); |
8484 | break; |
8485 | } |
8486 | |
8487 | case IF_ARD_CNS: |
8488 | case IF_AWR_CNS: |
8489 | case IF_ARW_CNS: |
8490 | case IF_ARW_SHF: |
8491 | |
8492 | printf("%s" , sstr); |
8493 | emitDispAddrMode(id); |
8494 | emitGetInsAmdCns(id, &cnsVal); |
8495 | val = cnsVal.cnsVal; |
8496 | #ifdef _TARGET_AMD64_ |
8497 | // no 8-byte immediates allowed here! |
8498 | assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); |
8499 | #endif |
8500 | if (id->idInsFmt() == IF_ARW_SHF) |
8501 | { |
8502 | emitDispShift(ins, (BYTE)val); |
8503 | } |
8504 | else |
8505 | { |
8506 | printf(", " ); |
8507 | if (cnsVal.cnsReloc) |
8508 | { |
8509 | emitDispReloc(val); |
8510 | } |
8511 | else |
8512 | { |
8513 | goto PRINT_CONSTANT; |
8514 | } |
8515 | } |
8516 | break; |
8517 | |
8518 | case IF_SRD: |
8519 | case IF_SWR: |
8520 | case IF_SRW: |
8521 | |
8522 | printf("%s" , sstr); |
8523 | |
8524 | #if !FEATURE_FIXED_OUT_ARGS |
8525 | if (ins == INS_pop) |
8526 | emitCurStackLvl -= sizeof(int); |
8527 | #endif |
8528 | |
8529 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8530 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8531 | |
8532 | #if !FEATURE_FIXED_OUT_ARGS |
8533 | if (ins == INS_pop) |
8534 | emitCurStackLvl += sizeof(int); |
8535 | #endif |
8536 | |
8537 | emitDispShift(ins); |
8538 | break; |
8539 | |
8540 | case IF_SRD_RRD: |
8541 | case IF_SWR_RRD: |
8542 | case IF_SRW_RRD: |
8543 | |
8544 | printf("%s" , sstr); |
8545 | |
8546 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8547 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8548 | |
8549 | printf(", %s" , emitRegName(id->idReg1(), attr)); |
8550 | break; |
8551 | |
8552 | case IF_SRD_CNS: |
8553 | case IF_SWR_CNS: |
8554 | case IF_SRW_CNS: |
8555 | case IF_SRW_SHF: |
8556 | |
8557 | printf("%s" , sstr); |
8558 | |
8559 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8560 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8561 | |
8562 | emitGetInsCns(id, &cnsVal); |
8563 | val = cnsVal.cnsVal; |
8564 | #ifdef _TARGET_AMD64_ |
8565 | // no 8-byte immediates allowed here! |
8566 | assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); |
8567 | #endif |
8568 | if (id->idInsFmt() == IF_SRW_SHF) |
8569 | { |
8570 | emitDispShift(ins, (BYTE)val); |
8571 | } |
8572 | else |
8573 | { |
8574 | printf(", " ); |
8575 | if (cnsVal.cnsReloc) |
8576 | { |
8577 | emitDispReloc(val); |
8578 | } |
8579 | else |
8580 | { |
8581 | goto PRINT_CONSTANT; |
8582 | } |
8583 | } |
8584 | break; |
8585 | |
8586 | case IF_RRD_SRD: |
8587 | case IF_RWR_SRD: |
8588 | case IF_RRW_SRD: |
8589 | #ifdef _TARGET_AMD64_ |
8590 | if (ins == INS_movsxd) |
8591 | { |
8592 | printf("%s, %s" , emitRegName(id->idReg1(), EA_8BYTE), sstr); |
8593 | } |
8594 | else |
8595 | #endif |
8596 | if (ins == INS_movsx || ins == INS_movzx) |
8597 | { |
8598 | printf("%s, %s" , emitRegName(id->idReg1(), EA_PTRSIZE), sstr); |
8599 | } |
            else if ((ins == INS_crc32) && (attr != EA_8BYTE))
            {
                // The destination register of crc32 is always 4 bytes unless the operand size
                // is 8 bytes; the source operand here is a stack slot, so print its size string.
                printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), sstr);
            }
8606 | else |
8607 | { |
8608 | printf("%s, %s" , emitRegName(id->idReg1(), attr), sstr); |
8609 | } |
8610 | |
8611 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8612 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8613 | |
8614 | break; |
8615 | |
8616 | case IF_RRW_SRD_CNS: |
8617 | case IF_RWR_SRD_CNS: |
8618 | { |
8619 | printf("%s, %s" , emitRegName(id->idReg1(), attr), sstr); |
8620 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8621 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8622 | emitGetInsCns(id, &cnsVal); |
8623 | |
8624 | val = cnsVal.cnsVal; |
8625 | printf(", " ); |
8626 | |
8627 | if (cnsVal.cnsReloc) |
8628 | { |
8629 | emitDispReloc(val); |
8630 | } |
8631 | else |
8632 | { |
8633 | goto PRINT_CONSTANT; |
8634 | } |
8635 | break; |
8636 | } |
8637 | |
8638 | case IF_RWR_RRD_SRD: |
8639 | printf("%s, %s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); |
8640 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8641 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8642 | break; |
8643 | |
8644 | case IF_RWR_RRD_SRD_CNS: |
8645 | { |
8646 | printf("%s, %s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); |
8647 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8648 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8649 | emitGetInsCns(id, &cnsVal); |
8650 | |
8651 | val = cnsVal.cnsVal; |
8652 | printf(", " ); |
8653 | |
8654 | if (cnsVal.cnsReloc) |
8655 | { |
8656 | emitDispReloc(val); |
8657 | } |
8658 | else |
8659 | { |
8660 | goto PRINT_CONSTANT; |
8661 | } |
8662 | break; |
8663 | } |
8664 | |
8665 | case IF_RWR_RRD_SRD_RRD: |
8666 | { |
8667 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
8668 | printf("%s, " , emitRegName(id->idReg2(), attr)); |
8669 | emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), |
8670 | id->idDebugOnlyInfo()->idVarRefOffs, asmfm); |
8671 | |
8672 | emitGetInsCns(id, &cnsVal); |
8673 | val = (cnsVal.cnsVal >> 4) + XMMBASE; |
8674 | printf(", %s" , emitRegName((regNumber)val, attr)); |
8675 | break; |
8676 | } |
8677 | |
8678 | case IF_RRD_RRD: |
8679 | case IF_RWR_RRD: |
8680 | case IF_RRW_RRD: |
8681 | if (ins == INS_mov_i2xmm) |
8682 | { |
8683 | printf("%s, %s" , emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); |
8684 | } |
8685 | else if (ins == INS_mov_xmm2i) |
8686 | { |
8687 | printf("%s, %s" , emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE)); |
8688 | } |
8689 | else if (ins == INS_pmovmskb) |
8690 | { |
8691 | printf("%s, %s" , emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); |
8692 | } |
8693 | else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd)) |
8694 | { |
8695 | printf(" %s, %s" , emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); |
8696 | } |
8697 | else if ((ins == INS_cvttsd2si) || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si)) |
8698 | { |
8699 | printf(" %s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); |
8700 | } |
8701 | #ifdef _TARGET_AMD64_ |
8702 | else if (ins == INS_movsxd) |
8703 | { |
8704 | printf("%s, %s" , emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE)); |
8705 | } |
8706 | #endif // _TARGET_AMD64_ |
8707 | else if (ins == INS_movsx || ins == INS_movzx) |
8708 | { |
8709 | printf("%s, %s" , emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr)); |
8710 | } |
8711 | else if (ins == INS_bt) |
8712 | { |
8713 | // INS_bt operands are reversed. Display them in the normal order. |
8714 | printf("%s, %s" , emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr)); |
8715 | } |
8716 | #ifdef FEATURE_HW_INTRINSICS |
8717 | else if (ins == INS_crc32 && attr != EA_8BYTE) |
8718 | { |
8719 | // The idReg1 is always 4 bytes, but the size of idReg2 can vary. |
8720 | // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx` |
8721 | printf("%s, %s" , emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); |
8722 | } |
8723 | #endif // FEATURE_HW_INTRINSICS |
8724 | else |
8725 | { |
8726 | printf("%s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr)); |
8727 | } |
8728 | break; |
8729 | |
8730 | case IF_RRW_RRW: |
8731 | assert(ins == INS_xchg); |
8732 | printf("%s," , emitRegName(id->idReg1(), attr)); |
8733 | printf(" %s" , emitRegName(id->idReg2(), attr)); |
8734 | break; |
8735 | |
8736 | case IF_RWR_RRD_RRD: |
8737 | { |
8738 | assert(IsAVXInstruction(ins)); |
8739 | assert(IsThreeOperandAVXInstruction(ins)); |
8740 | regNumber reg2 = id->idReg2(); |
8741 | regNumber reg3 = id->idReg3(); |
8742 | if (ins == INS_bextr || ins == INS_bzhi) |
8743 | { |
8744 | // BMI bextr and bzhi encodes the reg2 in VEX.vvvv and reg3 in modRM, |
8745 | // which is different from most of other instructions |
8746 | regNumber tmp = reg2; |
8747 | reg2 = reg3; |
8748 | reg3 = tmp; |
8749 | } |
8750 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
8751 | printf("%s, " , emitRegName(reg2, attr)); |
8752 | printf("%s" , emitRegName(reg3, attr)); |
8753 | break; |
8754 | } |
8755 | |
8756 | case IF_RWR_RRD_RRD_CNS: |
8757 | assert(IsAVXInstruction(ins)); |
8758 | assert(IsThreeOperandAVXInstruction(ins)); |
8759 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
8760 | printf("%s, " , emitRegName(id->idReg2(), attr)); |
8761 | printf("%s, " , emitRegName(id->idReg3(), attr)); |
8762 | val = emitGetInsSC(id); |
8763 | goto PRINT_CONSTANT; |
8764 | break; |
8765 | case IF_RWR_RRD_RRD_RRD: |
8766 | assert(IsAVXOnlyInstruction(ins)); |
8767 | assert(UseVEXEncoding()); |
8768 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
8769 | printf("%s, " , emitRegName(id->idReg2(), attr)); |
8770 | printf("%s, " , emitRegName(id->idReg3(), attr)); |
8771 | printf("%s" , emitRegName(id->idReg4(), attr)); |
8772 | break; |
8773 | case IF_RRW_RRW_CNS: |
8774 | printf("%s," , emitRegName(id->idReg1(), attr)); |
8775 | printf(" %s" , emitRegName(id->idReg2(), attr)); |
8776 | val = emitGetInsSC(id); |
8777 | #ifdef _TARGET_AMD64_ |
8778 | // no 8-byte immediates allowed here! |
8779 | assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); |
8780 | #endif |
8781 | printf(", " ); |
8782 | if (id->idIsCnsReloc()) |
8783 | { |
8784 | emitDispReloc(val); |
8785 | } |
8786 | else |
8787 | { |
8788 | goto PRINT_CONSTANT; |
8789 | } |
8790 | break; |
8791 | |
8792 | case IF_RRD: |
8793 | case IF_RWR: |
8794 | case IF_RRW: |
8795 | printf("%s" , emitRegName(id->idReg1(), attr)); |
8796 | emitDispShift(ins); |
8797 | break; |
8798 | |
8799 | case IF_RRW_SHF: |
8800 | printf("%s" , emitRegName(id->idReg1(), attr)); |
8801 | emitDispShift(ins, (BYTE)emitGetInsSC(id)); |
8802 | break; |
8803 | |
8804 | case IF_RRD_MRD: |
8805 | case IF_RWR_MRD: |
8806 | case IF_RRW_MRD: |
8807 | |
8808 | if (ins == INS_movsx || ins == INS_movzx) |
8809 | { |
8810 | attr = EA_PTRSIZE; |
8811 | } |
8812 | #ifdef _TARGET_AMD64_ |
8813 | else if (ins == INS_movsxd) |
8814 | { |
8815 | attr = EA_PTRSIZE; |
8816 | } |
8817 | #endif |
            else if ((ins == INS_crc32) && (attr != EA_8BYTE))
            {
                // The destination register of crc32 is always 4 bytes, but the size of the memory
                // operand can vary. Using EA_4BYTE for the register ensures that we print
                // `crc32 eax, word ptr [mem]` instead of `crc32 ax, word ptr [mem]`.
                // (sstr was computed from the original attr above, so the operand size string is unaffected.)
                attr = EA_4BYTE;
            }
8824 | printf("%s, %s" , emitRegName(id->idReg1(), attr), sstr); |
8825 | offs = emitGetInsDsp(id); |
8826 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8827 | break; |
8828 | |
8829 | case IF_RRW_MRD_CNS: |
8830 | case IF_RWR_MRD_CNS: |
8831 | { |
8832 | printf("%s, %s" , emitRegName(id->idReg1(), attr), sstr); |
8833 | offs = emitGetInsDsp(id); |
8834 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8835 | emitGetInsDcmCns(id, &cnsVal); |
8836 | |
8837 | val = cnsVal.cnsVal; |
8838 | printf(", " ); |
8839 | |
8840 | if (cnsVal.cnsReloc) |
8841 | { |
8842 | emitDispReloc(val); |
8843 | } |
8844 | else |
8845 | { |
8846 | goto PRINT_CONSTANT; |
8847 | } |
8848 | break; |
8849 | } |
8850 | |
8851 | case IF_MWR_RRD_CNS: |
8852 | { |
8853 | assert(ins == INS_vextracti128 || ins == INS_vextractf128); |
8854 | // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr" |
8855 | sstr = codeGen->genSizeStr(EA_ATTR(16)); |
            printf("%s", sstr);
8857 | offs = emitGetInsDsp(id); |
8858 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8859 | printf(", %s" , emitRegName(id->idReg1(), attr)); |
8860 | emitGetInsDcmCns(id, &cnsVal); |
8861 | |
8862 | val = cnsVal.cnsVal; |
8863 | printf(", " ); |
8864 | |
8865 | if (cnsVal.cnsReloc) |
8866 | { |
8867 | emitDispReloc(val); |
8868 | } |
8869 | else |
8870 | { |
8871 | goto PRINT_CONSTANT; |
8872 | } |
8873 | |
8874 | break; |
8875 | } |
8876 | |
8877 | case IF_RWR_RRD_MRD: |
8878 | printf("%s, %s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); |
8879 | offs = emitGetInsDsp(id); |
8880 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8881 | break; |
8882 | |
8883 | case IF_RWR_RRD_MRD_CNS: |
8884 | { |
8885 | printf("%s, %s, %s" , emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); |
8886 | offs = emitGetInsDsp(id); |
8887 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8888 | emitGetInsDcmCns(id, &cnsVal); |
8889 | |
8890 | val = cnsVal.cnsVal; |
8891 | printf(", " ); |
8892 | |
8893 | if (cnsVal.cnsReloc) |
8894 | { |
8895 | emitDispReloc(val); |
8896 | } |
8897 | else |
8898 | { |
8899 | goto PRINT_CONSTANT; |
8900 | } |
8901 | break; |
8902 | } |
8903 | |
8904 | case IF_RWR_RRD_MRD_RRD: |
8905 | { |
8906 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
8907 | printf("%s, " , emitRegName(id->idReg2(), attr)); |
8908 | |
8909 | offs = emitGetInsDsp(id); |
8910 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8911 | |
8912 | emitGetInsDcmCns(id, &cnsVal); |
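            // The fourth register operand is carried in the upper four bits of the immediate
            // (the VEX "is4" byte used by instructions such as vblendvps), so recover it from
            // imm8[7:4] and map it back to an XMM register number.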
8913 | val = (cnsVal.cnsVal >> 4) + XMMBASE; |
8914 | printf(", %s" , emitRegName((regNumber)val, attr)); |
8915 | break; |
8916 | } |
8917 | |
8918 | case IF_RWR_MRD_OFF: |
8919 | |
8920 | printf("%s, %s" , emitRegName(id->idReg1(), attr), "offset" ); |
8921 | offs = emitGetInsDsp(id); |
8922 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8923 | break; |
8924 | |
8925 | case IF_MRD_RRD: |
8926 | case IF_MWR_RRD: |
8927 | case IF_MRW_RRD: |
8928 | |
8929 | printf("%s" , sstr); |
8930 | offs = emitGetInsDsp(id); |
8931 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8932 | printf(", %s" , emitRegName(id->idReg1(), attr)); |
8933 | break; |
8934 | |
8935 | case IF_MRD_CNS: |
8936 | case IF_MWR_CNS: |
8937 | case IF_MRW_CNS: |
8938 | case IF_MRW_SHF: |
8939 | |
8940 | printf("%s" , sstr); |
8941 | offs = emitGetInsDsp(id); |
8942 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8943 | emitGetInsDcmCns(id, &cnsVal); |
8944 | val = cnsVal.cnsVal; |
8945 | #ifdef _TARGET_AMD64_ |
8946 | // no 8-byte immediates allowed here! |
8947 | assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); |
8948 | #endif |
8949 | if (cnsVal.cnsReloc) |
8950 | { |
8951 | emitDispReloc(val); |
8952 | } |
8953 | else if (id->idInsFmt() == IF_MRW_SHF) |
8954 | { |
8955 | emitDispShift(ins, (BYTE)val); |
8956 | } |
8957 | else |
8958 | { |
8959 | printf(", " ); |
8960 | goto PRINT_CONSTANT; |
8961 | } |
8962 | break; |
8963 | |
8964 | case IF_MRD: |
8965 | case IF_MWR: |
8966 | case IF_MRW: |
8967 | |
8968 | printf("%s" , sstr); |
8969 | offs = emitGetInsDsp(id); |
8970 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8971 | emitDispShift(ins); |
8972 | break; |
8973 | |
8974 | case IF_MRD_OFF: |
8975 | |
8976 | printf("offset " ); |
8977 | offs = emitGetInsDsp(id); |
8978 | emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); |
8979 | break; |
8980 | |
8981 | case IF_RRD_CNS: |
8982 | case IF_RWR_CNS: |
8983 | case IF_RRW_CNS: |
8984 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
8985 | val = emitGetInsSC(id); |
8986 | if (id->idIsCnsReloc()) |
8987 | { |
8988 | emitDispReloc(val); |
8989 | } |
8990 | else |
8991 | { |
8992 | goto PRINT_CONSTANT; |
8993 | } |
8994 | break; |
8995 | |
8996 | case IF_LABEL: |
8997 | case IF_RWR_LABEL: |
8998 | case IF_SWR_LABEL: |
8999 | |
9000 | if (ins == INS_lea) |
9001 | { |
9002 | printf("%s, " , emitRegName(id->idReg1(), attr)); |
9003 | } |
9004 | else if (ins == INS_mov) |
9005 | { |
9006 | /* mov dword ptr [frame.callSiteReturnAddress], label */ |
9007 | assert(id->idInsFmt() == IF_SWR_LABEL); |
9008 | instrDescLbl* idlbl = (instrDescLbl*)id; |
9009 | |
9010 | emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm); |
9011 | |
9012 | printf(", " ); |
9013 | } |
9014 | |
9015 | if (((instrDescJmp*)id)->idjShort) |
9016 | { |
9017 | printf("SHORT " ); |
9018 | } |
9019 | |
9020 | if (id->idIsBound()) |
9021 | { |
9022 | printf("G_M%03u_IG%02u" , Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum); |
9023 | } |
9024 | else |
9025 | { |
9026 | printf("L_M%03u_" FMT_BB, Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum); |
9027 | } |
9028 | break; |
9029 | |
9030 | case IF_METHOD: |
9031 | case IF_METHPTR: |
9032 | if (id->idIsCallAddr()) |
9033 | { |
9034 | offs = (ssize_t)id->idAddr()->iiaAddr; |
9035 | methodName = "" ; |
9036 | } |
9037 | else |
9038 | { |
9039 | offs = 0; |
9040 | methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); |
9041 | } |
9042 | |
9043 | if (id->idInsFmt() == IF_METHPTR) |
9044 | { |
9045 | printf("[" ); |
9046 | } |
9047 | |
9048 | if (offs) |
9049 | { |
9050 | if (id->idIsDspReloc()) |
9051 | { |
9052 | printf("reloc " ); |
9053 | } |
9054 | printf("%08X" , offs); |
9055 | } |
9056 | else |
9057 | { |
9058 | printf("%s" , methodName); |
9059 | } |
9060 | |
9061 | if (id->idInsFmt() == IF_METHPTR) |
9062 | { |
9063 | printf("]" ); |
9064 | } |
9065 | |
9066 | break; |
9067 | |
9068 | case IF_NONE: |
9069 | break; |
9070 | |
9071 | default: |
9072 | printf("unexpected format %s" , emitIfName(id->idInsFmt())); |
9073 | assert(!"unexpectedFormat" ); |
9074 | break; |
9075 | } |
9076 | |
9077 | if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose)) |
9078 | { |
9079 | // Code size in the instrDesc is different from the actual code size we've been given! |
9080 | printf(" (ECS:%d, ACS:%d)" , id->idCodeSize(), sz); |
9081 | } |
9082 | |
9083 | printf("\n" ); |
9084 | } |
9085 | |
9086 | /*****************************************************************************/ |
9087 | #endif |
9088 | |
9089 | /***************************************************************************** |
9090 | * |
9091 | * Output nBytes bytes of NOP instructions |
9092 | */ |
9093 | |
9094 | static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes) |
9095 | { |
9096 | assert(nBytes <= 15); |
9097 | |
9098 | #ifndef _TARGET_AMD64_ |
    // TODO-X86-CQ: when VIA C3 CPUs are out of circulation, switch to the
    // more efficient real NOP: 0x0F 0x1F + modR/M.
    // We also can't use the AMD-recommended multiple size prefixes (e.g. 0x66 0x66 0x90 for a
    // 3-byte NOP) because the debugger and msdis don't like them, so maybe VIA doesn't either.
    // So instead just stick to repeating single-byte NOPs.
9104 | |
9105 | switch (nBytes) |
9106 | { |
9107 | case 15: |
9108 | *dst++ = 0x90; |
9109 | __fallthrough; |
9110 | case 14: |
9111 | *dst++ = 0x90; |
9112 | __fallthrough; |
9113 | case 13: |
9114 | *dst++ = 0x90; |
9115 | __fallthrough; |
9116 | case 12: |
9117 | *dst++ = 0x90; |
9118 | __fallthrough; |
9119 | case 11: |
9120 | *dst++ = 0x90; |
9121 | __fallthrough; |
9122 | case 10: |
9123 | *dst++ = 0x90; |
9124 | __fallthrough; |
9125 | case 9: |
9126 | *dst++ = 0x90; |
9127 | __fallthrough; |
9128 | case 8: |
9129 | *dst++ = 0x90; |
9130 | __fallthrough; |
9131 | case 7: |
9132 | *dst++ = 0x90; |
9133 | __fallthrough; |
9134 | case 6: |
9135 | *dst++ = 0x90; |
9136 | __fallthrough; |
9137 | case 5: |
9138 | *dst++ = 0x90; |
9139 | __fallthrough; |
9140 | case 4: |
9141 | *dst++ = 0x90; |
9142 | __fallthrough; |
9143 | case 3: |
9144 | *dst++ = 0x90; |
9145 | __fallthrough; |
9146 | case 2: |
9147 | *dst++ = 0x90; |
9148 | __fallthrough; |
9149 | case 1: |
9150 | *dst++ = 0x90; |
9151 | break; |
9152 | case 0: |
9153 | break; |
9154 | } |
9155 | #else // _TARGET_AMD64_ |
9156 | switch (nBytes) |
9157 | { |
9158 | case 2: |
9159 | *dst++ = 0x66; |
9160 | __fallthrough; |
9161 | case 1: |
9162 | *dst++ = 0x90; |
9163 | break; |
9164 | case 0: |
9165 | break; |
9166 | case 3: |
9167 | *dst++ = 0x0F; |
9168 | *dst++ = 0x1F; |
9169 | *dst++ = 0x00; |
9170 | break; |
9171 | case 4: |
9172 | *dst++ = 0x0F; |
9173 | *dst++ = 0x1F; |
9174 | *dst++ = 0x40; |
9175 | *dst++ = 0x00; |
9176 | break; |
9177 | case 6: |
9178 | *dst++ = 0x66; |
9179 | __fallthrough; |
9180 | case 5: |
9181 | *dst++ = 0x0F; |
9182 | *dst++ = 0x1F; |
9183 | *dst++ = 0x44; |
9184 | *dst++ = 0x00; |
9185 | *dst++ = 0x00; |
9186 | break; |
9187 | case 7: |
9188 | *dst++ = 0x0F; |
9189 | *dst++ = 0x1F; |
9190 | *dst++ = 0x80; |
9191 | *dst++ = 0x00; |
9192 | *dst++ = 0x00; |
9193 | *dst++ = 0x00; |
9194 | *dst++ = 0x00; |
9195 | break; |
9196 | case 15: |
9197 | // More than 3 prefixes is slower than just 2 NOPs |
9198 | dst = emitOutputNOP(emitOutputNOP(dst, 7), 8); |
9199 | break; |
9200 | case 14: |
9201 | // More than 3 prefixes is slower than just 2 NOPs |
9202 | dst = emitOutputNOP(emitOutputNOP(dst, 7), 7); |
9203 | break; |
9204 | case 13: |
9205 | // More than 3 prefixes is slower than just 2 NOPs |
9206 | dst = emitOutputNOP(emitOutputNOP(dst, 5), 8); |
9207 | break; |
9208 | case 12: |
9209 | // More than 3 prefixes is slower than just 2 NOPs |
9210 | dst = emitOutputNOP(emitOutputNOP(dst, 4), 8); |
9211 | break; |
9212 | case 11: |
9213 | *dst++ = 0x66; |
9214 | __fallthrough; |
9215 | case 10: |
9216 | *dst++ = 0x66; |
9217 | __fallthrough; |
9218 | case 9: |
9219 | *dst++ = 0x66; |
9220 | __fallthrough; |
9221 | case 8: |
9222 | *dst++ = 0x0F; |
9223 | *dst++ = 0x1F; |
9224 | *dst++ = 0x84; |
9225 | *dst++ = 0x00; |
9226 | *dst++ = 0x00; |
9227 | *dst++ = 0x00; |
9228 | *dst++ = 0x00; |
9229 | *dst++ = 0x00; |
9230 | break; |
9231 | } |
9232 | #endif // _TARGET_AMD64_ |
9233 | |
9234 | return dst; |
9235 | } |
9236 | |
9237 | /***************************************************************************** |
9238 | * |
9239 | * Output an instruction involving an address mode. |
9240 | */ |
9241 | |
9242 | BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) |
9243 | { |
9244 | regNumber reg; |
9245 | regNumber rgx; |
9246 | ssize_t dsp; |
9247 | bool dspInByte; |
9248 | bool dspIsZero; |
9249 | |
9250 | instruction ins = id->idIns(); |
9251 | emitAttr size = id->idOpSize(); |
9252 | size_t opsz = EA_SIZE_IN_BYTES(size); |
9253 | |
9254 | // Get the base/index registers |
9255 | reg = id->idAddr()->iiaAddrMode.amBaseReg; |
9256 | rgx = id->idAddr()->iiaAddrMode.amIndxReg; |
9257 | |
9258 | // For INS_call the instruction size is actually the return value size |
9259 | if (ins == INS_call) |
9260 | { |
9261 | // Special case: call via a register |
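        // (e.g. "call rax" is encoded as FF D0: opcode FF with ModRM mod=11, reg=/2, rm=rax)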
9262 | if (id->idIsCallRegPtr()) |
9263 | { |
9264 | code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call)); |
9265 | |
9266 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode); |
9267 | dst += emitOutputWord(dst, opcode); |
9268 | goto DONE; |
9269 | } |
9270 | |
9271 | // The displacement field is in an unusual place for calls |
9272 | dsp = emitGetInsCIdisp(id); |
9273 | |
9274 | #ifdef _TARGET_AMD64_ |
9275 | |
9276 | // Compute the REX prefix if it exists |
9277 | if (IsExtendedReg(reg, EA_PTRSIZE)) |
9278 | { |
9279 | insEncodeReg012(ins, reg, EA_PTRSIZE, &code); |
9280 | // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. |
9281 | reg = (regNumber)RegEncoding(reg); |
9282 | } |
9283 | |
9284 | if (IsExtendedReg(rgx, EA_PTRSIZE)) |
9285 | { |
9286 | insEncodeRegSIB(ins, rgx, &code); |
9287 | // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. |
9288 | rgx = (regNumber)RegEncoding(rgx); |
9289 | } |
9290 | |
9291 | // And emit the REX prefix |
9292 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
9293 | |
9294 | #endif // _TARGET_AMD64_ |
9295 | |
9296 | goto GOT_DSP; |
9297 | } |
9298 | |
9299 | // Is there a large constant operand? |
9300 | if (addc && (size > EA_1BYTE)) |
9301 | { |
9302 | ssize_t cval = addc->cnsVal; |
9303 | |
9304 | // Does the constant fit in a byte? |
9305 | if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test) |
9306 | { |
9307 | if (id->idInsFmt() != IF_ARW_SHF) |
9308 | { |
9309 | code |= 2; |
9310 | } |
9311 | |
9312 | opsz = 1; |
9313 | } |
9314 | } |
9315 | |
    // Emit VEX prefix if required
    // There are some callers who already add the VEX prefix and then call this routine.
    // Therefore, add the VEX prefix only if one is not already present.
9319 | code = AddVexPrefixIfNeededAndNotPresent(ins, code, size); |
9320 | |
9321 | // For this format, moves do not support a third operand, so we only need to handle the binary ops. |
9322 | if (TakesVexPrefix(ins)) |
9323 | { |
9324 | if (IsDstDstSrcAVXInstruction(ins)) |
9325 | { |
9326 | regNumber src1 = REG_NA; |
9327 | |
9328 | switch (id->idInsFmt()) |
9329 | { |
9330 | case IF_RWR_RRD_ARD: |
9331 | case IF_RWR_ARD_RRD: |
9332 | case IF_RWR_RRD_ARD_CNS: |
9333 | case IF_RWR_RRD_ARD_RRD: |
9334 | { |
9335 | src1 = id->idReg2(); |
9336 | break; |
9337 | } |
9338 | |
9339 | default: |
9340 | { |
9341 | src1 = id->idReg1(); |
9342 | break; |
9343 | } |
9344 | } |
9345 | |
9346 | // encode source operand reg in 'vvvv' bits in 1's complement form |
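            // (e.g. if src1 is xmm6, VEX.vvvv holds ~6 & 0xF = 0b1001)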
9347 | code = insEncodeReg3456(ins, src1, size, code); |
9348 | } |
9349 | else if (IsDstSrcSrcAVXInstruction(ins)) |
9350 | { |
9351 | code = insEncodeReg3456(ins, id->idReg2(), size, code); |
9352 | } |
9353 | } |
9354 | |
9355 | // Emit the REX prefix if required |
9356 | if (TakesRexWPrefix(ins, size)) |
9357 | { |
9358 | code = AddRexWPrefix(ins, code); |
9359 | } |
9360 | |
9361 | if (IsExtendedReg(reg, EA_PTRSIZE)) |
9362 | { |
9363 | insEncodeReg012(ins, reg, EA_PTRSIZE, &code); |
9364 | // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. |
9365 | reg = (regNumber)RegEncoding(reg); |
9366 | } |
9367 | |
9368 | if (IsExtendedReg(rgx, EA_PTRSIZE)) |
9369 | { |
9370 | insEncodeRegSIB(ins, rgx, &code); |
9371 | // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. |
9372 | rgx = (regNumber)RegEncoding(rgx); |
9373 | } |
9374 | |
9375 | // Special case emitting AVX instructions |
9376 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9377 | { |
9378 | if ((ins == INS_crc32) && (size > EA_1BYTE)) |
9379 | { |
9380 | code |= 0x0100; |
9381 | |
9382 | if (size == EA_2BYTE) |
9383 | { |
9384 | dst += emitOutputByte(dst, 0x66); |
9385 | } |
9386 | } |
9387 | |
9388 | regNumber reg345 = REG_NA; |
9389 | if (IsBMIInstruction(ins)) |
9390 | { |
9391 | reg345 = getBmiRegNumber(ins); |
9392 | } |
9393 | if (reg345 == REG_NA) |
9394 | { |
9395 | switch (id->idInsFmt()) |
9396 | { |
9397 | case IF_AWR_RRD_RRD: |
9398 | { |
9399 | reg345 = id->idReg2(); |
9400 | break; |
9401 | } |
9402 | |
9403 | default: |
9404 | { |
9405 | reg345 = id->idReg1(); |
9406 | break; |
9407 | } |
9408 | } |
9409 | } |
9410 | unsigned regcode = insEncodeReg345(ins, reg345, size, &code); |
9411 | |
9412 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
9413 | |
9414 | if (UseVEXEncoding() && (ins != INS_crc32)) |
9415 | { |
9416 | // Emit last opcode byte |
9417 | // TODO-XArch-CQ: Right now support 4-byte opcode instructions only |
9418 | assert((code & 0xFF) == 0); |
9419 | dst += emitOutputByte(dst, (code >> 8) & 0xFF); |
9420 | } |
9421 | else |
9422 | { |
9423 | dst += emitOutputWord(dst, code >> 16); |
9424 | dst += emitOutputWord(dst, code & 0xFFFF); |
9425 | } |
9426 | |
9427 | code = regcode; |
9428 | } |
9429 | // Is this a 'big' opcode? |
9430 | else if (code & 0xFF000000) |
9431 | { |
9432 | // Output the REX prefix |
9433 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
9434 | |
9435 | // Output the highest word of the opcode |
        // We need to check again because for AVX instructions the leading opcode bytes are
        // stripped off and encoded as part of the VEX prefix.
9438 | if (code & 0xFF000000) |
9439 | { |
9440 | dst += emitOutputWord(dst, code >> 16); |
9441 | code &= 0x0000FFFF; |
9442 | } |
9443 | } |
9444 | else if (code & 0x00FF0000) |
9445 | { |
9446 | // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix. |
9447 | assert(ins != INS_bt); |
9448 | |
9449 | // Output the REX prefix |
9450 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
9451 | |
9452 | // Output the highest byte of the opcode |
9453 | if (code & 0x00FF0000) |
9454 | { |
9455 | dst += emitOutputByte(dst, code >> 16); |
9456 | code &= 0x0000FFFF; |
9457 | } |
9458 | |
        // Use the large version if this is not a byte. This trick will not
        // work for SSE2 and AVX instructions.
9461 | if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins)) |
9462 | { |
9463 | code++; |
9464 | } |
9465 | } |
9466 | else if (CodeGen::instIsFP(ins)) |
9467 | { |
9468 | assert(size == EA_4BYTE || size == EA_8BYTE); |
9469 | if (size == EA_8BYTE) |
9470 | { |
9471 | code += 4; |
9472 | } |
9473 | } |
9474 | else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins)) |
9475 | { |
9476 | /* Is the operand size larger than a byte? */ |
9477 | |
9478 | switch (size) |
9479 | { |
9480 | case EA_1BYTE: |
9481 | break; |
9482 | |
9483 | case EA_2BYTE: |
9484 | |
9485 | /* Output a size prefix for a 16-bit operand */ |
9486 | |
9487 | dst += emitOutputByte(dst, 0x66); |
9488 | |
9489 | __fallthrough; |
9490 | |
9491 | case EA_4BYTE: |
9492 | #ifdef _TARGET_AMD64_ |
9493 | case EA_8BYTE: |
9494 | #endif |
9495 | |
9496 | /* Set the 'w' bit to get the large version */ |
9497 | |
9498 | code |= 0x1; |
9499 | break; |
9500 | |
9501 | #ifdef _TARGET_X86_ |
9502 | case EA_8BYTE: |
9503 | |
9504 | /* Double operand - set the appropriate bit */ |
9505 | |
9506 | code |= 0x04; |
9507 | break; |
9508 | |
9509 | #endif // _TARGET_X86_ |
9510 | |
9511 | default: |
9512 | NO_WAY("unexpected size" ); |
9513 | break; |
9514 | } |
9515 | } |
9516 | |
9517 | // Output the REX prefix |
9518 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
9519 | |
9520 | // Get the displacement value |
9521 | dsp = emitGetInsAmdAny(id); |
9522 | |
9523 | GOT_DSP: |
9524 | |
9525 | dspInByte = ((signed char)dsp == (ssize_t)dsp); |
9526 | dspIsZero = (dsp == 0); |
9527 | |
9528 | if (id->idIsDspReloc()) |
9529 | { |
9530 | dspInByte = false; // relocs can't be placed in a byte |
9531 | } |
9532 | |
9533 | // Is there a [scaled] index component? |
9534 | if (rgx == REG_NA) |
9535 | { |
9536 | // The address is of the form "[reg+disp]" |
9537 | switch (reg) |
9538 | { |
9539 | case REG_NA: |
9540 | { |
9541 | if (id->idIsDspReloc()) |
9542 | { |
9543 | INT32 addlDelta = 0; |
9544 | |
9545 | // The address is of the form "[disp]" |
9546 | // On x86 - disp is relative to zero |
9547 | // On Amd64 - disp is relative to RIP |
9548 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9549 | { |
9550 | dst += emitOutputByte(dst, code | 0x05); |
9551 | } |
9552 | else |
9553 | { |
9554 | dst += emitOutputWord(dst, code | 0x0500); |
9555 | } |
9556 | |
9557 | if (addc) |
9558 | { |
                        // It is of the form "ins [disp], immed"
                        // For emitting the relocation, we also need to take into account the
                        // additional bytes of code emitted for the immediate value.
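                        // For example, for "add dword ptr [disp], 0x12345678" the 4-byte immediate
                        // follows the disp32, so addlDelta = -4 accounts for the extra bytes emitted
                        // after the fixup site.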
9562 | |
9563 | ssize_t cval = addc->cnsVal; |
9564 | |
9565 | #ifdef _TARGET_AMD64_ |
9566 | // all these opcodes only take a sign-extended 4-byte immediate |
9567 | noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); |
9568 | #else //_TARGET_X86_ |
9569 | noway_assert(opsz <= 4); |
9570 | #endif //_TARGET_X86_ |
9571 | |
9572 | switch (opsz) |
9573 | { |
9574 | case 0: |
9575 | case 4: |
9576 | case 8: |
9577 | addlDelta = -4; |
9578 | break; |
9579 | case 2: |
9580 | addlDelta = -2; |
9581 | break; |
9582 | case 1: |
9583 | addlDelta = -1; |
9584 | break; |
9585 | |
9586 | default: |
9587 | assert(!"unexpected operand size" ); |
9588 | unreached(); |
9589 | } |
9590 | } |
9591 | |
9592 | #ifdef _TARGET_AMD64_ |
9593 | // We emit zero on Amd64, to avoid the assert in emitOutputLong() |
9594 | dst += emitOutputLong(dst, 0); |
9595 | #else |
9596 | dst += emitOutputLong(dst, dsp); |
9597 | #endif |
9598 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0, |
9599 | addlDelta); |
9600 | } |
9601 | else |
9602 | { |
9603 | #ifdef _TARGET_X86_ |
9604 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9605 | { |
9606 | dst += emitOutputByte(dst, code | 0x05); |
9607 | } |
9608 | else |
9609 | { |
9610 | dst += emitOutputWord(dst, code | 0x0500); |
9611 | } |
9612 | #else //_TARGET_AMD64_ |
                    // Amd64: addr fits within 32 bits and can be encoded as a displacement relative to
                    // zero. This addr mode should never be used while generating relocatable ngen code,
                    // nor if the addr can be encoded as a pc-relative address.
9616 | noway_assert(!emitComp->opts.compReloc); |
9617 | noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32); |
9618 | noway_assert((int)dsp == dsp); |
9619 | |
                    // This requires specifying a SIB byte after the ModRM byte.
9621 | if (EncodedBySSE38orSSE3A(ins)) |
9622 | { |
9623 | dst += emitOutputByte(dst, code | 0x04); |
9624 | } |
9625 | else |
9626 | { |
9627 | dst += emitOutputWord(dst, code | 0x0400); |
9628 | } |
9629 | dst += emitOutputByte(dst, 0x25); |
9630 | #endif //_TARGET_AMD64_ |
9631 | dst += emitOutputLong(dst, dsp); |
9632 | } |
9633 | break; |
9634 | } |
9635 | |
9636 | case REG_EBP: |
9637 | { |
9638 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9639 | { |
9640 | // Does the offset fit in a byte? |
9641 | if (dspInByte) |
9642 | { |
9643 | dst += emitOutputByte(dst, code | 0x45); |
9644 | dst += emitOutputByte(dst, dsp); |
9645 | } |
9646 | else |
9647 | { |
9648 | dst += emitOutputByte(dst, code | 0x85); |
9649 | dst += emitOutputLong(dst, dsp); |
9650 | |
9651 | if (id->idIsDspReloc()) |
9652 | { |
9653 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9654 | } |
9655 | } |
9656 | } |
9657 | else |
9658 | { |
9659 | // Does the offset fit in a byte? |
9660 | if (dspInByte) |
9661 | { |
9662 | dst += emitOutputWord(dst, code | 0x4500); |
9663 | dst += emitOutputByte(dst, dsp); |
9664 | } |
9665 | else |
9666 | { |
9667 | dst += emitOutputWord(dst, code | 0x8500); |
9668 | dst += emitOutputLong(dst, dsp); |
9669 | |
9670 | if (id->idIsDspReloc()) |
9671 | { |
9672 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9673 | } |
9674 | } |
9675 | } |
9676 | break; |
9677 | } |
9678 | |
9679 | case REG_ESP: |
9680 | { |
9681 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9682 | { |
9683 | // Is the offset 0 or does it at least fit in a byte? |
9684 | if (dspIsZero) |
9685 | { |
9686 | dst += emitOutputByte(dst, code | 0x04); |
9687 | dst += emitOutputByte(dst, 0x24); |
9688 | } |
9689 | else if (dspInByte) |
9690 | { |
9691 | dst += emitOutputByte(dst, code | 0x44); |
9692 | dst += emitOutputByte(dst, 0x24); |
9693 | dst += emitOutputByte(dst, dsp); |
9694 | } |
9695 | else |
9696 | { |
9697 | dst += emitOutputByte(dst, code | 0x84); |
9698 | dst += emitOutputByte(dst, 0x24); |
9699 | dst += emitOutputLong(dst, dsp); |
9700 | if (id->idIsDspReloc()) |
9701 | { |
9702 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9703 | } |
9704 | } |
9705 | } |
9706 | else |
9707 | { |
9708 | // Is the offset 0 or does it at least fit in a byte? |
9709 | if (dspIsZero) |
9710 | { |
9711 | dst += emitOutputWord(dst, code | 0x0400); |
9712 | dst += emitOutputByte(dst, 0x24); |
9713 | } |
9714 | else if (dspInByte) |
9715 | { |
9716 | dst += emitOutputWord(dst, code | 0x4400); |
9717 | dst += emitOutputByte(dst, 0x24); |
9718 | dst += emitOutputByte(dst, dsp); |
9719 | } |
9720 | else |
9721 | { |
9722 | dst += emitOutputWord(dst, code | 0x8400); |
9723 | dst += emitOutputByte(dst, 0x24); |
9724 | dst += emitOutputLong(dst, dsp); |
9725 | if (id->idIsDspReloc()) |
9726 | { |
9727 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9728 | } |
9729 | } |
9730 | } |
9731 | break; |
9732 | } |
9733 | |
9734 | default: |
9735 | { |
9736 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9737 | { |
9738 | // Put the register in the opcode |
9739 | code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr); |
9740 | |
9741 | // Is there a displacement? |
9742 | if (dspIsZero) |
9743 | { |
9744 | // This is simply "[reg]" |
9745 | dst += emitOutputByte(dst, code); |
9746 | } |
9747 | else |
9748 | { |
                        // This is "[reg + dsp]" -- does the offset fit in a byte?
9750 | if (dspInByte) |
9751 | { |
9752 | dst += emitOutputByte(dst, code | 0x40); |
9753 | dst += emitOutputByte(dst, dsp); |
9754 | } |
9755 | else |
9756 | { |
9757 | dst += emitOutputByte(dst, code | 0x80); |
9758 | dst += emitOutputLong(dst, dsp); |
9759 | if (id->idIsDspReloc()) |
9760 | { |
9761 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9762 | } |
9763 | } |
9764 | } |
9765 | } |
9766 | else |
9767 | { |
9768 | // Put the register in the opcode |
9769 | code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8; |
9770 | |
9771 | // Is there a displacement? |
9772 | if (dspIsZero) |
9773 | { |
9774 | // This is simply "[reg]" |
9775 | dst += emitOutputWord(dst, code); |
9776 | } |
9777 | else |
9778 | { |
                        // This is "[reg + dsp]" -- does the offset fit in a byte?
9780 | if (dspInByte) |
9781 | { |
9782 | dst += emitOutputWord(dst, code | 0x4000); |
9783 | dst += emitOutputByte(dst, dsp); |
9784 | } |
9785 | else |
9786 | { |
9787 | dst += emitOutputWord(dst, code | 0x8000); |
9788 | dst += emitOutputLong(dst, dsp); |
9789 | if (id->idIsDspReloc()) |
9790 | { |
9791 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9792 | } |
9793 | } |
9794 | } |
9795 | } |
9796 | |
9797 | break; |
9798 | } |
9799 | } |
9800 | } |
9801 | else |
9802 | { |
9803 | unsigned regByte; |
9804 | |
9805 | // We have a scaled index operand |
9806 | unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale); |
9807 | |
9808 | // Is the index operand scaled? |
9809 | if (mul > 1) |
9810 | { |
9811 | // Is there a base register? |
9812 | if (reg != REG_NA) |
9813 | { |
9814 | // The address is "[reg + {2/4/8} * rgx + icon]" |
9815 | regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | |
9816 | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); |
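                // SIB byte layout is scale(2):index(3):base(3). For example, "[rax + 4*rcx + 0x10]"
                // uses SIB 0x88 (scale=b10 i.e. *4, index=rcx, base=rax) with mod=01 and disp8 0x10.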
9817 | |
9818 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9819 | { |
                    // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
9821 | if (dspIsZero && reg != REG_EBP) |
9822 | { |
9823 | // The address is "[reg + {2/4/8} * rgx]" |
9824 | dst += emitOutputByte(dst, code | 0x04); |
9825 | dst += emitOutputByte(dst, regByte); |
9826 | } |
9827 | else |
9828 | { |
9829 | // The address is "[reg + {2/4/8} * rgx + disp]" |
9830 | if (dspInByte) |
9831 | { |
9832 | dst += emitOutputByte(dst, code | 0x44); |
9833 | dst += emitOutputByte(dst, regByte); |
9834 | dst += emitOutputByte(dst, dsp); |
9835 | } |
9836 | else |
9837 | { |
9838 | dst += emitOutputByte(dst, code | 0x84); |
9839 | dst += emitOutputByte(dst, regByte); |
9840 | dst += emitOutputLong(dst, dsp); |
9841 | if (id->idIsDspReloc()) |
9842 | { |
9843 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9844 | } |
9845 | } |
9846 | } |
9847 | } |
9848 | else |
9849 | { |
                    // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
9851 | if (dspIsZero && reg != REG_EBP) |
9852 | { |
9853 | // The address is "[reg + {2/4/8} * rgx]" |
9854 | dst += emitOutputWord(dst, code | 0x0400); |
9855 | dst += emitOutputByte(dst, regByte); |
9856 | } |
9857 | else |
9858 | { |
9859 | // The address is "[reg + {2/4/8} * rgx + disp]" |
9860 | if (dspInByte) |
9861 | { |
9862 | dst += emitOutputWord(dst, code | 0x4400); |
9863 | dst += emitOutputByte(dst, regByte); |
9864 | dst += emitOutputByte(dst, dsp); |
9865 | } |
9866 | else |
9867 | { |
9868 | dst += emitOutputWord(dst, code | 0x8400); |
9869 | dst += emitOutputByte(dst, regByte); |
9870 | dst += emitOutputLong(dst, dsp); |
9871 | if (id->idIsDspReloc()) |
9872 | { |
9873 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9874 | } |
9875 | } |
9876 | } |
9877 | } |
9878 | } |
9879 | else |
9880 | { |
9881 | // The address is "[{2/4/8} * rgx + icon]" |
9882 | regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) | |
9883 | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); |
9884 | |
9885 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9886 | { |
9887 | dst += emitOutputByte(dst, code | 0x04); |
9888 | } |
9889 | else |
9890 | { |
9891 | dst += emitOutputWord(dst, code | 0x0400); |
9892 | } |
9893 | |
9894 | dst += emitOutputByte(dst, regByte); |
9895 | |
9896 | // Special case: jump through a jump table |
9897 | if (ins == INS_i_jmp) |
9898 | { |
9899 | dsp += (size_t)emitConsBlock; |
9900 | } |
9901 | |
9902 | dst += emitOutputLong(dst, dsp); |
9903 | if (id->idIsDspReloc()) |
9904 | { |
9905 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9906 | } |
9907 | } |
9908 | } |
9909 | else |
9910 | { |
9911 | // The address is "[reg+rgx+dsp]" |
9912 | regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr); |
9913 | |
9914 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
9915 | { |
9916 | if (dspIsZero && reg != REG_EBP) |
9917 | { |
                    // This is "[reg+rgx]"
9919 | dst += emitOutputByte(dst, code | 0x04); |
9920 | dst += emitOutputByte(dst, regByte); |
9921 | } |
9922 | else |
9923 | { |
                    // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
9925 | if (dspInByte) |
9926 | { |
9927 | dst += emitOutputByte(dst, code | 0x44); |
9928 | dst += emitOutputByte(dst, regByte); |
9929 | dst += emitOutputByte(dst, dsp); |
9930 | } |
9931 | else |
9932 | { |
9933 | dst += emitOutputByte(dst, code | 0x84); |
9934 | dst += emitOutputByte(dst, regByte); |
9935 | dst += emitOutputLong(dst, dsp); |
9936 | if (id->idIsDspReloc()) |
9937 | { |
9938 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9939 | } |
9940 | } |
9941 | } |
9942 | } |
9943 | else |
9944 | { |
9945 | if (dspIsZero && reg != REG_EBP) |
9946 | { |
                    // This is "[reg+rgx]"
9948 | dst += emitOutputWord(dst, code | 0x0400); |
9949 | dst += emitOutputByte(dst, regByte); |
9950 | } |
9951 | else |
9952 | { |
                    // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
9954 | if (dspInByte) |
9955 | { |
9956 | dst += emitOutputWord(dst, code | 0x4400); |
9957 | dst += emitOutputByte(dst, regByte); |
9958 | dst += emitOutputByte(dst, dsp); |
9959 | } |
9960 | else |
9961 | { |
9962 | dst += emitOutputWord(dst, code | 0x8400); |
9963 | dst += emitOutputByte(dst, regByte); |
9964 | dst += emitOutputLong(dst, dsp); |
9965 | if (id->idIsDspReloc()) |
9966 | { |
9967 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW); |
9968 | } |
9969 | } |
9970 | } |
9971 | } |
9972 | } |
9973 | } |
9974 | |
9975 | // Now generate the constant value, if present |
9976 | if (addc) |
9977 | { |
9978 | ssize_t cval = addc->cnsVal; |
9979 | |
9980 | #ifdef _TARGET_AMD64_ |
9981 | // all these opcodes only take a sign-extended 4-byte immediate |
9982 | noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); |
9983 | #endif |
9984 | |
9985 | switch (opsz) |
9986 | { |
9987 | case 0: |
9988 | case 4: |
9989 | case 8: |
9990 | dst += emitOutputLong(dst, cval); |
9991 | break; |
9992 | case 2: |
9993 | dst += emitOutputWord(dst, cval); |
9994 | break; |
9995 | case 1: |
9996 | dst += emitOutputByte(dst, cval); |
9997 | break; |
9998 | |
9999 | default: |
10000 | assert(!"unexpected operand size" ); |
10001 | } |
10002 | |
10003 | if (addc->cnsReloc) |
10004 | { |
10005 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW); |
10006 | assert(opsz == 4); |
10007 | } |
10008 | } |
10009 | |
10010 | DONE: |
10011 | |
10012 | // Does this instruction operate on a GC ref value? |
10013 | if (id->idGCref()) |
10014 | { |
10015 | switch (id->idInsFmt()) |
10016 | { |
10017 | case IF_ARD: |
10018 | case IF_AWR: |
10019 | case IF_ARW: |
10020 | break; |
10021 | |
10022 | case IF_RRD_ARD: |
10023 | break; |
10024 | |
10025 | case IF_RWR_ARD: |
10026 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
10027 | break; |
10028 | |
10029 | case IF_RRW_ARD: |
10030 | // Mark the destination register as holding a GCT_BYREF |
10031 | assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); |
10032 | emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); |
10033 | break; |
10034 | |
10035 | case IF_ARD_RRD: |
10036 | case IF_AWR_RRD: |
10037 | break; |
10038 | |
10039 | case IF_AWR_RRD_RRD: |
10040 | break; |
10041 | |
10042 | case IF_ARD_CNS: |
10043 | case IF_AWR_CNS: |
10044 | break; |
10045 | |
10046 | case IF_ARW_RRD: |
10047 | case IF_ARW_CNS: |
10048 | assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); |
10049 | break; |
10050 | |
10051 | default: |
10052 | #ifdef DEBUG |
10053 | emitDispIns(id, false, false, false); |
10054 | #endif |
10055 | assert(!"unexpected GC ref instruction format" ); |
10056 | } |
10057 | |
10058 | // mul can never produce a GC ref |
10059 | assert(!instrIs3opImul(ins)); |
10060 | assert(ins != INS_mulEAX && ins != INS_imulEAX); |
10061 | } |
10062 | else |
10063 | { |
10064 | if (!emitInsCanOnlyWriteSSE2OrAVXReg(id)) |
10065 | { |
10066 | switch (id->idInsFmt()) |
10067 | { |
10068 | case IF_RWR_ARD: |
10069 | case IF_RRW_ARD: |
10070 | case IF_RWR_RRD_ARD: |
10071 | emitGCregDeadUpd(id->idReg1(), dst); |
10072 | break; |
10073 | default: |
10074 | break; |
10075 | } |
10076 | |
10077 | if (ins == INS_mulEAX || ins == INS_imulEAX) |
10078 | { |
10079 | emitGCregDeadUpd(REG_EAX, dst); |
10080 | emitGCregDeadUpd(REG_EDX, dst); |
10081 | } |
10082 | |
10083 | // For the three operand imul instruction the target register |
10084 | // is encoded in the opcode |
10085 | |
10086 | if (instrIs3opImul(ins)) |
10087 | { |
10088 | regNumber tgtReg = inst3opImulReg(ins); |
10089 | emitGCregDeadUpd(tgtReg, dst); |
10090 | } |
10091 | } |
10092 | } |
10093 | |
10094 | return dst; |
10095 | } |
10096 | |
10097 | /***************************************************************************** |
10098 | * |
10099 | * Output an instruction involving a stack frame value. |
10100 | */ |
10101 | |
10102 | BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) |
10103 | { |
10104 | int adr; |
10105 | int dsp; |
10106 | bool EBPbased; |
10107 | bool dspInByte; |
10108 | bool dspIsZero; |
10109 | |
10110 | instruction ins = id->idIns(); |
10111 | emitAttr size = id->idOpSize(); |
10112 | size_t opsz = EA_SIZE_IN_BYTES(size); |
10113 | |
10114 | assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE); |
10115 | |
10116 | // Is there a large constant operand? |
10117 | if (addc && (size > EA_1BYTE)) |
10118 | { |
10119 | ssize_t cval = addc->cnsVal; |
10120 | |
10121 | // Does the constant fit in a byte? |
10122 | if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test) |
10123 | { |
10124 | if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) && |
10125 | (id->idInsFmt() != IF_RWR_RRD_SRD_CNS)) |
10126 | { |
10127 | code |= 2; |
10128 | } |
10129 | |
10130 | opsz = 1; |
10131 | } |
10132 | } |
10133 | |
    // Add VEX prefix if required.
    // There are some callers who already add the VEX prefix and then call this routine.
    // Therefore, add the VEX prefix only if one is not already present.
10137 | code = AddVexPrefixIfNeededAndNotPresent(ins, code, size); |
10138 | |
10139 | // Compute the REX prefix |
10140 | if (TakesRexWPrefix(ins, size)) |
10141 | { |
10142 | code = AddRexWPrefix(ins, code); |
10143 | } |
10144 | |
10145 | // Special case emitting AVX instructions |
10146 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
10147 | { |
10148 | if ((ins == INS_crc32) && (size > EA_1BYTE)) |
10149 | { |
10150 | code |= 0x0100; |
10151 | |
10152 | if (size == EA_2BYTE) |
10153 | { |
10154 | dst += emitOutputByte(dst, 0x66); |
10155 | } |
10156 | } |
10157 | |
10158 | regNumber reg345 = REG_NA; |
10159 | if (IsBMIInstruction(ins)) |
10160 | { |
10161 | reg345 = getBmiRegNumber(ins); |
10162 | } |
10163 | if (reg345 == REG_NA) |
10164 | { |
10165 | reg345 = id->idReg1(); |
10166 | } |
10167 | else |
10168 | { |
10169 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
10170 | } |
10171 | unsigned regcode = insEncodeReg345(ins, reg345, size, &code); |
10172 | |
10173 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10174 | |
10175 | if (UseVEXEncoding() && (ins != INS_crc32)) |
10176 | { |
10177 | // Emit last opcode byte |
10178 | // TODO-XArch-CQ: Right now support 4-byte opcode instructions only |
10179 | assert((code & 0xFF) == 0); |
10180 | dst += emitOutputByte(dst, (code >> 8) & 0xFF); |
10181 | } |
10182 | else |
10183 | { |
10184 | dst += emitOutputWord(dst, code >> 16); |
10185 | dst += emitOutputWord(dst, code & 0xFFFF); |
10186 | } |
10187 | |
10188 | code = regcode; |
10189 | } |
10190 | // Is this a 'big' opcode? |
10191 | else if (code & 0xFF000000) |
10192 | { |
10193 | // Output the REX prefix |
10194 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10195 | |
10196 | // Output the highest word of the opcode |
10197 | // We need to check again because in case of AVX instructions the leading |
10198 | // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix. |
10199 | if (code & 0xFF000000) |
10200 | { |
10201 | dst += emitOutputWord(dst, code >> 16); |
10202 | code &= 0x0000FFFF; |
10203 | } |
10204 | } |
10205 | else if (code & 0x00FF0000) |
10206 | { |
10207 | // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix. |
10208 | assert(ins != INS_bt); |
10209 | |
10210 | // Output the REX prefix |
10211 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10212 | |
10213 | // Output the highest byte of the opcode. |
10214 | // We need to check again because in case of AVX instructions the leading |
10215 | // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix. |
10216 | if (code & 0x00FF0000) |
10217 | { |
10218 | dst += emitOutputByte(dst, code >> 16); |
10219 | code &= 0x0000FFFF; |
10220 | } |
10221 | |
10222 | // Use the large version if this is not a byte |
10223 | if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) && |
10224 | !IsAVXInstruction(ins)) |
10225 | { |
10226 | code |= 0x1; |
10227 | } |
10228 | } |
10229 | else if (CodeGen::instIsFP(ins)) |
10230 | { |
10231 | assert(size == EA_4BYTE || size == EA_8BYTE); |
10232 | |
10233 | if (size == EA_8BYTE) |
10234 | { |
10235 | code += 4; |
10236 | } |
10237 | } |
10238 | else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins)) |
10239 | { |
10240 | // Is the operand size larger than a byte? |
10241 | switch (size) |
10242 | { |
10243 | case EA_1BYTE: |
10244 | break; |
10245 | |
10246 | case EA_2BYTE: |
10247 | // Output a size prefix for a 16-bit operand |
10248 | dst += emitOutputByte(dst, 0x66); |
10249 | __fallthrough; |
10250 | |
10251 | case EA_4BYTE: |
10252 | #ifdef _TARGET_AMD64_ |
10253 | case EA_8BYTE: |
10254 | #endif // _TARGET_AMD64_ |
10255 | |
10256 | /* Set the 'w' size bit to indicate 32-bit operation |
10257 | * Note that incrementing "code" for INS_call (0xFF) would |
10258 | * overflow, whereas setting the lower bit to 1 just works out |
10259 | */ |
10260 | |
10261 | code |= 0x01; |
10262 | break; |
10263 | |
10264 | #ifdef _TARGET_X86_ |
10265 | case EA_8BYTE: |
10266 | |
                // Double operand - set the appropriate bit.
                // It isn't clear there is a legitimate way to reach this case, since FP instructions
                // are handled by instIsFP above; of the instructions in instrsxarch that take a
                // double, only INS_fprem is not covered there.
10272 | code |= 0x04; |
10273 | NO_WAY("bad 8 byte op" ); |
10274 | break; |
10275 | #endif // _TARGET_X86_ |
10276 | |
10277 | default: |
10278 | NO_WAY("unexpected size" ); |
10279 | break; |
10280 | } |
10281 | } |
10282 | |
10283 | // Output the REX prefix |
10284 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10285 | |
10286 | // Figure out the variable's frame position |
10287 | int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); |
10288 | |
10289 | adr = emitComp->lvaFrameAddress(varNum, &EBPbased); |
10290 | dsp = adr + id->idAddr()->iiaLclVar.lvaOffset(); |
10291 | |
10292 | dspInByte = ((signed char)dsp == (int)dsp); |
10293 | dspIsZero = (dsp == 0); |
10294 | |
    // for stack variables the dsp should never be a reloc
10296 | assert(id->idIsDspReloc() == 0); |
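    // Frame locals are addressed either as [EBP+disp] (the 0x45/0x85 ModRM forms below) or as
    // [ESP+disp], which additionally needs the SIB byte 0x24 to name ESP as the base register.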
10297 | |
10298 | if (EBPbased) |
10299 | { |
10300 | // EBP-based variable: does the offset fit in a byte? |
10301 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
10302 | { |
10303 | if (dspInByte) |
10304 | { |
10305 | dst += emitOutputByte(dst, code | 0x45); |
10306 | dst += emitOutputByte(dst, dsp); |
10307 | } |
10308 | else |
10309 | { |
10310 | dst += emitOutputByte(dst, code | 0x85); |
10311 | dst += emitOutputLong(dst, dsp); |
10312 | } |
10313 | } |
10314 | else |
10315 | { |
10316 | if (dspInByte) |
10317 | { |
10318 | dst += emitOutputWord(dst, code | 0x4500); |
10319 | dst += emitOutputByte(dst, dsp); |
10320 | } |
10321 | else |
10322 | { |
10323 | dst += emitOutputWord(dst, code | 0x8500); |
10324 | dst += emitOutputLong(dst, dsp); |
10325 | } |
10326 | } |
10327 | } |
10328 | else |
10329 | { |
10330 | |
10331 | #if !FEATURE_FIXED_OUT_ARGS |
10332 | // Adjust the offset by the amount currently pushed on the CPU stack |
10333 | dsp += emitCurStackLvl; |
10334 | #endif |
10335 | |
10336 | dspInByte = ((signed char)dsp == (int)dsp); |
10337 | dspIsZero = (dsp == 0); |
10338 | |
10339 | // Does the offset fit in a byte? |
10340 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
10341 | { |
10342 | if (dspInByte) |
10343 | { |
10344 | if (dspIsZero) |
10345 | { |
10346 | dst += emitOutputByte(dst, code | 0x04); |
10347 | dst += emitOutputByte(dst, 0x24); |
10348 | } |
10349 | else |
10350 | { |
10351 | dst += emitOutputByte(dst, code | 0x44); |
10352 | dst += emitOutputByte(dst, 0x24); |
10353 | dst += emitOutputByte(dst, dsp); |
10354 | } |
10355 | } |
10356 | else |
10357 | { |
10358 | dst += emitOutputByte(dst, code | 0x84); |
10359 | dst += emitOutputByte(dst, 0x24); |
10360 | dst += emitOutputLong(dst, dsp); |
10361 | } |
10362 | } |
10363 | else |
10364 | { |
10365 | if (dspInByte) |
10366 | { |
10367 | if (dspIsZero) |
10368 | { |
10369 | dst += emitOutputWord(dst, code | 0x0400); |
10370 | dst += emitOutputByte(dst, 0x24); |
10371 | } |
10372 | else |
10373 | { |
10374 | dst += emitOutputWord(dst, code | 0x4400); |
10375 | dst += emitOutputByte(dst, 0x24); |
10376 | dst += emitOutputByte(dst, dsp); |
10377 | } |
10378 | } |
10379 | else |
10380 | { |
10381 | dst += emitOutputWord(dst, code | 0x8400); |
10382 | dst += emitOutputByte(dst, 0x24); |
10383 | dst += emitOutputLong(dst, dsp); |
10384 | } |
10385 | } |
10386 | } |
10387 | |
10388 | // Now generate the constant value, if present |
10389 | if (addc) |
10390 | { |
10391 | ssize_t cval = addc->cnsVal; |
10392 | |
10393 | #ifdef _TARGET_AMD64_ |
10394 | // all these opcodes only take a sign-extended 4-byte immediate |
10395 | noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); |
10396 | #endif |
10397 | |
10398 | switch (opsz) |
10399 | { |
10400 | case 0: |
10401 | case 4: |
10402 | case 8: |
10403 | dst += emitOutputLong(dst, cval); |
10404 | break; |
10405 | case 2: |
10406 | dst += emitOutputWord(dst, cval); |
10407 | break; |
10408 | case 1: |
10409 | dst += emitOutputByte(dst, cval); |
10410 | break; |
10411 | |
10412 | default: |
10413 | assert(!"unexpected operand size" ); |
10414 | } |
10415 | |
10416 | if (addc->cnsReloc) |
10417 | { |
10418 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW); |
10419 | assert(opsz == 4); |
10420 | } |
10421 | } |
10422 | |
10423 | // Does this instruction operate on a GC ref value? |
10424 | if (id->idGCref()) |
10425 | { |
10426 | // Factor in the sub-variable offset |
10427 | adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); |
10428 | |
10429 | switch (id->idInsFmt()) |
10430 | { |
10431 | case IF_SRD: |
10432 | // Read stack -- no change |
10433 | break; |
10434 | |
10435 | case IF_SWR: // Stack Write (So we need to update GC live for stack var) |
10436 | // Write stack -- GC var may be born |
10437 | emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst); |
10438 | break; |
10439 | |
10440 | case IF_SRD_CNS: |
10441 | // Read stack -- no change |
10442 | break; |
10443 | |
10444 | case IF_SWR_CNS: |
10445 | // Write stack -- no change |
10446 | break; |
10447 | |
10448 | case IF_SRD_RRD: |
10449 | case IF_RRD_SRD: |
10450 | // Read stack , read register -- no change |
10451 | break; |
10452 | |
10453 | case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register) |
10454 | |
10455 | // Read stack , write register -- GC reg may be born |
10456 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
10457 | break; |
10458 | |
10459 | case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var) |
10460 | // Read register, write stack -- GC var may be born |
10461 | emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst); |
10462 | break; |
10463 | |
10464 | case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register) |
10465 | |
10466 | // reg could have been a GCREF as GCREF + int=BYREF |
10467 | // or BYREF+/-int=BYREF |
10468 | assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); |
10469 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
10470 | break; |
10471 | |
10472 | case IF_SRW_CNS: |
10473 | case IF_SRW_RRD: |
10474 | // += -= of a byref, no change |
10475 | |
10476 | case IF_SRW: |
10477 | break; |
10478 | |
10479 | default: |
10480 | #ifdef DEBUG |
10481 | emitDispIns(id, false, false, false); |
10482 | #endif |
10483 | assert(!"unexpected GC ref instruction format" ); |
10484 | } |
10485 | } |
10486 | else |
10487 | { |
10488 | if (!emitInsCanOnlyWriteSSE2OrAVXReg(id)) |
10489 | { |
10490 | switch (id->idInsFmt()) |
10491 | { |
10492 | case IF_RWR_SRD: // Register Write, Stack Read |
10493 | case IF_RRW_SRD: // Register Read/Write, Stack Read |
10494 | case IF_RWR_RRD_SRD: |
10495 | emitGCregDeadUpd(id->idReg1(), dst); |
10496 | break; |
10497 | default: |
10498 | break; |
10499 | } |
10500 | |
10501 | if (ins == INS_mulEAX || ins == INS_imulEAX) |
10502 | { |
10503 | emitGCregDeadUpd(REG_EAX, dst); |
10504 | emitGCregDeadUpd(REG_EDX, dst); |
10505 | } |
10506 | |
10507 | // For the three operand imul instruction the target register |
10508 | // is encoded in the opcode |
10509 | |
10510 | if (instrIs3opImul(ins)) |
10511 | { |
10512 | regNumber tgtReg = inst3opImulReg(ins); |
10513 | emitGCregDeadUpd(tgtReg, dst); |
10514 | } |
10515 | } |
10516 | } |
10517 | |
10518 | return dst; |
10519 | } |
10520 | |
10521 | /***************************************************************************** |
10522 | * |
10523 | * Output an instruction with a static data member (class variable). |
10524 | */ |
10525 | |
10526 | BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) |
10527 | { |
10528 | BYTE* addr; |
10529 | CORINFO_FIELD_HANDLE fldh; |
10530 | ssize_t offs; |
10531 | int doff; |
10532 | |
10533 | emitAttr size = id->idOpSize(); |
10534 | size_t opsz = EA_SIZE_IN_BYTES(size); |
10535 | instruction ins = id->idIns(); |
10536 | bool isMoffset = false; |
10537 | |
10538 | // Get hold of the field handle and offset |
10539 | fldh = id->idAddr()->iiaFieldHnd; |
10540 | offs = emitGetInsDsp(id); |
10541 | |
10542 | // Special case: mov reg, fs:[ddd] |
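    // (0x64 is the FS segment-override prefix byte)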
10543 | if (fldh == FLD_GLOBAL_FS) |
10544 | { |
10545 | dst += emitOutputByte(dst, 0x64); |
10546 | } |
10547 | |
    // Compute VEX prefix
    // Some of its callers already add the VEX prefix and then call this routine.
    // Therefore, add the VEX prefix only if one is not already present.
10551 | code = AddVexPrefixIfNeededAndNotPresent(ins, code, size); |
10552 | |
10553 | // Compute the REX prefix |
10554 | if (TakesRexWPrefix(ins, size)) |
10555 | { |
10556 | code = AddRexWPrefix(ins, code); |
10557 | } |
10558 | |
10559 | // Is there a large constant operand? |
10560 | if (addc && (size > EA_1BYTE)) |
10561 | { |
10562 | ssize_t cval = addc->cnsVal; |
10563 | // Does the constant fit in a byte? |
10564 | if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test) |
10565 | { |
10566 | if (id->idInsFmt() != IF_MRW_SHF) |
10567 | { |
10568 | code |= 2; |
10569 | } |
10570 | |
10571 | opsz = 1; |
10572 | } |
10573 | } |
10574 | #ifdef _TARGET_X86_ |
10575 | else |
10576 | { |
10577 | // Special case: "mov eax, [addr]" and "mov [addr], eax" |
10578 | // Amd64: this is one case where addr can be 64-bit in size. This is |
10579 | // currently unused or not enabled on amd64 as it always uses RIP |
10580 | // relative addressing which results in smaller instruction size. |
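        // These use the x86 "moffset" encodings: the opcode (0xA0/0xA2 here, with the 'w' bit
        // OR'd in below for non-byte sizes, giving 0xA1/0xA3) is followed directly by the
        // absolute address, with no ModRM byte.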
10581 | if (ins == INS_mov && id->idReg1() == REG_EAX) |
10582 | { |
10583 | switch (id->idInsFmt()) |
10584 | { |
10585 | case IF_RWR_MRD: |
10586 | |
10587 | assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); |
10588 | |
10589 | code &= ~((code_t)0xFFFFFFFF); |
10590 | code |= 0xA0; |
10591 | isMoffset = true; |
10592 | break; |
10593 | |
10594 | case IF_MWR_RRD: |
10595 | |
10596 | assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); |
10597 | |
10598 | code &= ~((code_t)0xFFFFFFFF); |
10599 | code |= 0xA2; |
10600 | isMoffset = true; |
10601 | break; |
10602 | |
10603 | default: |
10604 | break; |
10605 | } |
10606 | } |
10607 | } |
10608 | #endif //_TARGET_X86_ |
10609 | |
10610 | // Special case emitting AVX instructions |
10611 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
10612 | { |
10613 | if ((ins == INS_crc32) && (size > EA_1BYTE)) |
10614 | { |
10615 | code |= 0x0100; |
10616 | |
10617 | if (size == EA_2BYTE) |
10618 | { |
10619 | dst += emitOutputByte(dst, 0x66); |
10620 | } |
10621 | } |
10622 | |
10623 | regNumber reg345 = REG_NA; |
10624 | if (IsBMIInstruction(ins)) |
10625 | { |
10626 | reg345 = getBmiRegNumber(ins); |
10627 | } |
10628 | if (reg345 == REG_NA) |
10629 | { |
10630 | reg345 = id->idReg1(); |
10631 | } |
10632 | else |
10633 | { |
10634 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
10635 | } |
10636 | unsigned regcode = insEncodeReg345(ins, reg345, size, &code); |
10637 | |
10638 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10639 | |
10640 | if (UseVEXEncoding() && (ins != INS_crc32)) |
10641 | { |
10642 | // Emit last opcode byte |
10643 | // TODO-XArch-CQ: Right now support 4-byte opcode instructions only |
10644 | assert((code & 0xFF) == 0); |
10645 | dst += emitOutputByte(dst, (code >> 8) & 0xFF); |
10646 | } |
10647 | else |
10648 | { |
10649 | dst += emitOutputWord(dst, code >> 16); |
10650 | dst += emitOutputWord(dst, code & 0xFFFF); |
10651 | } |
10652 | |
10653 | // Emit Mod,R/M byte |
10654 | dst += emitOutputByte(dst, regcode | 0x05); |
10655 | code = 0; |
10656 | } |
10657 | // Is this a 'big' opcode? |
10658 | else if (code & 0xFF000000) |
10659 | { |
10660 | // Output the REX prefix |
10661 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10662 | |
10663 | // Output the highest word of the opcode. |
10664 | // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix. |
10665 | if (code & 0xFF000000) |
10666 | { |
10667 | dst += emitOutputWord(dst, code >> 16); |
10668 | } |
10669 | code &= 0x0000FFFF; |
10670 | } |
10671 | else if (code & 0x00FF0000) |
10672 | { |
10673 | // Output the REX prefix |
10674 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10675 | |
10676 | // Check again as VEX prefix would have encoded leading opcode byte |
10677 | if (code & 0x00FF0000) |
10678 | { |
10679 | dst += emitOutputByte(dst, code >> 16); |
10680 | code &= 0x0000FFFF; |
10681 | } |
10682 | |
10683 | if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd || |
10684 | insIsCMOV(ins)) && |
10685 | size != EA_1BYTE) |
10686 | { |
10687 | // movsx and movzx are 'big' opcodes but also have the 'w' bit |
10688 | code++; |
10689 | } |
10690 | } |
10691 | else if (CodeGen::instIsFP(ins)) |
10692 | { |
10693 | assert(size == EA_4BYTE || size == EA_8BYTE); |
10694 | |
10695 | if (size == EA_8BYTE) |
10696 | { |
10697 | code += 4; |
10698 | } |
10699 | } |
10700 | else |
10701 | { |
10702 | // Is the operand size larger than a byte? |
10703 | switch (size) |
10704 | { |
10705 | case EA_1BYTE: |
10706 | break; |
10707 | |
10708 | case EA_2BYTE: |
10709 | // Output a size prefix for a 16-bit operand |
10710 | dst += emitOutputByte(dst, 0x66); |
10711 | __fallthrough; |
10712 | |
10713 | case EA_4BYTE: |
10714 | #ifdef _TARGET_AMD64_ |
10715 | case EA_8BYTE: |
10716 | #endif |
10717 | // Set the 'w' bit to get the large version |
10718 | code |= 0x1; |
10719 | break; |
10720 | |
10721 | #ifdef _TARGET_X86_ |
10722 | case EA_8BYTE: |
10723 | // Double operand - set the appropriate bit |
10724 | code |= 0x04; |
10725 | break; |
10726 | #endif // _TARGET_X86_ |
10727 | |
10728 | default: |
10729 | assert(!"unexpected size" ); |
10730 | } |
10731 | } |
10732 | |
10733 | // Output the REX prefix |
10734 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
10735 | |
10736 | if (code) |
10737 | { |
10738 | if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset) |
10739 | { |
10740 | dst += emitOutputByte(dst, code); |
10741 | } |
10742 | else |
10743 | { |
10744 | dst += emitOutputWord(dst, code); |
10745 | } |
10746 | } |
10747 | |
10748 | // Do we have a constant or a static data member? |
10749 | doff = Compiler::eeGetJitDataOffs(fldh); |
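    // A non-negative offset means 'fldh' actually refers to a constant in the JIT data
    // section; otherwise it is a real static field handle.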
10750 | if (doff >= 0) |
10751 | { |
10752 | addr = emitConsBlock + doff; |
10753 | |
10754 | int byteSize = EA_SIZE_IN_BYTES(size); |
10755 | |
        // This instruction has a fixed-size (4-byte) source.
10757 | if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss) |
10758 | { |
10759 | byteSize = 4; |
10760 | } |
        // This instruction has a fixed-size (8-byte) source.
10762 | if (ins == INS_vbroadcastsd) |
10763 | { |
10764 | byteSize = 8; |
10765 | } |
10766 | |
10767 | // Check that the offset is properly aligned (i.e. the ddd in [ddd]) |
10768 | assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0)); |
10769 | } |
10770 | else |
10771 | { |
10772 | // Special case: mov reg, fs:[ddd] or mov reg, [ddd] |
10773 | if (jitStaticFldIsGlobAddr(fldh)) |
10774 | { |
10775 | addr = nullptr; |
10776 | } |
10777 | else |
10778 | { |
10779 | addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr); |
10780 | if (addr == nullptr) |
10781 | { |
10782 | NO_WAY("could not obtain address of static field" ); |
10783 | } |
10784 | } |
10785 | } |
10786 | |
10787 | BYTE* target = (addr + offs); |
10788 | |
10789 | if (!isMoffset) |
10790 | { |
10791 | INT32 addlDelta = 0; |
10792 | |
10793 | if (addc) |
10794 | { |
10795 | // It is of the form "ins [disp], immed" |
            // When emitting the relocation, we also need to take into account the
            // additional bytes of code emitted for the immediate value.
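            // A disp32 fixup is normally resolved relative to the end of the 4 displacement
            // bytes; addlDelta (e.g. -4 for a 4-byte immediate) biases it so the displacement
            // is still measured from the end of the whole instruction.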
10798 | |
10799 | ssize_t cval = addc->cnsVal; |
10800 | |
10801 | #ifdef _TARGET_AMD64_ |
10802 | // all these opcodes only take a sign-extended 4-byte immediate |
10803 | noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); |
10804 | #else //_TARGET_X86_ |
10805 | noway_assert(opsz <= 4); |
10806 | #endif //_TARGET_X86_ |
10807 | |
10808 | switch (opsz) |
10809 | { |
10810 | case 0: |
10811 | case 4: |
10812 | case 8: |
10813 | addlDelta = -4; |
10814 | break; |
10815 | case 2: |
10816 | addlDelta = -2; |
10817 | break; |
10818 | case 1: |
10819 | addlDelta = -1; |
10820 | break; |
10821 | |
10822 | default: |
10823 | assert(!"unexpected operand size" ); |
10824 | unreached(); |
10825 | } |
10826 | } |
10827 | |
10828 | #ifdef _TARGET_AMD64_ |
10829 | // All static field and data section constant accesses should be marked as relocatable |
10830 | noway_assert(id->idIsDspReloc()); |
10831 | dst += emitOutputLong(dst, 0); |
10832 | #else //_TARGET_X86_ |
10833 | dst += emitOutputLong(dst, (int)target); |
10834 | #endif //_TARGET_X86_ |
10835 | |
10836 | if (id->idIsDspReloc()) |
10837 | { |
10838 | emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta); |
10839 | } |
10840 | } |
10841 | else |
10842 | { |
10843 | #ifdef _TARGET_AMD64_ |
10844 | // This code path should never be hit on amd64 since it always uses RIP relative addressing. |
10845 | // In future if ever there is a need to enable this special case, also enable the logic |
10846 | // that sets isMoffset to true on amd64. |
10847 | unreached(); |
10848 | #else //_TARGET_X86_ |
10849 | |
10850 | dst += emitOutputSizeT(dst, (ssize_t)target); |
10851 | |
10852 | if (id->idIsDspReloc()) |
10853 | { |
10854 | emitRecordRelocation((void*)(dst - TARGET_POINTER_SIZE), target, IMAGE_REL_BASED_MOFFSET); |
10855 | } |
10856 | |
10857 | #endif //_TARGET_X86_ |
10858 | } |
10859 | |
10860 | // Now generate the constant value, if present |
10861 | if (addc) |
10862 | { |
10863 | ssize_t cval = addc->cnsVal; |
10864 | |
10865 | #ifdef _TARGET_AMD64_ |
10866 | // all these opcodes only take a sign-extended 4-byte immediate |
10867 | noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); |
10868 | #endif |
10869 | |
10870 | switch (opsz) |
10871 | { |
10872 | case 0: |
10873 | case 4: |
10874 | case 8: |
10875 | dst += emitOutputLong(dst, cval); |
10876 | break; |
10877 | case 2: |
10878 | dst += emitOutputWord(dst, cval); |
10879 | break; |
10880 | case 1: |
10881 | dst += emitOutputByte(dst, cval); |
10882 | break; |
10883 | |
10884 | default: |
10885 | assert(!"unexpected operand size" ); |
10886 | } |
10887 | if (addc->cnsReloc) |
10888 | { |
10889 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW); |
10890 | assert(opsz == 4); |
10891 | } |
10892 | } |
10893 | |
10894 | // Does this instruction operate on a GC ref value? |
10895 | if (id->idGCref()) |
10896 | { |
10897 | switch (id->idInsFmt()) |
10898 | { |
10899 | case IF_MRD: |
10900 | case IF_MRW: |
10901 | case IF_MWR: |
10902 | break; |
10903 | |
10904 | case IF_RRD_MRD: |
10905 | break; |
10906 | |
10907 | case IF_RWR_MRD: |
10908 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
10909 | break; |
10910 | |
10911 | case IF_MRD_RRD: |
10912 | case IF_MWR_RRD: |
10913 | case IF_MRW_RRD: |
10914 | break; |
10915 | |
10916 | case IF_MRD_CNS: |
10917 | case IF_MWR_CNS: |
10918 | case IF_MRW_CNS: |
10919 | break; |
10920 | |
10921 | case IF_RRW_MRD: |
10922 | |
10923 | assert(id->idGCref() == GCT_BYREF); |
10924 | assert(ins == INS_add || ins == INS_sub); |
10925 | |
10926 | // Mark it as holding a GCT_BYREF |
10927 | emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); |
10928 | break; |
10929 | |
10930 | default: |
10931 | #ifdef DEBUG |
10932 | emitDispIns(id, false, false, false); |
10933 | #endif |
10934 | assert(!"unexpected GC ref instruction format" ); |
10935 | } |
10936 | } |
10937 | else |
10938 | { |
10939 | if (!emitInsCanOnlyWriteSSE2OrAVXReg(id)) |
10940 | { |
10941 | switch (id->idInsFmt()) |
10942 | { |
10943 | case IF_RWR_MRD: |
10944 | case IF_RRW_MRD: |
10945 | case IF_RWR_RRD_MRD: |
10946 | emitGCregDeadUpd(id->idReg1(), dst); |
10947 | break; |
10948 | default: |
10949 | break; |
10950 | } |
10951 | |
10952 | if (ins == INS_mulEAX || ins == INS_imulEAX) |
10953 | { |
10954 | emitGCregDeadUpd(REG_EAX, dst); |
10955 | emitGCregDeadUpd(REG_EDX, dst); |
10956 | } |
10957 | |
10958 | // For the three operand imul instruction the target register |
10959 | // is encoded in the opcode |
10960 | |
10961 | if (instrIs3opImul(ins)) |
10962 | { |
10963 | regNumber tgtReg = inst3opImulReg(ins); |
10964 | emitGCregDeadUpd(tgtReg, dst); |
10965 | } |
10966 | } |
10967 | } |
10968 | |
10969 | return dst; |
10970 | } |
10971 | |
10972 | /***************************************************************************** |
10973 | * |
10974 | * Output an instruction with one register operand. |
10975 | */ |
10976 | |
10977 | BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) |
10978 | { |
10979 | code_t code; |
10980 | |
10981 | instruction ins = id->idIns(); |
10982 | regNumber reg = id->idReg1(); |
10983 | emitAttr size = id->idOpSize(); |
10984 | |
    // We would need to update the GC info correctly
10986 | assert(!IsSSEInstruction(ins)); |
10987 | assert(!IsAVXInstruction(ins)); |
10988 | |
10989 | // Get the 'base' opcode |
10990 | switch (ins) |
10991 | { |
10992 | case INS_inc: |
10993 | case INS_dec: |
10994 | |
10995 | #ifdef _TARGET_AMD64_ |
10996 | if (true) |
10997 | #else |
10998 | if (size == EA_1BYTE) |
10999 | #endif |
11000 | { |
11001 | assert(INS_inc_l == INS_inc + 1); |
11002 | assert(INS_dec_l == INS_dec + 1); |
11003 | |
11004 | // Can't use the compact form, use the long form |
11005 | ins = (instruction)(ins + 1); |
11006 | if (size == EA_2BYTE) |
11007 | { |
11008 | // Output a size prefix for a 16-bit operand |
11009 | dst += emitOutputByte(dst, 0x66); |
11010 | } |
11011 | |
11012 | code = insCodeRR(ins); |
11013 | if (size != EA_1BYTE) |
11014 | { |
11015 | // Set the 'w' bit to get the large version |
11016 | code |= 0x1; |
11017 | } |
11018 | |
11019 | if (TakesRexWPrefix(ins, size)) |
11020 | { |
11021 | code = AddRexWPrefix(ins, code); |
11022 | } |
11023 | |
11024 | // Register... |
11025 | unsigned regcode = insEncodeReg012(ins, reg, size, &code); |
11026 | |
11027 | // Output the REX prefix |
11028 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11029 | |
11030 | dst += emitOutputWord(dst, code | (regcode << 8)); |
11031 | } |
11032 | else |
11033 | { |
11034 | if (size == EA_2BYTE) |
11035 | { |
11036 | // Output a size prefix for a 16-bit operand |
11037 | dst += emitOutputByte(dst, 0x66); |
11038 | } |
11039 | dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr)); |
11040 | } |
11041 | break; |
11042 | |
11043 | case INS_pop: |
11044 | case INS_pop_hide: |
11045 | case INS_push: |
11046 | case INS_push_hide: |
11047 | |
11048 | assert(size == EA_PTRSIZE); |
11049 | code = insEncodeOpreg(ins, reg, size); |
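            // push/pop encode the register directly in the opcode byte (50+rd / 58+rd);
            // REX.B supplies the high bit for r8-r15.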
11050 | |
11051 | assert(!TakesVexPrefix(ins)); |
11052 | assert(!TakesRexWPrefix(ins, size)); |
11053 | |
11054 | // Output the REX prefix |
11055 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11056 | |
11057 | dst += emitOutputByte(dst, code); |
11058 | break; |
11059 | |
11060 | case INS_bswap: |
11061 | { |
11062 | assert(size >= EA_4BYTE && size <= EA_PTRSIZE); // 16-bit BSWAP is undefined |
11063 | |
11064 | // The Intel instruction set reference for BSWAP states that extended registers |
11065 | // should be enabled via REX.R, but per Vol. 2A, Sec. 2.2.1.2 (see also Figure 2-7), |
11066 | // REX.B should instead be used if the register is encoded in the opcode byte itself. |
11067 | // Therefore the default logic of insEncodeReg012 is correct for this case. |
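            // For example, 'bswap r10d' encodes as 41 0F CA: the low three register bits
            // live in the opcode byte (0F C8+rd) and REX.B provides the high bit.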
11068 | |
11069 | code = insCodeRR(ins); |
11070 | |
11071 | if (TakesRexWPrefix(ins, size)) |
11072 | { |
11073 | code = AddRexWPrefix(ins, code); |
11074 | } |
11075 | |
11076 | // Register... |
11077 | unsigned regcode = insEncodeReg012(ins, reg, size, &code); |
11078 | |
11079 | // Output the REX prefix |
11080 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11081 | |
11082 | dst += emitOutputWord(dst, code | (regcode << 8)); |
11083 | break; |
11084 | } |
11085 | |
11086 | case INS_seto: |
11087 | case INS_setno: |
11088 | case INS_setb: |
11089 | case INS_setae: |
11090 | case INS_sete: |
11091 | case INS_setne: |
11092 | case INS_setbe: |
11093 | case INS_seta: |
11094 | case INS_sets: |
11095 | case INS_setns: |
11096 | case INS_setpe: |
11097 | case INS_setpo: |
11098 | case INS_setl: |
11099 | case INS_setge: |
11100 | case INS_setle: |
11101 | case INS_setg: |
11102 | |
11103 | assert(id->idGCref() == GCT_NONE); |
11104 | assert(size == EA_1BYTE); |
11105 | |
11106 | code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins)); |
11107 | |
11108 | // Output the REX prefix |
11109 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11110 | |
11111 | // We expect this to always be a 'big' opcode |
11112 | assert(code & 0x00FF0000); |
11113 | |
11114 | dst += emitOutputByte(dst, code >> 16); |
11115 | dst += emitOutputWord(dst, code & 0x0000FFFF); |
11116 | |
11117 | break; |
11118 | |
11119 | case INS_mulEAX: |
11120 | case INS_imulEAX: |
11121 | |
11122 | // Kill off any GC refs in EAX or EDX |
11123 | emitGCregDeadUpd(REG_EAX, dst); |
11124 | emitGCregDeadUpd(REG_EDX, dst); |
11125 | |
11126 | __fallthrough; |
11127 | |
11128 | default: |
11129 | |
11130 | assert(id->idGCref() == GCT_NONE); |
11131 | |
11132 | code = insEncodeMRreg(ins, reg, size, insCodeMR(ins)); |
11133 | |
11134 | if (size != EA_1BYTE) |
11135 | { |
11136 | // Set the 'w' bit to get the large version |
11137 | code |= 0x1; |
11138 | |
11139 | if (size == EA_2BYTE) |
11140 | { |
11141 | // Output a size prefix for a 16-bit operand |
11142 | dst += emitOutputByte(dst, 0x66); |
11143 | } |
11144 | } |
11145 | |
11146 | code = AddVexPrefixIfNeeded(ins, code, size); |
11147 | |
11148 | if (TakesRexWPrefix(ins, size)) |
11149 | { |
11150 | code = AddRexWPrefix(ins, code); |
11151 | } |
11152 | |
11153 | // Output the REX prefix |
11154 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11155 | |
11156 | dst += emitOutputWord(dst, code); |
11157 | break; |
11158 | } |
11159 | |
11160 | // Are we writing the register? if so then update the GC information |
11161 | switch (id->idInsFmt()) |
11162 | { |
11163 | case IF_RRD: |
11164 | break; |
11165 | case IF_RWR: |
11166 | if (id->idGCref()) |
11167 | { |
11168 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
11169 | } |
11170 | else |
11171 | { |
11172 | emitGCregDeadUpd(id->idReg1(), dst); |
11173 | } |
11174 | break; |
11175 | case IF_RRW: |
11176 | { |
11177 | #ifdef DEBUG |
11178 | regMaskTP regMask = genRegMask(reg); |
11179 | #endif |
11180 | if (id->idGCref()) |
11181 | { |
11182 | // The reg must currently be holding either a gcref or a byref |
11183 | // and the instruction must be inc or dec |
11184 | assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) && |
11185 | (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l)); |
11186 | assert(id->idGCref() == GCT_BYREF); |
11187 | // Mark it as holding a GCT_BYREF |
11188 | emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); |
11189 | } |
11190 | else |
11191 | { |
11192 | // Can't use RRW to trash a GC ref. It's OK for unverifiable code |
11193 | // to trash Byrefs. |
11194 | assert((emitThisGCrefRegs & regMask) == 0); |
11195 | } |
11196 | } |
11197 | break; |
11198 | default: |
11199 | #ifdef DEBUG |
11200 | emitDispIns(id, false, false, false); |
11201 | #endif |
11202 | assert(!"unexpected instruction format" ); |
11203 | break; |
11204 | } |
11205 | |
11206 | return dst; |
11207 | } |
11208 | |
11209 | /***************************************************************************** |
11210 | * |
11211 | * Output an instruction with two register operands. |
11212 | */ |
11213 | |
11214 | BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) |
11215 | { |
11216 | code_t code; |
11217 | |
11218 | instruction ins = id->idIns(); |
11219 | regNumber reg1 = id->idReg1(); |
11220 | regNumber reg2 = id->idReg2(); |
11221 | emitAttr size = id->idOpSize(); |
11222 | |
11223 | // Get the 'base' opcode |
11224 | code = insCodeRM(ins); |
11225 | code = AddVexPrefixIfNeeded(ins, code, size); |
11226 | if (IsSSEOrAVXInstruction(ins)) |
11227 | { |
11228 | code = insEncodeRMreg(ins, code); |
11229 | |
11230 | if (TakesRexWPrefix(ins, size)) |
11231 | { |
11232 | code = AddRexWPrefix(ins, code); |
11233 | } |
11234 | } |
11235 | else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins))) |
11236 | { |
11237 | code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE); |
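        // For movsx/movzx the low opcode bit selects the source width: e.g. movzx r32, r/m8
        // is 0F B6 while movzx r32, r/m16 is 0F B7 (0F BE / 0F BF for movsx).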
11238 | #ifdef _TARGET_AMD64_ |
11239 | |
11240 | assert((size < EA_4BYTE) || (insIsCMOV(ins))); |
11241 | if ((size == EA_8BYTE) || (ins == INS_movsx)) |
11242 | { |
11243 | code = AddRexWPrefix(ins, code); |
11244 | } |
11245 | } |
11246 | else if (ins == INS_movsxd) |
11247 | { |
11248 | code = insEncodeRMreg(ins, code); |
11249 | |
11250 | #endif // _TARGET_AMD64_ |
11251 | } |
11252 | #ifdef FEATURE_HW_INTRINSICS |
11253 | else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt)) |
11254 | { |
11255 | code = insEncodeRMreg(ins, code); |
11256 | if ((ins == INS_crc32) && (size > EA_1BYTE)) |
11257 | { |
11258 | code |= 0x0100; |
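            // crc32 r32, r/m8 is F2 0F 38 F0 /r; the widened forms (r/m16/32/64) use F1 as
            // the last opcode byte, with a 66 prefix or REX.W added below as needed.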
11259 | } |
11260 | |
11261 | if (size == EA_2BYTE) |
11262 | { |
11263 | assert(ins == INS_crc32); |
11264 | dst += emitOutputByte(dst, 0x66); |
11265 | } |
11266 | else if (size == EA_8BYTE) |
11267 | { |
11268 | code = AddRexWPrefix(ins, code); |
11269 | } |
11270 | } |
11271 | #endif // FEATURE_HW_INTRINSICS |
11272 | else |
11273 | { |
11274 | code = insEncodeMRreg(ins, insCodeMR(ins)); |
11275 | |
11276 | if (ins != INS_test) |
11277 | { |
11278 | code |= 2; |
11279 | } |
11280 | |
11281 | switch (size) |
11282 | { |
11283 | case EA_1BYTE: |
11284 | noway_assert(RBM_BYTE_REGS & genRegMask(reg1)); |
11285 | noway_assert(RBM_BYTE_REGS & genRegMask(reg2)); |
11286 | break; |
11287 | |
11288 | case EA_2BYTE: |
11289 | // Output a size prefix for a 16-bit operand |
11290 | dst += emitOutputByte(dst, 0x66); |
11291 | __fallthrough; |
11292 | |
11293 | case EA_4BYTE: |
11294 | // Set the 'w' bit to get the large version |
11295 | code |= 0x1; |
11296 | break; |
11297 | |
11298 | #ifdef _TARGET_AMD64_ |
11299 | case EA_8BYTE: |
11300 | // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it |
11301 | // Don't need to zero out the high bits explicitly |
11302 | if ((ins != INS_xor) || (reg1 != reg2)) |
11303 | { |
11304 | code = AddRexWPrefix(ins, code); |
11305 | } |
11306 | |
11307 | // Set the 'w' bit to get the large version |
11308 | code |= 0x1; |
11309 | break; |
11310 | |
11311 | #endif // _TARGET_AMD64_ |
11312 | |
11313 | default: |
11314 | assert(!"unexpected size" ); |
11315 | } |
11316 | } |
11317 | |
11318 | regNumber reg345 = REG_NA; |
11319 | if (IsBMIInstruction(ins)) |
11320 | { |
11321 | reg345 = getBmiRegNumber(ins); |
11322 | } |
11323 | if (reg345 == REG_NA) |
11324 | { |
11325 | reg345 = id->idReg1(); |
11326 | } |
11327 | unsigned regCode = insEncodeReg345(ins, reg345, size, &code); |
11328 | regCode |= insEncodeReg012(ins, reg2, size, &code); |
11329 | |
11330 | if (TakesVexPrefix(ins)) |
11331 | { |
11332 | // In case of AVX instructions that take 3 operands, we generally want to encode reg1 |
11333 | // as first source. In this case, reg1 is both a source and a destination. |
11334 | // The exception is the "merge" 3-operand case, where we have a move instruction, such |
11335 | // as movss, and we want to merge the source with itself. |
11336 | // |
11337 | // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For |
11338 | // now we use the single source as source1 and source2. |
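        // For example, a two-operand 'addss xmm1, xmm2' is emitted under VEX as
        // 'vaddss xmm1, xmm1, xmm2', i.e. reg1 is duplicated into the 'vvvv' field.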
11339 | if (IsDstDstSrcAVXInstruction(ins)) |
11340 | { |
11341 | // encode source/dest operand reg in 'vvvv' bits in 1's complement form |
11342 | code = insEncodeReg3456(ins, reg1, size, code); |
11343 | } |
11344 | else if (IsDstSrcSrcAVXInstruction(ins)) |
11345 | { |
11346 | // encode source operand reg in 'vvvv' bits in 1's complement form |
11347 | code = insEncodeReg3456(ins, reg2, size, code); |
11348 | } |
11349 | } |
11350 | |
11351 | // Output the REX prefix |
11352 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11353 | |
11354 | if (code & 0xFF000000) |
11355 | { |
11356 | // Output the highest word of the opcode |
11357 | dst += emitOutputWord(dst, code >> 16); |
11358 | code &= 0x0000FFFF; |
11359 | |
11360 | if (Is4ByteSSEInstruction(ins)) |
11361 | { |
11362 | // Output 3rd byte of the opcode |
11363 | dst += emitOutputByte(dst, code); |
11364 | code &= 0xFF00; |
11365 | } |
11366 | } |
11367 | else if (code & 0x00FF0000) |
11368 | { |
11369 | dst += emitOutputByte(dst, code >> 16); |
11370 | code &= 0x0000FFFF; |
11371 | } |
11372 | |
11373 | // TODO-XArch-CQ: Right now support 4-byte opcode instructions only |
11374 | if ((code & 0xFF00) == 0xC000) |
11375 | { |
11376 | dst += emitOutputWord(dst, code | (regCode << 8)); |
11377 | } |
11378 | else if ((code & 0xFF) == 0x00) |
11379 | { |
11380 | // This case happens for some SSE/AVX instructions only |
11381 | assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins)); |
11382 | |
11383 | dst += emitOutputByte(dst, (code >> 8) & 0xFF); |
11384 | dst += emitOutputByte(dst, (0xC0 | regCode)); |
11385 | } |
11386 | else |
11387 | { |
11388 | dst += emitOutputWord(dst, code); |
11389 | dst += emitOutputByte(dst, (0xC0 | regCode)); |
11390 | } |
11391 | |
11392 | // Does this instruction operate on a GC ref value? |
11393 | if (id->idGCref()) |
11394 | { |
11395 | switch (id->idInsFmt()) |
11396 | { |
11397 | case IF_RRD_RRD: |
11398 | break; |
11399 | |
11400 | case IF_RWR_RRD: |
11401 | |
11402 | if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0) |
11403 | { |
11404 | // We're relocating "this" in the prolog |
11405 | assert(emitComp->lvaIsOriginalThisArg(0)); |
11406 | assert(emitComp->lvaTable[0].lvRegister); |
11407 | assert(emitComp->lvaTable[0].lvRegNum == reg1); |
11408 | |
11409 | if (emitFullGCinfo) |
11410 | { |
11411 | emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true); |
11412 | break; |
11413 | } |
11414 | else |
11415 | { |
                        /* If emitFullGCinfo==false, then we don't use any
                           regPtrDsc's and so we explicitly note the location
                           of "this" in GCEncode.cpp
                         */
11420 | } |
11421 | } |
11422 | |
11423 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
11424 | break; |
11425 | |
11426 | case IF_RRW_RRD: |
11427 | |
11428 | switch (id->idIns()) |
11429 | { |
11430 | /* |
11431 | This must be one of the following cases: |
11432 | |
11433 | xor reg, reg to assign NULL |
11434 | |
11435 | and r1 , r2 if (ptr1 && ptr2) ... |
11436 | or r1 , r2 if (ptr1 || ptr2) ... |
11437 | |
11438 | add r1 , r2 to compute a normal byref |
11439 | sub r1 , r2 to compute a strange byref (VC only) |
11440 | |
11441 | */ |
11442 | case INS_xor: |
11443 | assert(id->idReg1() == id->idReg2()); |
11444 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
11445 | break; |
11446 | |
11447 | case INS_or: |
11448 | case INS_and: |
11449 | emitGCregDeadUpd(id->idReg1(), dst); |
11450 | break; |
11451 | |
11452 | case INS_add: |
11453 | case INS_sub: |
11454 | assert(id->idGCref() == GCT_BYREF); |
11455 | |
11456 | #ifdef DEBUG |
11457 | regMaskTP regMask; |
11458 | regMask = genRegMask(reg1) | genRegMask(reg2); |
11459 | |
11460 | // r1/r2 could have been a GCREF as GCREF + int=BYREF |
11461 | // or BYREF+/-int=BYREF |
11462 | assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) || |
11463 | ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub))); |
11464 | #endif |
11465 | // Mark r1 as holding a byref |
11466 | emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); |
11467 | break; |
11468 | |
11469 | default: |
11470 | #ifdef DEBUG |
11471 | emitDispIns(id, false, false, false); |
11472 | #endif |
11473 | assert(!"unexpected GC reg update instruction" ); |
11474 | } |
11475 | |
11476 | break; |
11477 | |
11478 | case IF_RRW_RRW: |
11479 | // This must be "xchg reg1, reg2" |
11480 | assert(id->idIns() == INS_xchg); |
11481 | |
11482 | // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC |
11483 | // register pointer mask. |
11484 | |
11485 | GCtype gc1, gc2; |
11486 | |
11487 | gc1 = emitRegGCtype(reg1); |
11488 | gc2 = emitRegGCtype(reg2); |
11489 | |
11490 | if (gc1 != gc2) |
11491 | { |
11492 | // Kill the GC-info about the GC registers |
11493 | |
11494 | if (needsGC(gc1)) |
11495 | { |
11496 | emitGCregDeadUpd(reg1, dst); |
11497 | } |
11498 | |
11499 | if (needsGC(gc2)) |
11500 | { |
11501 | emitGCregDeadUpd(reg2, dst); |
11502 | } |
11503 | |
11504 | // Now, swap the info |
11505 | |
11506 | if (needsGC(gc1)) |
11507 | { |
11508 | emitGCregLiveUpd(gc1, reg2, dst); |
11509 | } |
11510 | |
11511 | if (needsGC(gc2)) |
11512 | { |
11513 | emitGCregLiveUpd(gc2, reg1, dst); |
11514 | } |
11515 | } |
11516 | break; |
11517 | |
11518 | default: |
11519 | #ifdef DEBUG |
11520 | emitDispIns(id, false, false, false); |
11521 | #endif |
11522 | assert(!"unexpected GC ref instruction format" ); |
11523 | } |
11524 | } |
11525 | else |
11526 | { |
11527 | if (!emitInsCanOnlyWriteSSE2OrAVXReg(id)) |
11528 | { |
11529 | switch (id->idInsFmt()) |
11530 | { |
11531 | case IF_RRD_CNS: |
11532 | // INS_mulEAX can not be used with any of these formats |
11533 | assert(ins != INS_mulEAX && ins != INS_imulEAX); |
11534 | |
11535 | // For the three operand imul instruction the target |
11536 | // register is encoded in the opcode |
11537 | |
11538 | if (instrIs3opImul(ins)) |
11539 | { |
11540 | regNumber tgtReg = inst3opImulReg(ins); |
11541 | emitGCregDeadUpd(tgtReg, dst); |
11542 | } |
11543 | break; |
11544 | |
11545 | case IF_RWR_RRD: |
11546 | case IF_RRW_RRD: |
11547 | case IF_RWR_RRD_RRD: |
                    // INS_mov_xmm2i writes to reg2.
11549 | if (ins == INS_mov_xmm2i) |
11550 | { |
11551 | emitGCregDeadUpd(id->idReg2(), dst); |
11552 | } |
11553 | else |
11554 | { |
11555 | emitGCregDeadUpd(id->idReg1(), dst); |
11556 | } |
11557 | break; |
11558 | |
11559 | default: |
11560 | break; |
11561 | } |
11562 | } |
11563 | } |
11564 | |
11565 | return dst; |
11566 | } |
11567 | |
11568 | BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) |
11569 | { |
11570 | code_t code; |
11571 | |
11572 | instruction ins = id->idIns(); |
11573 | assert(IsAVXInstruction(ins)); |
11574 | assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins)); |
11575 | regNumber targetReg = id->idReg1(); |
11576 | regNumber src1 = id->idReg2(); |
11577 | regNumber src2 = id->idReg3(); |
11578 | emitAttr size = id->idOpSize(); |
11579 | |
11580 | code = insCodeRM(ins); |
11581 | code = AddVexPrefixIfNeeded(ins, code, size); |
11582 | code = insEncodeRMreg(ins, code); |
11583 | |
11584 | if (TakesRexWPrefix(ins, size)) |
11585 | { |
11586 | code = AddRexWPrefix(ins, code); |
11587 | } |
11588 | |
11589 | unsigned regCode = insEncodeReg345(ins, targetReg, size, &code); |
11590 | regCode |= insEncodeReg012(ins, src2, size, &code); |
11591 | // encode source operand reg in 'vvvv' bits in 1's complement form |
11592 | code = insEncodeReg3456(ins, src1, size, code); |
11593 | |
11594 | // Output the REX prefix |
11595 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11596 | |
11597 | // Is this a 'big' opcode? |
11598 | if (code & 0xFF000000) |
11599 | { |
11600 | // Output the highest word of the opcode |
11601 | dst += emitOutputWord(dst, code >> 16); |
11602 | code &= 0x0000FFFF; |
11603 | } |
11604 | else if (code & 0x00FF0000) |
11605 | { |
11606 | dst += emitOutputByte(dst, code >> 16); |
11607 | code &= 0x0000FFFF; |
11608 | } |
11609 | |
11610 | // TODO-XArch-CQ: Right now support 4-byte opcode instructions only |
11611 | if ((code & 0xFF00) == 0xC000) |
11612 | { |
11613 | dst += emitOutputWord(dst, code | (regCode << 8)); |
11614 | } |
11615 | else if ((code & 0xFF) == 0x00) |
11616 | { |
11617 | // This case happens for AVX instructions only |
11618 | assert(IsAVXInstruction(ins)); |
11619 | |
11620 | dst += emitOutputByte(dst, (code >> 8) & 0xFF); |
11621 | dst += emitOutputByte(dst, (0xC0 | regCode)); |
11622 | } |
11623 | else |
11624 | { |
11625 | dst += emitOutputWord(dst, code); |
11626 | dst += emitOutputByte(dst, (0xC0 | regCode)); |
11627 | } |
11628 | |
11629 | noway_assert(!id->idGCref()); |
11630 | |
11631 | return dst; |
11632 | } |
11633 | |
11634 | /***************************************************************************** |
11635 | * |
11636 | * Output an instruction with a register and constant operands. |
11637 | */ |
11638 | |
11639 | BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) |
11640 | { |
11641 | code_t code; |
11642 | emitAttr size = id->idOpSize(); |
11643 | instruction ins = id->idIns(); |
11644 | regNumber reg = id->idReg1(); |
11645 | ssize_t val = emitGetInsSC(id); |
11646 | bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); |
11647 | |
11648 | // BT reg,imm might be useful but it requires special handling of the immediate value |
11649 | // (it is always encoded in a byte). Let's not complicate things until this is needed. |
11650 | assert(ins != INS_bt); |
11651 | |
11652 | if (id->idIsCnsReloc()) |
11653 | { |
11654 | valInByte = false; // relocs can't be placed in a byte |
11655 | } |
11656 | |
11657 | noway_assert(emitVerifyEncodable(ins, size, reg)); |
11658 | |
11659 | if (IsSSEOrAVXInstruction(ins)) |
11660 | { |
11661 | // Handle SSE2 instructions of the form "opcode reg, immed8" |
11662 | |
11663 | assert(id->idGCref() == GCT_NONE); |
11664 | assert(valInByte); |
11665 | |
11666 | // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field. |
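        // For example, psrld xmm, imm8 is 66 0F 72 /2 while pslld xmm, imm8 is 66 0F 72 /6:
        // the opcode bytes match and only the /r digit in the ModRM reg field differs.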
11667 | regNumber regOpcode = getSseShiftRegNumber(ins); |
11668 | |
11669 | // Get the 'base' opcode. |
11670 | code = insCodeMI(ins); |
11671 | code = AddVexPrefixIfNeeded(ins, code, size); |
11672 | code = insEncodeMIreg(ins, reg, size, code); |
11673 | assert(code & 0x00FF0000); |
11674 | if (TakesVexPrefix(ins)) |
11675 | { |
11676 | // The 'vvvv' bits encode the destination register, which for this case (RI) |
11677 | // is the same as the source. |
11678 | code = insEncodeReg3456(ins, reg, size, code); |
11679 | } |
11680 | |
11681 | unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8; |
11682 | |
11683 | // Output the REX prefix |
11684 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11685 | |
11686 | if (code & 0xFF000000) |
11687 | { |
11688 | dst += emitOutputWord(dst, code >> 16); |
11689 | } |
11690 | else if (code & 0xFF0000) |
11691 | { |
11692 | dst += emitOutputByte(dst, code >> 16); |
11693 | } |
11694 | |
11695 | dst += emitOutputWord(dst, code | regcode); |
11696 | |
11697 | dst += emitOutputByte(dst, val); |
11698 | |
11699 | return dst; |
11700 | } |
11701 | |
11702 | // The 'mov' opcode is special |
11703 | if (ins == INS_mov) |
11704 | { |
11705 | code = insCodeACC(ins); |
11706 | assert(code < 0x100); |
11707 | |
11708 | code |= 0x08; // Set the 'w' bit |
11709 | unsigned regcode = insEncodeReg012(ins, reg, size, &code); |
11710 | code |= regcode; |
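        // The result is the B8+rd opcode family (mov r32/r64, imm): the register is encoded
        // in the low three bits of the opcode byte and REX.W (added below) selects the imm64 form.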
11711 | |
11712 | // This is INS_mov and will not take VEX prefix |
11713 | assert(!TakesVexPrefix(ins)); |
11714 | |
11715 | if (TakesRexWPrefix(ins, size)) |
11716 | { |
11717 | code = AddRexWPrefix(ins, code); |
11718 | } |
11719 | |
11720 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11721 | |
11722 | dst += emitOutputByte(dst, code); |
11723 | if (size == EA_4BYTE) |
11724 | { |
11725 | dst += emitOutputLong(dst, val); |
11726 | } |
11727 | #ifdef _TARGET_AMD64_ |
11728 | else |
11729 | { |
11730 | assert(size == EA_PTRSIZE); |
11731 | dst += emitOutputSizeT(dst, val); |
11732 | } |
11733 | #endif |
11734 | |
11735 | if (id->idIsCnsReloc()) |
11736 | { |
11737 | emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET); |
11738 | } |
11739 | |
11740 | goto DONE; |
11741 | } |
11742 | |
11743 | // Decide which encoding is the shortest |
11744 | bool useSigned, useACC; |
11745 | |
11746 | if (reg == REG_EAX && !instrIs3opImul(ins)) |
11747 | { |
11748 | if (size == EA_1BYTE || (ins == INS_test)) |
11749 | { |
11750 | // For al, ACC encoding is always the smallest |
11751 | useSigned = false; |
11752 | useACC = true; |
11753 | } |
11754 | else |
11755 | { |
11756 | /* For ax/eax, we avoid ACC encoding for small constants as we |
11757 | * can emit the small constant and have it sign-extended. |
11758 | * For big constants, the ACC encoding is better as we can use |
11759 | * the 1 byte opcode |
11760 | */ |
11761 | |
11762 | if (valInByte) |
11763 | { |
11764 | // avoid using ACC encoding |
11765 | useSigned = true; |
11766 | useACC = false; |
11767 | } |
11768 | else |
11769 | { |
11770 | useSigned = false; |
11771 | useACC = true; |
11772 | } |
11773 | } |
11774 | } |
11775 | else |
11776 | { |
11777 | useACC = false; |
11778 | |
11779 | if (valInByte) |
11780 | { |
11781 | useSigned = true; |
11782 | } |
11783 | else |
11784 | { |
11785 | useSigned = false; |
11786 | } |
11787 | } |
11788 | |
11789 | // "test" has no 's' bit |
11790 | if (ins == INS_test) |
11791 | { |
11792 | useSigned = false; |
11793 | } |
11794 | |
11795 | // Get the 'base' opcode |
11796 | if (useACC) |
11797 | { |
11798 | assert(!useSigned); |
11799 | code = insCodeACC(ins); |
11800 | } |
11801 | else |
11802 | { |
11803 | assert(!useSigned || valInByte); |
11804 | |
11805 | // Some instructions (at least 'imul') do not have a |
11806 | // r/m, immed form, but do have a dstReg,srcReg,imm8 form. |
11807 | if (valInByte && useSigned && insNeedsRRIb(ins)) |
11808 | { |
11809 | code = insEncodeRRIb(ins, reg, size); |
11810 | } |
11811 | else |
11812 | { |
11813 | code = insCodeMI(ins); |
11814 | code = AddVexPrefixIfNeeded(ins, code, size); |
11815 | code = insEncodeMIreg(ins, reg, size, code); |
11816 | } |
11817 | } |
11818 | |
11819 | switch (size) |
11820 | { |
11821 | case EA_1BYTE: |
11822 | break; |
11823 | |
11824 | case EA_2BYTE: |
11825 | // Output a size prefix for a 16-bit operand |
11826 | dst += emitOutputByte(dst, 0x66); |
11827 | __fallthrough; |
11828 | |
11829 | case EA_4BYTE: |
11830 | // Set the 'w' bit to get the large version |
11831 | code |= 0x1; |
11832 | break; |
11833 | |
11834 | #ifdef _TARGET_AMD64_ |
11835 | case EA_8BYTE: |
11836 | /* Set the 'w' bit to get the large version */ |
11837 | /* and the REX.W bit to get the really large version */ |
11838 | |
11839 | code = AddRexWPrefix(ins, code); |
11840 | code |= 0x1; |
11841 | break; |
11842 | #endif |
11843 | |
11844 | default: |
11845 | assert(!"unexpected size" ); |
11846 | } |
11847 | |
11848 | // Output the REX prefix |
11849 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
11850 | |
11851 | // Does the value fit in a sign-extended byte? |
11852 | // Important! Only set the 's' bit when we have a size larger than EA_1BYTE. |
11853 | // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode. |
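    // Setting the 's' bit (the 0x2 below) turns e.g. opcode 0x81 (r/m, imm32) into
    // 0x83 (r/m, sign-extended imm8).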
11854 | |
11855 | if (useSigned && (size > EA_1BYTE)) |
11856 | { |
11857 | // We can just set the 's' bit, and issue an immediate byte |
11858 | |
11859 | code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte. |
11860 | dst += emitOutputWord(dst, code); |
11861 | dst += emitOutputByte(dst, val); |
11862 | } |
11863 | else |
11864 | { |
11865 | // Can we use an accumulator (EAX) encoding? |
11866 | if (useACC) |
11867 | { |
11868 | dst += emitOutputByte(dst, code); |
11869 | } |
11870 | else |
11871 | { |
11872 | dst += emitOutputWord(dst, code); |
11873 | } |
11874 | |
11875 | switch (size) |
11876 | { |
11877 | case EA_1BYTE: |
11878 | dst += emitOutputByte(dst, val); |
11879 | break; |
11880 | case EA_2BYTE: |
11881 | dst += emitOutputWord(dst, val); |
11882 | break; |
11883 | case EA_4BYTE: |
11884 | dst += emitOutputLong(dst, val); |
11885 | break; |
11886 | #ifdef _TARGET_AMD64_ |
11887 | case EA_8BYTE: |
11888 | dst += emitOutputLong(dst, val); |
11889 | break; |
11890 | #endif // _TARGET_AMD64_ |
11891 | default: |
11892 | break; |
11893 | } |
11894 | |
11895 | if (id->idIsCnsReloc()) |
11896 | { |
11897 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW); |
11898 | assert(size == EA_4BYTE); |
11899 | } |
11900 | } |
11901 | |
11902 | DONE: |
11903 | |
11904 | // Does this instruction operate on a GC ref value? |
11905 | if (id->idGCref()) |
11906 | { |
11907 | switch (id->idInsFmt()) |
11908 | { |
11909 | case IF_RRD_CNS: |
11910 | break; |
11911 | |
11912 | case IF_RWR_CNS: |
11913 | emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); |
11914 | break; |
11915 | |
11916 | case IF_RRW_CNS: |
11917 | assert(id->idGCref() == GCT_BYREF); |
11918 | |
11919 | #ifdef DEBUG |
11920 | regMaskTP regMask; |
11921 | regMask = genRegMask(reg); |
11922 | // FIXNOW review the other places and relax the assert there too |
11923 | |
11924 | // The reg must currently be holding either a gcref or a byref |
11925 | // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF |
11926 | if (emitThisGCrefRegs & regMask) |
11927 | { |
11928 | assert(ins == INS_add); |
11929 | } |
11930 | if (emitThisByrefRegs & regMask) |
11931 | { |
11932 | assert(ins == INS_add || ins == INS_sub); |
11933 | } |
11934 | #endif |
11935 | // Mark it as holding a GCT_BYREF |
11936 | emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); |
11937 | break; |
11938 | |
11939 | default: |
11940 | #ifdef DEBUG |
11941 | emitDispIns(id, false, false, false); |
11942 | #endif |
11943 | assert(!"unexpected GC ref instruction format" ); |
11944 | } |
11945 | |
11946 | // mul can never produce a GC ref |
11947 | assert(!instrIs3opImul(ins)); |
11948 | assert(ins != INS_mulEAX && ins != INS_imulEAX); |
11949 | } |
11950 | else |
11951 | { |
11952 | switch (id->idInsFmt()) |
11953 | { |
11954 | case IF_RRD_CNS: |
11955 | // INS_mulEAX can not be used with any of these formats |
11956 | assert(ins != INS_mulEAX && ins != INS_imulEAX); |
11957 | |
11958 | // For the three operand imul instruction the target |
11959 | // register is encoded in the opcode |
11960 | |
11961 | if (instrIs3opImul(ins)) |
11962 | { |
11963 | regNumber tgtReg = inst3opImulReg(ins); |
11964 | emitGCregDeadUpd(tgtReg, dst); |
11965 | } |
11966 | break; |
11967 | |
11968 | case IF_RRW_CNS: |
11969 | case IF_RWR_CNS: |
11970 | assert(!instrIs3opImul(ins)); |
11971 | |
11972 | emitGCregDeadUpd(id->idReg1(), dst); |
11973 | break; |
11974 | |
11975 | default: |
11976 | #ifdef DEBUG |
11977 | emitDispIns(id, false, false, false); |
11978 | #endif |
11979 | assert(!"unexpected GC ref instruction format" ); |
11980 | } |
11981 | } |
11982 | |
11983 | return dst; |
11984 | } |
11985 | |
11986 | /***************************************************************************** |
11987 | * |
11988 | * Output an instruction with a constant operand. |
11989 | */ |
11990 | |
11991 | BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) |
11992 | { |
11993 | code_t code; |
11994 | instruction ins = id->idIns(); |
11995 | emitAttr size = id->idOpSize(); |
11996 | ssize_t val = emitGetInsSC(id); |
11997 | bool valInByte = ((signed char)val == val); |
11998 | |
    // We would need to update the GC info correctly
12000 | assert(!IsSSEInstruction(ins)); |
12001 | assert(!IsAVXInstruction(ins)); |
12002 | |
12003 | #ifdef _TARGET_AMD64_ |
12004 | // all these opcodes take a sign-extended 4-byte immediate, max |
12005 | noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc())); |
12006 | #endif |
12007 | |
12008 | if (id->idIsCnsReloc()) |
12009 | { |
12010 | valInByte = false; // relocs can't be placed in a byte |
12011 | |
        // Of these instructions, only the push instruction can have a reloc
12013 | assert(ins == INS_push || ins == INS_push_hide); |
12014 | } |
12015 | |
12016 | switch (ins) |
12017 | { |
12018 | case INS_jge: |
12019 | assert((val >= -128) && (val <= 127)); |
12020 | dst += emitOutputByte(dst, insCode(ins)); |
12021 | dst += emitOutputByte(dst, val); |
12022 | break; |
12023 | |
12024 | case INS_loop: |
12025 | assert((val >= -128) && (val <= 127)); |
12026 | dst += emitOutputByte(dst, insCodeMI(ins)); |
12027 | dst += emitOutputByte(dst, val); |
12028 | break; |
12029 | |
12030 | case INS_ret: |
12031 | assert(val); |
12032 | dst += emitOutputByte(dst, insCodeMI(ins)); |
12033 | dst += emitOutputWord(dst, val); |
12034 | break; |
12035 | |
12036 | case INS_push_hide: |
12037 | case INS_push: |
12038 | code = insCodeMI(ins); |
12039 | |
12040 | // Does the operand fit in a byte? |
12041 | if (valInByte) |
12042 | { |
12043 | dst += emitOutputByte(dst, code | 2); |
12044 | dst += emitOutputByte(dst, val); |
12045 | } |
12046 | else |
12047 | { |
12048 | if (TakesRexWPrefix(ins, size)) |
12049 | { |
12050 | code = AddRexWPrefix(ins, code); |
12051 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
12052 | } |
12053 | |
12054 | dst += emitOutputByte(dst, code); |
12055 | dst += emitOutputLong(dst, val); |
12056 | if (id->idIsCnsReloc()) |
12057 | { |
12058 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW); |
12059 | } |
12060 | } |
12061 | |
12062 | // Did we push a GC ref value? |
12063 | if (id->idGCref()) |
12064 | { |
12065 | #ifdef DEBUG |
12066 | printf("UNDONE: record GCref push [cns]\n" ); |
12067 | #endif |
12068 | } |
12069 | |
12070 | break; |
12071 | |
12072 | default: |
12073 | assert(!"unexpected instruction" ); |
12074 | } |
12075 | |
12076 | return dst; |
12077 | } |
12078 | |
12079 | /***************************************************************************** |
12080 | * |
12081 | * Output a local jump instruction. |
12082 | * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that |
12083 | * needs to get bound to an actual address and processed by branch shortening. |
12084 | */ |
12085 | |
12086 | BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i) |
12087 | { |
12088 | unsigned srcOffs; |
12089 | unsigned dstOffs; |
12090 | ssize_t distVal; |
12091 | |
12092 | instrDescJmp* id = (instrDescJmp*)i; |
12093 | instruction ins = id->idIns(); |
12094 | bool jmp; |
12095 | bool relAddr = true; // does the instruction use relative-addressing? |
12096 | |
    // SSE/AVX doesn't make any sense here
12098 | assert(!IsSSEInstruction(ins)); |
12099 | assert(!IsAVXInstruction(ins)); |
12100 | |
12101 | size_t ssz; |
12102 | size_t lsz; |
12103 | |
12104 | switch (ins) |
12105 | { |
12106 | default: |
12107 | ssz = JCC_SIZE_SMALL; |
12108 | lsz = JCC_SIZE_LARGE; |
12109 | jmp = true; |
12110 | break; |
12111 | |
12112 | case INS_jmp: |
12113 | ssz = JMP_SIZE_SMALL; |
12114 | lsz = JMP_SIZE_LARGE; |
12115 | jmp = true; |
12116 | break; |
12117 | |
12118 | case INS_call: |
12119 | ssz = lsz = CALL_INST_SIZE; |
12120 | jmp = false; |
12121 | break; |
12122 | |
12123 | case INS_push_hide: |
12124 | case INS_push: |
12125 | ssz = lsz = 5; |
12126 | jmp = false; |
12127 | relAddr = false; |
12128 | break; |
12129 | |
12130 | case INS_mov: |
12131 | case INS_lea: |
12132 | ssz = lsz = id->idCodeSize(); |
12133 | jmp = false; |
12134 | relAddr = false; |
12135 | break; |
12136 | } |
12137 | |
12138 | // Figure out the distance to the target |
12139 | srcOffs = emitCurCodeOffs(dst); |
12140 | dstOffs = id->idAddr()->iiaIGlabel->igOffs; |
12141 | |
12142 | if (relAddr) |
12143 | { |
12144 | distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs)); |
12145 | } |
12146 | else |
12147 | { |
12148 | distVal = (ssize_t)emitOffsetToPtr(dstOffs); |
12149 | } |
12150 | |
12151 | if (dstOffs <= srcOffs) |
12152 | { |
12153 | // This is a backward jump - distance is known at this point |
12154 | CLANG_FORMAT_COMMENT_ANCHOR; |
12155 | |
12156 | #if DEBUG_EMIT |
12157 | if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) |
12158 | { |
12159 | size_t blkOffs = id->idjIG->igOffs; |
12160 | |
12161 | if (INTERESTING_JUMP_NUM == 0) |
12162 | { |
12163 | printf("[3] Jump %u:\n" , id->idDebugOnlyInfo()->idNum); |
12164 | } |
12165 | printf("[3] Jump block is at %08X - %02X = %08X\n" , blkOffs, emitOffsAdj, blkOffs - emitOffsAdj); |
12166 | printf("[3] Jump is at %08X - %02X = %08X\n" , srcOffs, emitOffsAdj, srcOffs - emitOffsAdj); |
12167 | printf("[3] Label block is at %08X - %02X = %08X\n" , dstOffs, emitOffsAdj, dstOffs - emitOffsAdj); |
12168 | } |
12169 | #endif |
12170 | |
12171 | // Can we use a short jump? |
12172 | if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG) |
12173 | { |
12174 | emitSetShortJump(id); |
12175 | } |
12176 | } |
12177 | else |
12178 | { |
12179 | // This is a forward jump - distance will be an upper limit |
12180 | emitFwdJumps = true; |
12181 | |
12182 | // The target offset will be closer by at least 'emitOffsAdj', but only if this |
12183 | // jump doesn't cross the hot-cold boundary. |
12184 | if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) |
12185 | { |
12186 | dstOffs -= emitOffsAdj; |
12187 | distVal -= emitOffsAdj; |
12188 | } |
12189 | |
12190 | // Record the location of the jump for later patching |
12191 | id->idjOffs = dstOffs; |
12192 | |
12193 | // Are we overflowing the id->idjOffs bitfield? |
12194 | if (id->idjOffs != dstOffs) |
12195 | { |
            IMPL_LIMITATION("Method is too large");
12197 | } |
12198 | |
12199 | #if DEBUG_EMIT |
12200 | if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) |
12201 | { |
12202 | size_t blkOffs = id->idjIG->igOffs; |
12203 | |
12204 | if (INTERESTING_JUMP_NUM == 0) |
12205 | { |
12206 | printf("[4] Jump %u:\n" , id->idDebugOnlyInfo()->idNum); |
12207 | } |
12208 | printf("[4] Jump block is at %08X\n" , blkOffs); |
12209 | printf("[4] Jump is at %08X\n" , srcOffs); |
12210 | printf("[4] Label block is at %08X - %02X = %08X\n" , dstOffs + emitOffsAdj, emitOffsAdj, dstOffs); |
12211 | } |
12212 | #endif |
12213 | |
12214 | // Can we use a short jump? |
12215 | if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS) |
12216 | { |
12217 | emitSetShortJump(id); |
12218 | } |
12219 | } |
12220 | |
12221 | // Adjust the offset to emit relative to the end of the instruction |
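    // (For example, a short 'jmp' back to its own first byte is EB FE: the 2-byte
    // instruction ends 2 bytes past the target, so the rel8 displacement is -2.)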
12222 | if (relAddr) |
12223 | { |
12224 | distVal -= id->idjShort ? ssz : lsz; |
12225 | } |
12226 | |
12227 | #ifdef DEBUG |
12228 | if (0 && emitComp->verbose) |
12229 | { |
12230 | size_t sz = id->idjShort ? ssz : lsz; |
12231 | int distValSize = id->idjShort ? 4 : 8; |
12232 | printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n" , (dstOffs <= srcOffs) ? "Fwd" : "Bwd" , |
12233 | emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, |
12234 | distVal); |
12235 | } |
12236 | #endif |
12237 | |
12238 | // What size jump should we use? |
12239 | if (id->idjShort) |
12240 | { |
12241 | // Short jump |
12242 | assert(!id->idjKeepLong); |
12243 | assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false); |
12244 | |
12245 | assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL); |
12246 | assert(JMP_SIZE_SMALL == 2); |
12247 | |
12248 | assert(jmp); |
12249 | |
12250 | if (emitInstCodeSz(id) != JMP_SIZE_SMALL) |
12251 | { |
12252 | emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL; |
12253 | |
12254 | #ifdef DEBUG |
12255 | if (emitComp->verbose) |
12256 | { |
12257 | printf("; NOTE: size of jump [%08X] mis-predicted\n" , emitComp->dspPtr(id)); |
12258 | } |
12259 | #endif |
12260 | } |
12261 | |
12262 | dst += emitOutputByte(dst, insCode(ins)); |
12263 | |
12264 | // For forward jumps, record the address of the distance value |
12265 | id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr; |
12266 | |
12267 | dst += emitOutputByte(dst, distVal); |
12268 | } |
12269 | else |
12270 | { |
12271 | code_t code; |
12272 | |
12273 | // Long jump |
12274 | if (jmp) |
12275 | { |
12276 | // clang-format off |
12277 | assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp); |
12278 | assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo); |
12279 | assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb); |
12280 | assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae); |
12281 | assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je); |
12282 | assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne); |
12283 | assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe); |
12284 | assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja); |
12285 | assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js); |
12286 | assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns); |
12287 | assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe); |
12288 | assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo); |
12289 | assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl); |
12290 | assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge); |
12291 | assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle); |
12292 | assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg); |
12293 | // clang-format on |
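            // The long forms are the two-byte 0F 8x opcodes followed by a rel32
            // (the unconditional long 'jmp' is E9 rel32).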
12294 | |
12295 | code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp))); |
12296 | } |
12297 | else if (ins == INS_push || ins == INS_push_hide) |
12298 | { |
12299 | assert(insCodeMI(INS_push) == 0x68); |
12300 | code = 0x68; |
12301 | } |
12302 | else if (ins == INS_mov) |
12303 | { |
12304 | // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us |
12305 | insFormat tmpInsFmt = id->idInsFmt(); |
12306 | insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel; |
12307 | bool tmpDspReloc = id->idIsDspReloc(); |
12308 | |
12309 | id->idInsFmt(IF_SWR_CNS); |
12310 | id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar; |
12311 | id->idSetIsDspReloc(false); |
12312 | |
12313 | dst = emitOutputSV(dst, id, insCodeMI(ins)); |
12314 | |
12315 | // Restore id fields with original values |
12316 | id->idInsFmt(tmpInsFmt); |
12317 | id->idAddr()->iiaIGlabel = tmpIGlabel; |
12318 | id->idSetIsDspReloc(tmpDspReloc); |
12319 | code = 0xCC; |
12320 | } |
12321 | else if (ins == INS_lea) |
12322 | { |
12323 | // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us. |
12324 | // We basically are doing what emitIns_R_AI does. |
12325 | // TODO-XArch-Cleanup: revisit this. |
12326 | instrDescAmd idAmdStackLocal; |
12327 | instrDescAmd* idAmd = &idAmdStackLocal; |
12328 | *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields |
12329 | memset((BYTE*)idAmd + sizeof(instrDesc), 0, |
12330 | sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied |
12331 | |
12332 | idAmd->idInsFmt(IF_RWR_ARD); |
12333 | idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA; |
12334 | idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA; |
12335 | emitSetAmdDisp(idAmd, distVal); // set the displacement |
12336 | idAmd->idSetIsDspReloc(id->idIsDspReloc()); |
12337 | assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly |
12338 | |
12339 | UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins)); |
12340 | idAmd->idCodeSize(sz); |
12341 | |
12342 | code = insCodeRM(ins); |
12343 | code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8); |
12344 | |
12345 | dst = emitOutputAM(dst, idAmd, code, nullptr); |
12346 | |
12347 | code = 0xCC; |
12348 | |
12349 | // For forward jumps, record the address of the distance value |
12350 | // Hard-coded 4 here because we already output the displacement, as the last thing. |
12351 | id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr; |
12352 | |
12353 | // We're done |
12354 | return dst; |
12355 | } |
12356 | else |
12357 | { |
12358 | code = 0xE8; |
12359 | } |
12360 | |
12361 | if (ins != INS_mov) |
12362 | { |
12363 | dst += emitOutputByte(dst, code); |
12364 | |
12365 | if (code & 0xFF00) |
12366 | { |
12367 | dst += emitOutputByte(dst, code >> 8); |
12368 | } |
12369 | } |
12370 | |
12371 | // For forward jumps, record the address of the distance value |
12372 | id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr; |
12373 | |
12374 | dst += emitOutputLong(dst, distVal); |
12375 | |
12376 | #ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation |
12377 | if (emitComp->opts.compReloc) |
12378 | #endif |
12379 | { |
12380 | if (!relAddr) |
12381 | { |
12382 | emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW); |
12383 | } |
12384 | else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) |
12385 | { |
12386 | assert(id->idjKeepLong); |
12387 | emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32); |
12388 | } |
12389 | } |
12390 | } |
12391 | |
12392 | // Local calls kill all registers |
12393 | if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs)) |
12394 | { |
12395 | emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst); |
12396 | } |
12397 | |
12398 | return dst; |
12399 | } |
12400 | |
12401 | /***************************************************************************** |
12402 | * |
12403 | * Append the machine code corresponding to the given instruction descriptor |
12404 | * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' |
12405 | * is the instruction group that contains the instruction. Updates '*dp' to |
12406 | * point past the generated code, and returns the size of the instruction |
12407 | * descriptor in bytes. |
12408 | */ |
12409 | |
12410 | #ifdef _PREFAST_ |
12411 | #pragma warning(push) |
12412 | #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function |
12413 | #endif |
12414 | size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) |
12415 | { |
12416 | assert(emitIssuing); |
12417 | |
12418 | BYTE* dst = *dp; |
12419 | size_t sz = sizeof(instrDesc); |
12420 | instruction ins = id->idIns(); |
12421 | unsigned char callInstrSize = 0; |
12422 | |
12423 | #ifdef DEBUG |
12424 | bool dspOffs = emitComp->opts.dspGCtbls; |
12425 | #endif // DEBUG |
12426 | |
12427 | emitAttr size = id->idOpSize(); |
12428 | |
12429 | assert(REG_NA == (int)REG_NA); |
12430 | |
12431 | assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit |
12432 | assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit |
12433 | |
12434 | VARSET_TP GCvars(VarSetOps::UninitVal()); |
12435 | |
12436 | // What instruction format have we got? |
12437 | switch (id->idInsFmt()) |
12438 | { |
12439 | code_t code; |
12440 | unsigned regcode; |
12441 | int args; |
12442 | CnsVal cnsVal; |
12443 | |
12444 | BYTE* addr; |
12445 | bool recCall; |
12446 | |
12447 | regMaskTP gcrefRegs; |
12448 | regMaskTP byrefRegs; |
12449 | |
12450 | /********************************************************************/ |
12451 | /* No operands */ |
12452 | /********************************************************************/ |
12453 | case IF_NONE: |
12454 | // the loop alignment pseudo instruction |
12455 | if (ins == INS_align) |
12456 | { |
12457 | sz = SMALL_IDSC_SIZE; |
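                // (-dst) & 0x0f is the number of padding bytes needed to reach the next
                // 16-byte boundary (zero if 'dst' is already aligned).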
12458 | dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f); |
12459 | assert(((size_t)dst & 0x0f) == 0); |
12460 | break; |
12461 | } |
12462 | |
12463 | if (ins == INS_nop) |
12464 | { |
12465 | dst = emitOutputNOP(dst, id->idCodeSize()); |
12466 | break; |
12467 | } |
12468 | |
12469 | // the cdq instruction kills the EDX register implicitly |
12470 | if (ins == INS_cdq) |
12471 | { |
12472 | emitGCregDeadUpd(REG_EDX, dst); |
12473 | } |
12474 | |
12475 | assert(id->idGCref() == GCT_NONE); |
12476 | |
12477 | code = insCodeMR(ins); |
12478 | |
12479 | #ifdef _TARGET_AMD64_ |
            // Only scalar AVX instructions are supported here, hence the size is hard-coded to 4 bytes.
12481 | code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE); |
12482 | |
12483 | if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize())) |
12484 | { |
12485 | code = AddRexWPrefix(ins, code); |
12486 | } |
12487 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
12488 | #endif |
12489 | // Is this a 'big' opcode? |
12490 | if (code & 0xFF000000) |
12491 | { |
12492 | // The high word and then the low word |
12493 | dst += emitOutputWord(dst, code >> 16); |
12494 | code &= 0x0000FFFF; |
12495 | dst += emitOutputWord(dst, code); |
12496 | } |
12497 | else if (code & 0x00FF0000) |
12498 | { |
12499 | // The high byte and then the low word |
12500 | dst += emitOutputByte(dst, code >> 16); |
12501 | code &= 0x0000FFFF; |
12502 | dst += emitOutputWord(dst, code); |
12503 | } |
12504 | else if (code & 0xFF00) |
12505 | { |
12506 | // The 2 byte opcode |
12507 | dst += emitOutputWord(dst, code); |
12508 | } |
12509 | else |
12510 | { |
12511 | // The 1 byte opcode |
12512 | dst += emitOutputByte(dst, code); |
12513 | } |
12514 | |
12515 | break; |
12516 | |
12517 | /********************************************************************/ |
12518 | /* Simple constant, local label, method */ |
12519 | /********************************************************************/ |
12520 | |
12521 | case IF_CNS: |
12522 | dst = emitOutputIV(dst, id); |
12523 | sz = emitSizeOfInsDsc(id); |
12524 | break; |
12525 | |
12526 | case IF_LABEL: |
12527 | case IF_RWR_LABEL: |
12528 | case IF_SWR_LABEL: |
12529 | assert(id->idGCref() == GCT_NONE); |
12530 | assert(id->idIsBound()); |
12531 | |
12532 | // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()? |
12533 | dst = emitOutputLJ(dst, id); |
12534 | sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp)); |
12535 | break; |
12536 | |
12537 | case IF_METHOD: |
12538 | case IF_METHPTR: |
12539 | // Assume we'll be recording this call |
12540 | recCall = true; |
12541 | |
12542 | // Get hold of the argument count and field Handle |
12543 | args = emitGetInsCDinfo(id); |
12544 | |
12545 | // Is this a "fat" call descriptor? |
12546 | if (id->idIsLargeCall()) |
12547 | { |
12548 | instrDescCGCA* idCall = (instrDescCGCA*)id; |
12549 | gcrefRegs = idCall->idcGcrefRegs; |
12550 | byrefRegs = idCall->idcByrefRegs; |
12551 | VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); |
12552 | sz = sizeof(instrDescCGCA); |
12553 | } |
12554 | else |
12555 | { |
12556 | assert(!id->idIsLargeDsp()); |
12557 | assert(!id->idIsLargeCns()); |
12558 | |
12559 | gcrefRegs = emitDecodeCallGCregs(id); |
12560 | byrefRegs = 0; |
12561 | VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); |
12562 | sz = sizeof(instrDesc); |
12563 | } |
12564 | |
12565 | addr = (BYTE*)id->idAddr()->iiaAddr; |
12566 | assert(addr != nullptr); |
12567 | |
12568 | // Some helpers don't get recorded in GC tables |
12569 | if (id->idIsNoGC()) |
12570 | { |
12571 | recCall = false; |
12572 | } |
12573 | |
12574 | // What kind of a call do we have here? |
12575 | if (id->idInsFmt() == IF_METHPTR) |
12576 | { |
// This is an indirect call via a method pointer
12578 | |
12579 | code = insCodeMR(ins); |
12580 | if (ins == INS_i_jmp) |
12581 | { |
12582 | code |= 1; |
12583 | } |
12584 | |
12585 | if (id->idIsDspReloc()) |
12586 | { |
12587 | dst += emitOutputWord(dst, code | 0x0500); |
12588 | #ifdef _TARGET_AMD64_ |
12589 | dst += emitOutputLong(dst, 0); |
12590 | #else |
12591 | dst += emitOutputLong(dst, (int)addr); |
12592 | #endif |
12593 | emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32); |
12594 | } |
12595 | else |
12596 | { |
12597 | #ifdef _TARGET_X86_ |
12598 | dst += emitOutputWord(dst, code | 0x0500); |
12599 | #else //_TARGET_AMD64_ |
// Amd64: addr fits within 32 bits and can be encoded as a displacement relative to zero.
// This addressing mode should never be used while generating relocatable ngen code, nor if
// the addr can be encoded as a pc-relative address.
12603 | noway_assert(!emitComp->opts.compReloc); |
12604 | noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32); |
12605 | noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr); |
12606 | |
// This requires specifying a SIB byte after the ModRM byte.
12608 | dst += emitOutputWord(dst, code | 0x0400); |
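// SIB byte 0x25 (no index, base=101) together with mod=00 selects a 32-bit absolute [disp32] address.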
12609 | dst += emitOutputByte(dst, 0x25); |
12610 | #endif //_TARGET_AMD64_ |
12611 | dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr))); |
12612 | } |
12613 | goto DONE_CALL; |
12614 | } |
12615 | |
12616 | // Else |
// This is a direct call where we know the target, so we can
// emit it directly; the target to jump to is in iiaAddr.
12619 | assert(id->idInsFmt() == IF_METHOD); |
12620 | |
12621 | // Output the call opcode followed by the target distance |
12622 | dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins)); |
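// Both forms are followed by a 32-bit PC-relative displacement.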
12623 | |
12624 | ssize_t offset; |
12625 | #ifdef _TARGET_AMD64_ |
// All REL32 displacements on Amd64 go through emitRecordRelocation. Here we output zero just to advance dst.
12627 | offset = 0; |
12628 | assert(id->idIsDspReloc()); |
12629 | #else |
// Calculate the PC-relative displacement.
// Although you might think we should be using sizeof(void*), the x86 and x64 instruction sets
// only allow a 32-bit offset, so we correctly use sizeof(INT32)
12633 | offset = addr - (dst + sizeof(INT32)); |
12634 | #endif |
12635 | |
12636 | dst += emitOutputLong(dst, offset); |
12637 | |
12638 | if (id->idIsDspReloc()) |
12639 | { |
12640 | emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32); |
12641 | } |
12642 | |
12643 | DONE_CALL: |
12644 | |
12645 | /* We update the GC info before the call as the variables cannot be |
12646 | used by the call. Killing variables before the call helps with |
12647 | boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029. |
12648 | If we ever track aliased variables (which could be used by the |
12649 | call), we would have to keep them alive past the call. |
12650 | */ |
12651 | assert(FitsIn<unsigned char>(dst - *dp)); |
12652 | callInstrSize = static_cast<unsigned char>(dst - *dp); |
12653 | emitUpdateLiveGCvars(GCvars, *dp); |
12654 | |
12655 | // If the method returns a GC ref, mark EAX appropriately |
12656 | if (id->idGCref() == GCT_GCREF) |
12657 | { |
12658 | gcrefRegs |= RBM_EAX; |
12659 | } |
12660 | else if (id->idGCref() == GCT_BYREF) |
12661 | { |
12662 | byrefRegs |= RBM_EAX; |
12663 | } |
12664 | |
12665 | #ifdef UNIX_AMD64_ABI |
// If a multi-register return method is called, mark RDX appropriately (for System V AMD64).
12667 | if (id->idIsLargeCall()) |
12668 | { |
12669 | instrDescCGCA* idCall = (instrDescCGCA*)id; |
12670 | if (idCall->idSecondGCref() == GCT_GCREF) |
12671 | { |
12672 | gcrefRegs |= RBM_RDX; |
12673 | } |
12674 | else if (idCall->idSecondGCref() == GCT_BYREF) |
12675 | { |
12676 | byrefRegs |= RBM_RDX; |
12677 | } |
12678 | } |
12679 | #endif // UNIX_AMD64_ABI |
12680 | |
12681 | // If the GC register set has changed, report the new set |
12682 | if (gcrefRegs != emitThisGCrefRegs) |
12683 | { |
12684 | emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); |
12685 | } |
12686 | |
12687 | if (byrefRegs != emitThisByrefRegs) |
12688 | { |
12689 | emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); |
12690 | } |
12691 | |
12692 | if (recCall || args) |
12693 | { |
12694 | // For callee-pop, all arguments will be popped after the call. |
12695 | // For caller-pop, any GC arguments will go dead after the call. |
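// A non-negative 'args' value gives the number of argument slots to pop; a negative
// value means caller-pop, and -args argument slots are killed instead.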
12696 | |
12697 | assert(callInstrSize != 0); |
12698 | |
12699 | if (args >= 0) |
12700 | { |
12701 | emitStackPop(dst, /*isCall*/ true, callInstrSize, args); |
12702 | } |
12703 | else |
12704 | { |
12705 | emitStackKillArgs(dst, -args, callInstrSize); |
12706 | } |
12707 | } |
12708 | |
12709 | // Do we need to record a call location for GC purposes? |
12710 | if (!emitFullGCinfo && recCall) |
12711 | { |
12712 | assert(callInstrSize != 0); |
12713 | emitRecordGCcall(dst, callInstrSize); |
12714 | } |
12715 | |
12716 | #ifdef DEBUG |
12717 | if (ins == INS_call) |
12718 | { |
12719 | emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig, |
12720 | (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); |
12721 | } |
12722 | #endif // DEBUG |
12723 | |
12724 | break; |
12725 | |
12726 | /********************************************************************/ |
12727 | /* One register operand */ |
12728 | /********************************************************************/ |
12729 | |
12730 | case IF_RRD: |
12731 | case IF_RWR: |
12732 | case IF_RRW: |
12733 | dst = emitOutputR(dst, id); |
12734 | sz = SMALL_IDSC_SIZE; |
12735 | break; |
12736 | |
12737 | /********************************************************************/ |
12738 | /* Register and register/constant */ |
12739 | /********************************************************************/ |
12740 | |
12741 | case IF_RRW_SHF: |
12742 | code = insCodeMR(ins); |
12743 | // Emit the VEX prefix if it exists |
12744 | code = AddVexPrefixIfNeeded(ins, code, size); |
12745 | code = insEncodeMRreg(ins, id->idReg1(), size, code); |
12746 | |
// Set the W bit in the opcode for non-byte operands
12748 | if (size != EA_1BYTE) |
12749 | { |
12750 | code |= 1; |
12751 | } |
12752 | |
12753 | // Emit the REX prefix if it exists |
12754 | if (TakesRexWPrefix(ins, size)) |
12755 | { |
12756 | code = AddRexWPrefix(ins, code); |
12757 | } |
12758 | |
12759 | // Output a size prefix for a 16-bit operand |
12760 | if (size == EA_2BYTE) |
12761 | { |
12762 | dst += emitOutputByte(dst, 0x66); |
12763 | } |
12764 | |
12765 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
12766 | dst += emitOutputWord(dst, code); |
12767 | dst += emitOutputByte(dst, emitGetInsSC(id)); |
12768 | sz = emitSizeOfInsDsc(id); |
12769 | |
12770 | // Update GC info. |
12771 | assert(!id->idGCref()); |
12772 | emitGCregDeadUpd(id->idReg1(), dst); |
12773 | break; |
12774 | |
12775 | case IF_RRD_RRD: |
12776 | case IF_RWR_RRD: |
12777 | case IF_RRW_RRD: |
12778 | case IF_RRW_RRW: |
12779 | dst = emitOutputRR(dst, id); |
12780 | sz = SMALL_IDSC_SIZE; |
12781 | break; |
12782 | |
12783 | case IF_RRD_CNS: |
12784 | case IF_RWR_CNS: |
12785 | case IF_RRW_CNS: |
12786 | dst = emitOutputRI(dst, id); |
12787 | sz = emitSizeOfInsDsc(id); |
12788 | break; |
12789 | |
12790 | case IF_RWR_RRD_RRD: |
12791 | dst = emitOutputRRR(dst, id); |
12792 | sz = emitSizeOfInsDsc(id); |
12793 | break; |
12794 | case IF_RWR_RRD_RRD_CNS: |
12795 | case IF_RWR_RRD_RRD_RRD: |
12796 | dst = emitOutputRRR(dst, id); |
12797 | sz = emitSizeOfInsDsc(id); |
12798 | dst += emitOutputByte(dst, emitGetInsSC(id)); |
12799 | break; |
12800 | |
12801 | case IF_RRW_RRW_CNS: |
12802 | assert(id->idGCref() == GCT_NONE); |
12803 | |
12804 | // Get the 'base' opcode (it's a big one) |
12805 | // Also, determine which operand goes where in the ModRM byte. |
12806 | regNumber mReg; |
12807 | regNumber rReg; |
12808 | if (hasCodeMR(ins)) |
12809 | { |
12810 | code = insCodeMR(ins); |
12811 | // Emit the VEX prefix if it exists |
12812 | code = AddVexPrefixIfNeeded(ins, code, size); |
12813 | code = insEncodeMRreg(ins, code); |
12814 | mReg = id->idReg1(); |
12815 | rReg = id->idReg2(); |
12816 | } |
12817 | else if (hasCodeMI(ins)) |
12818 | { |
12819 | code = insCodeMI(ins); |
12820 | |
12821 | // Emit the VEX prefix if it exists |
12822 | code = AddVexPrefixIfNeeded(ins, code, size); |
12823 | |
12824 | assert((code & 0xC000) == 0); |
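// 0xC000 forces mod=11 (register operand) in the ModRM byte; the reg field will
// carry the shift's opcode extension from getSseShiftRegNumber below.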
12825 | code |= 0xC000; |
12826 | |
12827 | mReg = id->idReg2(); |
12828 | |
12829 | // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field. |
12830 | rReg = getSseShiftRegNumber(ins); |
12831 | } |
12832 | else |
12833 | { |
12834 | code = insCodeRM(ins); |
12835 | // Emit the VEX prefix if it exists |
12836 | code = AddVexPrefixIfNeeded(ins, code, size); |
12837 | code = insEncodeRMreg(ins, code); |
12838 | mReg = id->idReg2(); |
12839 | rReg = id->idReg1(); |
12840 | } |
12841 | assert(code & 0x00FF0000); |
12842 | |
12843 | if (TakesRexWPrefix(ins, size)) |
12844 | { |
12845 | code = AddRexWPrefix(ins, code); |
12846 | } |
12847 | |
12848 | if (TakesVexPrefix(ins)) |
12849 | { |
12850 | if (IsDstDstSrcAVXInstruction(ins)) |
12851 | { |
12852 | // Encode source/dest operand reg in 'vvvv' bits in 1's complement form |
12853 | // This code will have to change when we support 3 operands. |
12854 | // For now, we always overload this source with the destination (always reg1). |
12855 | // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination, |
12856 | // e.g. pslldq, when/if we support those instructions with 2 registers.) |
12857 | // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination) |
12858 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
12859 | } |
12860 | else if (IsDstSrcSrcAVXInstruction(ins)) |
12861 | { |
12862 | // This is a "merge" move instruction. |
12863 | // Encode source operand reg in 'vvvv' bits in 1's complement form |
12864 | code = insEncodeReg3456(ins, id->idReg2(), size, code); |
12865 | } |
12866 | } |
12867 | |
12868 | regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code)); |
12869 | |
12870 | // Output the REX prefix |
12871 | dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); |
12872 | |
12873 | if (code & 0xFF000000) |
12874 | { |
12875 | // Output the highest word of the opcode |
12876 | dst += emitOutputWord(dst, code >> 16); |
12877 | code &= 0x0000FFFF; |
12878 | |
12879 | if (Is4ByteSSEInstruction(ins)) |
12880 | { |
12881 | // Output 3rd byte of the opcode |
12882 | dst += emitOutputByte(dst, code); |
12883 | code &= 0xFF00; |
12884 | } |
12885 | } |
12886 | else if (code & 0x00FF0000) |
12887 | { |
12888 | dst += emitOutputByte(dst, code >> 16); |
12889 | code &= 0x0000FFFF; |
12890 | } |
12891 | |
// TODO-XArch-CQ: Right now we only support 4-byte opcode instructions
12893 | if ((code & 0xFF00) == 0xC000) |
12894 | { |
12895 | dst += emitOutputWord(dst, code | (regcode << 8)); |
12896 | } |
12897 | else if ((code & 0xFF) == 0x00) |
12898 | { |
12899 | // This case happens for some SSE/AVX instructions only |
12900 | assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins)); |
12901 | |
12902 | dst += emitOutputByte(dst, (code >> 8) & 0xFF); |
12903 | dst += emitOutputByte(dst, (0xC0 | regcode)); |
12904 | } |
12905 | else |
12906 | { |
12907 | dst += emitOutputWord(dst, code); |
12908 | dst += emitOutputByte(dst, (0xC0 | regcode)); |
12909 | } |
12910 | |
12911 | dst += emitOutputByte(dst, emitGetInsSC(id)); |
12912 | sz = emitSizeOfInsDsc(id); |
12913 | |
12914 | // Kill any GC ref in the destination register if necessary. |
12915 | if (!emitInsCanOnlyWriteSSE2OrAVXReg(id)) |
12916 | { |
12917 | emitGCregDeadUpd(id->idReg1(), dst); |
12918 | } |
12919 | break; |
12920 | |
12921 | /********************************************************************/ |
12922 | /* Address mode operand */ |
12923 | /********************************************************************/ |
12924 | |
12925 | case IF_ARD: |
12926 | case IF_AWR: |
12927 | case IF_ARW: |
12928 | |
12929 | dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize); |
12930 | |
12931 | switch (ins) |
12932 | { |
12933 | case INS_call: |
12934 | |
12935 | IND_CALL: |
12936 | // Get hold of the argument count and method handle |
12937 | args = emitGetInsCIargs(id); |
12938 | |
12939 | // Is this a "fat" call descriptor? |
12940 | if (id->idIsLargeCall()) |
12941 | { |
12942 | instrDescCGCA* idCall = (instrDescCGCA*)id; |
12943 | |
12944 | gcrefRegs = idCall->idcGcrefRegs; |
12945 | byrefRegs = idCall->idcByrefRegs; |
12946 | VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); |
12947 | sz = sizeof(instrDescCGCA); |
12948 | } |
12949 | else |
12950 | { |
12951 | assert(!id->idIsLargeDsp()); |
12952 | assert(!id->idIsLargeCns()); |
12953 | |
12954 | gcrefRegs = emitDecodeCallGCregs(id); |
12955 | byrefRegs = 0; |
12956 | VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); |
12957 | sz = sizeof(instrDesc); |
12958 | } |
12959 | |
12960 | recCall = true; |
12961 | |
12962 | goto DONE_CALL; |
12963 | |
12964 | default: |
12965 | sz = emitSizeOfInsDsc(id); |
12966 | break; |
12967 | } |
12968 | break; |
12969 | |
12970 | case IF_RRW_ARD_CNS: |
12971 | case IF_RWR_ARD_CNS: |
12972 | emitGetInsAmdCns(id, &cnsVal); |
12973 | code = insCodeRM(ins); |
12974 | |
12975 | // Special case 4-byte AVX instructions |
12976 | if (EncodedBySSE38orSSE3A(ins)) |
12977 | { |
12978 | dst = emitOutputAM(dst, id, code, &cnsVal); |
12979 | } |
12980 | else |
12981 | { |
12982 | code = AddVexPrefixIfNeeded(ins, code, size); |
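// Shift the ModRM reg-field bits into the second opcode byte; emitOutputAM merges
// them with the addressing-mode encoding.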
12983 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
12984 | dst = emitOutputAM(dst, id, code | regcode, &cnsVal); |
12985 | } |
12986 | |
12987 | sz = emitSizeOfInsDsc(id); |
12988 | break; |
12989 | |
12990 | case IF_AWR_RRD_CNS: |
12991 | assert(ins == INS_vextracti128 || ins == INS_vextractf128); |
12992 | assert(UseVEXEncoding()); |
12993 | emitGetInsAmdCns(id, &cnsVal); |
12994 | code = insCodeMR(ins); |
12995 | dst = emitOutputAM(dst, id, code, &cnsVal); |
12996 | sz = emitSizeOfInsDsc(id); |
12997 | break; |
12998 | |
12999 | case IF_RRD_ARD: |
13000 | case IF_RWR_ARD: |
13001 | case IF_RRW_ARD: |
13002 | case IF_RWR_RRD_ARD: |
13003 | { |
13004 | code = insCodeRM(ins); |
13005 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
13006 | { |
13007 | dst = emitOutputAM(dst, id, code); |
13008 | } |
13009 | else |
13010 | { |
13011 | code = AddVexPrefixIfNeeded(ins, code, size); |
13012 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13013 | dst = emitOutputAM(dst, id, code | regcode); |
13014 | } |
13015 | sz = emitSizeOfInsDsc(id); |
13016 | break; |
13017 | } |
13018 | |
13019 | case IF_RWR_ARD_RRD: |
13020 | { |
13021 | assert(IsAVX2GatherInstruction(ins)); |
13022 | code = insCodeRM(ins); |
13023 | dst = emitOutputAM(dst, id, code); |
13024 | sz = emitSizeOfInsDsc(id); |
13025 | break; |
13026 | } |
13027 | |
13028 | case IF_RWR_RRD_ARD_CNS: |
13029 | case IF_RWR_RRD_ARD_RRD: |
13030 | { |
13031 | emitGetInsAmdCns(id, &cnsVal); |
13032 | code = insCodeRM(ins); |
13033 | if (EncodedBySSE38orSSE3A(ins)) |
13034 | { |
13035 | dst = emitOutputAM(dst, id, code, &cnsVal); |
13036 | } |
13037 | else |
13038 | { |
13039 | code = AddVexPrefixIfNeeded(ins, code, size); |
13040 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13041 | dst = emitOutputAM(dst, id, code | regcode, &cnsVal); |
13042 | } |
13043 | sz = emitSizeOfInsDsc(id); |
13044 | break; |
13045 | } |
13046 | |
13047 | case IF_ARD_RRD: |
13048 | case IF_AWR_RRD: |
13049 | case IF_ARW_RRD: |
13050 | code = insCodeMR(ins); |
13051 | code = AddVexPrefixIfNeeded(ins, code, size); |
13052 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13053 | dst = emitOutputAM(dst, id, code | regcode); |
13054 | sz = emitSizeOfInsDsc(id); |
13055 | break; |
13056 | |
13057 | case IF_AWR_RRD_RRD: |
13058 | { |
13059 | code = insCodeMR(ins); |
13060 | code = AddVexPrefixIfNeeded(ins, code, size); |
13061 | dst = emitOutputAM(dst, id, code); |
13062 | sz = emitSizeOfInsDsc(id); |
13063 | break; |
13064 | } |
13065 | |
13066 | case IF_ARD_CNS: |
13067 | case IF_AWR_CNS: |
13068 | case IF_ARW_CNS: |
13069 | emitGetInsAmdCns(id, &cnsVal); |
13070 | dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal); |
13071 | sz = emitSizeOfInsDsc(id); |
13072 | break; |
13073 | |
13074 | case IF_ARW_SHF: |
13075 | emitGetInsAmdCns(id, &cnsVal); |
13076 | dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal); |
13077 | sz = emitSizeOfInsDsc(id); |
13078 | break; |
13079 | |
13080 | /********************************************************************/ |
13081 | /* Stack-based operand */ |
13082 | /********************************************************************/ |
13083 | |
13084 | case IF_SRD: |
13085 | case IF_SWR: |
13086 | case IF_SRW: |
13087 | |
13088 | assert(ins != INS_pop_hide); |
13089 | if (ins == INS_pop) |
13090 | { |
13091 | // The offset in "pop [ESP+xxx]" is relative to the new ESP value |
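// Temporarily lower the tracked stack level so emitOutputSV computes the offset against the post-pop ESP.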
13092 | CLANG_FORMAT_COMMENT_ANCHOR; |
13093 | |
13094 | #if !FEATURE_FIXED_OUT_ARGS |
13095 | emitCurStackLvl -= sizeof(int); |
13096 | #endif |
13097 | dst = emitOutputSV(dst, id, insCodeMR(ins)); |
13098 | |
13099 | #if !FEATURE_FIXED_OUT_ARGS |
13100 | emitCurStackLvl += sizeof(int); |
13101 | #endif |
13102 | break; |
13103 | } |
13104 | |
13105 | dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize); |
13106 | |
13107 | if (ins == INS_call) |
13108 | { |
13109 | goto IND_CALL; |
13110 | } |
13111 | |
13112 | break; |
13113 | |
13114 | case IF_SRD_CNS: |
13115 | case IF_SWR_CNS: |
13116 | case IF_SRW_CNS: |
13117 | emitGetInsCns(id, &cnsVal); |
13118 | dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal); |
13119 | sz = emitSizeOfInsDsc(id); |
13120 | break; |
13121 | |
13122 | case IF_SRW_SHF: |
13123 | emitGetInsCns(id, &cnsVal); |
13124 | dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal); |
13125 | sz = emitSizeOfInsDsc(id); |
13126 | break; |
13127 | |
13128 | case IF_RRW_SRD_CNS: |
13129 | case IF_RWR_SRD_CNS: |
13130 | emitGetInsCns(id, &cnsVal); |
13131 | code = insCodeRM(ins); |
13132 | |
13133 | // Special case 4-byte AVX instructions |
13134 | if (EncodedBySSE38orSSE3A(ins)) |
13135 | { |
13136 | dst = emitOutputSV(dst, id, code, &cnsVal); |
13137 | } |
13138 | else |
13139 | { |
13140 | code = AddVexPrefixIfNeeded(ins, code, size); |
13141 | |
13142 | // In case of AVX instructions that take 3 operands, encode reg1 as first source. |
13143 | // Note that reg1 is both a source and a destination. |
13144 | // |
13145 | // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For |
13146 | // now we use the single source as source1 and source2. |
13147 | // For this format, moves do not support a third operand, so we only need to handle the binary ops. |
13148 | if (IsDstDstSrcAVXInstruction(ins)) |
13149 | { |
13150 | // encode source operand reg in 'vvvv' bits in 1's complement form |
13151 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
13152 | } |
13153 | |
13154 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13155 | dst = emitOutputSV(dst, id, code | regcode, &cnsVal); |
13156 | } |
13157 | |
13158 | sz = emitSizeOfInsDsc(id); |
13159 | break; |
13160 | |
13161 | case IF_RRD_SRD: |
13162 | case IF_RWR_SRD: |
13163 | case IF_RRW_SRD: |
13164 | { |
13165 | code = insCodeRM(ins); |
13166 | |
13167 | // 4-byte AVX instructions are special cased inside emitOutputSV |
13168 | // since they do not have space to encode ModRM byte. |
13169 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
13170 | { |
13171 | dst = emitOutputSV(dst, id, code); |
13172 | } |
13173 | else |
13174 | { |
13175 | code = AddVexPrefixIfNeeded(ins, code, size); |
13176 | |
13177 | if (IsDstDstSrcAVXInstruction(ins)) |
13178 | { |
13179 | // encode source operand reg in 'vvvv' bits in 1's complement form |
13180 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
13181 | } |
13182 | |
13183 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13184 | dst = emitOutputSV(dst, id, code | regcode); |
13185 | } |
13186 | |
13187 | sz = emitSizeOfInsDsc(id); |
13188 | break; |
13189 | } |
13190 | |
13191 | case IF_RWR_RRD_SRD: |
13192 | { |
13193 | // This should only be called on AVX instructions |
13194 | assert(IsAVXInstruction(ins)); |
13195 | |
13196 | code = insCodeRM(ins); |
13197 | code = AddVexPrefixIfNeeded(ins, code, size); |
13198 | code = insEncodeReg3456(ins, id->idReg2(), size, |
13199 | code); // encode source operand reg in 'vvvv' bits in 1's complement form |
13200 | |
13201 | // 4-byte AVX instructions are special cased inside emitOutputSV |
13202 | // since they do not have space to encode ModRM byte. |
13203 | if (EncodedBySSE38orSSE3A(ins)) |
13204 | { |
13205 | dst = emitOutputSV(dst, id, code); |
13206 | } |
13207 | else |
13208 | { |
13209 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13210 | dst = emitOutputSV(dst, id, code | regcode); |
13211 | } |
13212 | break; |
13213 | } |
13214 | |
13215 | case IF_RWR_RRD_SRD_CNS: |
13216 | case IF_RWR_RRD_SRD_RRD: |
13217 | { |
13218 | // This should only be called on AVX instructions |
13219 | assert(IsAVXInstruction(ins)); |
13220 | emitGetInsCns(id, &cnsVal); |
13221 | |
13222 | code = insCodeRM(ins); |
13223 | code = AddVexPrefixIfNeeded(ins, code, size); |
13224 | code = insEncodeReg3456(ins, id->idReg2(), size, |
13225 | code); // encode source operand reg in 'vvvv' bits in 1's complement form |
13226 | |
13227 | // 4-byte AVX instructions are special cased inside emitOutputSV |
13228 | // since they do not have space to encode ModRM byte. |
13229 | if (EncodedBySSE38orSSE3A(ins)) |
13230 | { |
13231 | dst = emitOutputSV(dst, id, code, &cnsVal); |
13232 | } |
13233 | else |
13234 | { |
13235 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13236 | dst = emitOutputSV(dst, id, code | regcode, &cnsVal); |
13237 | } |
13238 | |
13239 | sz = emitSizeOfInsDsc(id); |
13240 | break; |
13241 | } |
13242 | |
13243 | case IF_SRD_RRD: |
13244 | case IF_SWR_RRD: |
13245 | case IF_SRW_RRD: |
13246 | code = insCodeMR(ins); |
13247 | code = AddVexPrefixIfNeeded(ins, code, size); |
13248 | |
13249 | // In case of AVX instructions that take 3 operands, encode reg1 as first source. |
13250 | // Note that reg1 is both a source and a destination. |
13251 | // |
13252 | // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For |
13253 | // now we use the single source as source1 and source2. |
13254 | // For this format, moves do not support a third operand, so we only need to handle the binary ops. |
13255 | if (IsDstDstSrcAVXInstruction(ins)) |
13256 | { |
13257 | // encode source operand reg in 'vvvv' bits in 1's complement form |
13258 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
13259 | } |
13260 | |
13261 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13262 | dst = emitOutputSV(dst, id, code | regcode); |
13263 | break; |
13264 | |
13265 | /********************************************************************/ |
13266 | /* Direct memory address */ |
13267 | /********************************************************************/ |
13268 | |
13269 | case IF_MRD: |
13270 | case IF_MRW: |
13271 | case IF_MWR: |
13272 | |
13273 | noway_assert(ins != INS_call); |
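// OR in 0x0500 to place 0x05 (mod=00, r/m=101) in the ModRM byte: the memory operand is a [disp32] address.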
13274 | dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500); |
13275 | sz = emitSizeOfInsDsc(id); |
13276 | break; |
13277 | |
13278 | case IF_MRD_OFF: |
13279 | dst = emitOutputCV(dst, id, insCodeMI(ins)); |
13280 | break; |
13281 | |
13282 | case IF_RRW_MRD_CNS: |
13283 | case IF_RWR_MRD_CNS: |
13284 | emitGetInsDcmCns(id, &cnsVal); |
13285 | code = insCodeRM(ins); |
13286 | |
13287 | // Special case 4-byte AVX instructions |
13288 | if (EncodedBySSE38orSSE3A(ins)) |
13289 | { |
13290 | dst = emitOutputCV(dst, id, code, &cnsVal); |
13291 | } |
13292 | else |
13293 | { |
13294 | code = AddVexPrefixIfNeeded(ins, code, size); |
13295 | |
13296 | // In case of AVX instructions that take 3 operands, encode reg1 as first source. |
13297 | // Note that reg1 is both a source and a destination. |
13298 | // |
13299 | // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For |
13300 | // now we use the single source as source1 and source2. |
13301 | // For this format, moves do not support a third operand, so we only need to handle the binary ops. |
13302 | if (IsDstDstSrcAVXInstruction(ins)) |
13303 | { |
13304 | // encode source operand reg in 'vvvv' bits in 1's complement form |
13305 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
13306 | } |
13307 | |
13308 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13309 | dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal); |
13310 | } |
13311 | |
13312 | sz = emitSizeOfInsDsc(id); |
13313 | break; |
13314 | |
13315 | case IF_MWR_RRD_CNS: |
13316 | assert(ins == INS_vextracti128 || ins == INS_vextractf128); |
13317 | assert(UseVEXEncoding()); |
13318 | emitGetInsDcmCns(id, &cnsVal); |
13319 | code = insCodeMR(ins); |
// Only AVX2 vextracti128 and AVX vextractf128 can reach this path;
// they do not need VEX.vvvv to encode the register operand
13322 | dst = emitOutputCV(dst, id, code, &cnsVal); |
13323 | sz = emitSizeOfInsDsc(id); |
13324 | break; |
13325 | |
13326 | case IF_RRD_MRD: |
13327 | case IF_RWR_MRD: |
13328 | case IF_RRW_MRD: |
13329 | { |
13330 | code = insCodeRM(ins); |
13331 | |
13332 | // Special case 4-byte AVX instructions |
13333 | if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) |
13334 | { |
13335 | dst = emitOutputCV(dst, id, code); |
13336 | } |
13337 | else |
13338 | { |
13339 | code = AddVexPrefixIfNeeded(ins, code, size); |
13340 | |
13341 | if (IsDstDstSrcAVXInstruction(ins)) |
13342 | { |
13343 | // encode source operand reg in 'vvvv' bits in 1's complement form |
13344 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
13345 | } |
13346 | |
13347 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13348 | dst = emitOutputCV(dst, id, code | regcode | 0x0500); |
13349 | } |
13350 | |
13351 | sz = emitSizeOfInsDsc(id); |
13352 | break; |
13353 | } |
13354 | |
13355 | case IF_RWR_RRD_MRD: |
13356 | { |
13357 | // This should only be called on AVX instructions |
13358 | assert(IsAVXInstruction(ins)); |
13359 | |
13360 | code = insCodeRM(ins); |
13361 | code = AddVexPrefixIfNeeded(ins, code, size); |
13362 | code = insEncodeReg3456(ins, id->idReg2(), size, |
13363 | code); // encode source operand reg in 'vvvv' bits in 1's complement form |
13364 | |
13365 | // Special case 4-byte AVX instructions |
13366 | if (EncodedBySSE38orSSE3A(ins)) |
13367 | { |
13368 | dst = emitOutputCV(dst, id, code); |
13369 | } |
13370 | else |
13371 | { |
13372 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13373 | dst = emitOutputCV(dst, id, code | regcode | 0x0500); |
13374 | } |
13375 | sz = emitSizeOfInsDsc(id); |
13376 | break; |
13377 | } |
13378 | |
13379 | case IF_RWR_RRD_MRD_CNS: |
13380 | case IF_RWR_RRD_MRD_RRD: |
13381 | { |
13382 | // This should only be called on AVX instructions |
13383 | assert(IsAVXInstruction(ins)); |
13384 | emitGetInsCns(id, &cnsVal); |
13385 | |
13386 | code = insCodeRM(ins); |
13387 | code = AddVexPrefixIfNeeded(ins, code, size); |
13388 | code = insEncodeReg3456(ins, id->idReg2(), size, |
13389 | code); // encode source operand reg in 'vvvv' bits in 1's complement form |
13390 | |
13391 | // Special case 4-byte AVX instructions |
13392 | if (EncodedBySSE38orSSE3A(ins)) |
13393 | { |
13394 | dst = emitOutputCV(dst, id, code, &cnsVal); |
13395 | } |
13396 | else |
13397 | { |
13398 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13399 | dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal); |
13400 | } |
13401 | sz = emitSizeOfInsDsc(id); |
13402 | break; |
13403 | } |
13404 | |
13405 | case IF_RWR_MRD_OFF: |
13406 | code = insCode(ins); |
13407 | code = AddVexPrefixIfNeeded(ins, code, size); |
13408 | |
13409 | // In case of AVX instructions that take 3 operands, encode reg1 as first source. |
13410 | // Note that reg1 is both a source and a destination. |
13411 | // |
13412 | // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For |
13413 | // now we use the single source as source1 and source2. |
13414 | // For this format, moves do not support a third operand, so we only need to handle the binary ops. |
13415 | if (IsDstDstSrcAVXInstruction(ins)) |
13416 | { |
13417 | // encode source operand reg in 'vvvv' bits in 1's complement form |
13418 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
13419 | } |
13420 | |
13421 | regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code); |
13422 | dst = emitOutputCV(dst, id, code | 0x30 | regcode); |
13423 | sz = emitSizeOfInsDsc(id); |
13424 | break; |
13425 | |
13426 | case IF_MRD_RRD: |
13427 | case IF_MWR_RRD: |
13428 | case IF_MRW_RRD: |
13429 | code = insCodeMR(ins); |
13430 | code = AddVexPrefixIfNeeded(ins, code, size); |
13431 | |
13432 | // In case of AVX instructions that take 3 operands, encode reg1 as first source. |
13433 | // Note that reg1 is both a source and a destination. |
13434 | // |
13435 | // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For |
13436 | // now we use the single source as source1 and source2. |
13437 | // For this format, moves do not support a third operand, so we only need to handle the binary ops. |
13438 | if (IsDstDstSrcAVXInstruction(ins)) |
13439 | { |
13440 | // encode source operand reg in 'vvvv' bits in 1's complement form |
13441 | code = insEncodeReg3456(ins, id->idReg1(), size, code); |
13442 | } |
13443 | |
13444 | regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); |
13445 | dst = emitOutputCV(dst, id, code | regcode | 0x0500); |
13446 | sz = emitSizeOfInsDsc(id); |
13447 | break; |
13448 | |
13449 | case IF_MRD_CNS: |
13450 | case IF_MWR_CNS: |
13451 | case IF_MRW_CNS: |
13452 | emitGetInsDcmCns(id, &cnsVal); |
13453 | dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal); |
13454 | sz = emitSizeOfInsDsc(id); |
13455 | break; |
13456 | |
13457 | case IF_MRW_SHF: |
13458 | emitGetInsDcmCns(id, &cnsVal); |
13459 | dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal); |
13460 | sz = emitSizeOfInsDsc(id); |
13461 | break; |
13462 | |
13463 | /********************************************************************/ |
13464 | /* oops */ |
13465 | /********************************************************************/ |
13466 | |
13467 | default: |
13468 | |
13469 | #ifdef DEBUG |
printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
assert(!"don't know how to encode this instruction");
13472 | #endif |
13473 | break; |
13474 | } |
13475 | |
13476 | // Make sure we set the instruction descriptor size correctly |
13477 | assert(sz == emitSizeOfInsDsc(id)); |
13478 | |
13479 | #if !FEATURE_FIXED_OUT_ARGS |
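// Stack-level tracking below applies only to main method body code, not to prolog or epilog groups.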
13480 | bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig); |
13481 | |
13482 | #if FEATURE_EH_FUNCLETS |
13483 | updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig); |
13484 | #endif // FEATURE_EH_FUNCLETS |
13485 | |
13486 | // Make sure we keep the current stack level up to date |
13487 | if (updateStackLevel) |
13488 | { |
13489 | switch (ins) |
13490 | { |
13491 | case INS_push: |
// Please note: {INS_push_hide, IF_LABEL} is used to push the address of the
// finally block so that it can be called locally for an op_leave.
13494 | emitStackPush(dst, id->idGCref()); |
13495 | break; |
13496 | |
13497 | case INS_pop: |
13498 | emitStackPop(dst, false, /*callInstrSize*/ 0, 1); |
13499 | break; |
13500 | |
13501 | case INS_sub: |
13502 | // Check for "sub ESP, icon" |
13503 | if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) |
13504 | { |
13505 | assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); |
13506 | emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE)); |
13507 | } |
13508 | break; |
13509 | |
13510 | case INS_add: |
13511 | // Check for "add ESP, icon" |
13512 | if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) |
13513 | { |
13514 | assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); |
13515 | emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0, |
13516 | (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE)); |
13517 | } |
13518 | break; |
13519 | |
13520 | default: |
13521 | break; |
13522 | } |
13523 | } |
13524 | |
13525 | #endif // !FEATURE_FIXED_OUT_ARGS |
13526 | |
13527 | assert((int)emitCurStackLvl >= 0); |
13528 | |
13529 | // Only epilog "instructions" and some pseudo-instrs |
13530 | // are allowed not to generate any code |
13531 | |
13532 | assert(*dp != dst || emitInstHasNoCode(ins)); |
13533 | |
13534 | #ifdef DEBUG |
13535 | if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose) |
13536 | { |
13537 | emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); |
13538 | } |
13539 | |
13540 | if (emitComp->compDebugBreak) |
13541 | { |
// Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
// at the beginning of this method.
13544 | if (JitConfig.JitEmitPrintRefRegs() != 0) |
13545 | { |
printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
13548 | printRegMaskInt(emitThisGCrefRegs); |
13549 | emitDispRegSet(emitThisGCrefRegs); |
printf("\n");
printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
13552 | printRegMaskInt(emitThisByrefRegs); |
13553 | emitDispRegSet(emitThisByrefRegs); |
printf("\n");
13555 | } |
13556 | |
// For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called to
// emit instruction a6 (i.e. IN00a6 in the jitdump).
13559 | if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) |
13560 | { |
assert(!"JitBreakEmitOutputInstr reached");
13562 | } |
13563 | } |
13564 | #endif |
13565 | |
13566 | #ifdef TRANSLATE_PDB |
13567 | if (*dp != dst) |
13568 | { |
13569 | // only map instruction groups to instruction groups |
13570 | MapCode(id->idDebugOnlyInfo()->idilStart, *dp); |
13571 | } |
13572 | #endif |
13573 | |
13574 | *dp = dst; |
13575 | |
13576 | #ifdef DEBUG |
13577 | if (ins == INS_mulEAX || ins == INS_imulEAX) |
13578 | { |
// INS_mulEAX has an implicit target of Edx:Eax. Make sure
// that we detected this and cleared its GC-status.
13581 | |
13582 | assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0); |
13583 | } |
13584 | |
13585 | if (instrIs3opImul(ins)) |
13586 | { |
13587 | // The target of the 3-operand imul is implicitly encoded. Make sure |
13588 | // that we detected the implicit register and cleared its GC-status. |
13589 | |
13590 | regMaskTP regMask = genRegMask(inst3opImulReg(ins)); |
13591 | assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0); |
13592 | } |
13593 | #endif |
13594 | |
13595 | return sz; |
13596 | } |
13597 | #ifdef _PREFAST_ |
13598 | #pragma warning(pop) |
13599 | #endif |
13600 | |
13601 | /*****************************************************************************/ |
13602 | /*****************************************************************************/ |
13603 | |
13604 | #endif // defined(_TARGET_XARCH_) |
13605 | |