1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7XX XX
8XX emitX86.cpp XX
9XX XX
10XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12*/
13
14#include "jitpch.h"
15#ifdef _MSC_VER
16#pragma hdrstop
17#endif
18
19#if defined(_TARGET_XARCH_)
20
21/*****************************************************************************/
22/*****************************************************************************/
23
24#include "instr.h"
25#include "emit.h"
26#include "codegen.h"
27
28bool IsSSEInstruction(instruction ins)
29{
30 return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_SSE_INSTRUCTION);
31}
32
33bool IsSSEOrAVXInstruction(instruction ins)
34{
35 return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
36}
37
38bool IsAVXOnlyInstruction(instruction ins)
39{
40 return (ins >= INS_FIRST_AVX_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
41}
42
43bool IsFMAInstruction(instruction ins)
44{
45 return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
46}
47
48bool IsBMIInstruction(instruction ins)
49{
50 return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
51}
52
53regNumber getBmiRegNumber(instruction ins)
54{
55 switch (ins)
56 {
57 case INS_blsi:
58 {
59 return (regNumber)3;
60 }
61
62 case INS_blsmsk:
63 {
64 return (regNumber)2;
65 }
66
67 case INS_blsr:
68 {
69 return (regNumber)1;
70 }
71
72 default:
73 {
74 assert(IsBMIInstruction(ins));
75 return REG_NA;
76 }
77 }
78}
79
80regNumber getSseShiftRegNumber(instruction ins)
81{
82 switch (ins)
83 {
84 case INS_psrldq:
85 {
86 return (regNumber)3;
87 }
88
89 case INS_pslldq:
90 {
91 return (regNumber)7;
92 }
93
94 case INS_psrld:
95 case INS_psrlw:
96 case INS_psrlq:
97 {
98 return (regNumber)2;
99 }
100
101 case INS_pslld:
102 case INS_psllw:
103 case INS_psllq:
104 {
105 return (regNumber)6;
106 }
107
108 case INS_psrad:
109 case INS_psraw:
110 {
111 return (regNumber)4;
112 }
113
114 default:
115 {
116 assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
117 return REG_NA;
118 }
119 }
120}
121
122bool emitter::IsAVXInstruction(instruction ins)
123{
124 return UseVEXEncoding() && IsSSEOrAVXInstruction(ins);
125}
126
127// Returns true if the AVX instruction is a binary operator that requires 3 operands.
128// When we emit an instruction with only two operands, we will duplicate the destination
129// as a source.
130// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
131// be formalized by adding an additional field to instruction table to
132// to indicate whether a 3-operand instruction.
133bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
134{
135 return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
136}
137
138// Returns true if the AVX instruction requires 3 operands that duplicate the source
139// register in the vvvv field.
140// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
141// be formalized by adding an additional field to instruction table to
142// to indicate whether a 3-operand instruction.
143bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
144{
145 return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != 0) && IsAVXInstruction(ins);
146}
147
148#ifdef FEATURE_HW_INTRINSICS
149//------------------------------------------------------------------------
150// IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or
151// "reg/mem, reg, imm" form for the legacy, VEX, and EVEX
152// encodings.
153//
154// Arguments:
155// instruction -- processor instruction to check
156//
157// Return Value:
158// true if instruction has a "reg, reg/mem, imm" or "reg/mem, reg, imm" encoding
159// form for the legacy, VEX, and EVEX encodings.
160//
161// That is, the instruction takes two operands, one of which is immediate, and it
162// does not need to encode any data in the VEX.vvvv field.
163//
164static bool IsDstSrcImmAvxInstruction(instruction ins)
165{
166 switch (ins)
167 {
168 case INS_aeskeygenassist:
169 case INS_extractps:
170 case INS_pextrb:
171 case INS_pextrw:
172 case INS_pextrd:
173 case INS_pextrq:
174 case INS_pshufd:
175 case INS_pshufhw:
176 case INS_pshuflw:
177 case INS_roundpd:
178 case INS_roundps:
179 return true;
180 default:
181 return false;
182 }
183}
184#endif // FEATURE_HW_INTRINSICS
185
186// -------------------------------------------------------------------
187// Is4ByteSSEInstruction: Returns true if the SSE instruction is a 4-byte opcode.
188//
189// Arguments:
190// ins - instruction
191//
192// Note that this should be true for any of the instructions in instrsXArch.h
193// that use the SSE38 or SSE3A macro but returns false if the VEX encoding is
194// in use, since that encoding does not require an additional byte.
195bool emitter::Is4ByteSSEInstruction(instruction ins)
196{
197 return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins);
198}
199
200// Returns true if this instruction requires a VEX prefix
201// All AVX instructions require a VEX prefix
202bool emitter::TakesVexPrefix(instruction ins)
203{
204 // special case vzeroupper as it requires 2-byte VEX prefix
205 // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
206 switch (ins)
207 {
208 case INS_lfence:
209 case INS_mfence:
210 case INS_movnti:
211 case INS_prefetchnta:
212 case INS_prefetcht0:
213 case INS_prefetcht1:
214 case INS_prefetcht2:
215 case INS_sfence:
216 case INS_vzeroupper:
217 return false;
218 default:
219 break;
220 }
221
222 return IsAVXInstruction(ins);
223}
224
225// Add base VEX prefix without setting W, R, X, or B bits
226// L bit will be set based on emitter attr.
227//
228// 2-byte VEX prefix = C5 <R,vvvv,L,pp>
229// 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
230// - R, X, B, W - bits to express corresponding REX prefixes
231// - m-mmmmm (5-bit)
232// 0-00001 - implied leading 0F opcode byte
233// 0-00010 - implied leading 0F 38 opcode bytes
234// 0-00011 - implied leading 0F 3A opcode bytes
235// Rest - reserved for future use and usage of them will uresult in Undefined instruction exception
236//
237// - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
238// - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1)
239// - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
240// these prefixes are treated mandatory when used with escape opcode 0Fh for
241// some SIMD instructions
242// 00 - None (0F - packed float)
243// 01 - 66 (66 0F - packed double)
244// 10 - F3 (F3 0F - scalar float
245// 11 - F2 (F2 0F - scalar double)
246#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
247#define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
248#define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
249emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
250{
251 // The 2-byte VEX encoding is preferred when possible, but actually emitting
252 // it depends on a number of factors that we may not know until much later.
253 //
254 // In order to handle this "easily", we just carry the 3-byte encoding all
255 // the way through and "fix-up" the encoding when the VEX prefix is actually
256 // emitted, by simply checking that all the requirements were met.
257
258 // Only AVX instructions require VEX prefix
259 assert(IsAVXInstruction(ins));
260
261 // Shouldn't have already added VEX prefix
262 assert(!hasVexPrefix(code));
263
264 assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0);
265
266 code |= DEFAULT_3BYTE_VEX_PREFIX;
267
268 if (attr == EA_32BYTE)
269 {
270 // Set L bit to 1 in case of instructions that operate on 256-bits.
271 code |= LBIT_IN_3BYTE_VEX_PREFIX;
272 }
273
274 return code;
275}
276
277// Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
278bool TakesRexWPrefix(instruction ins, emitAttr attr)
279{
280 // Because the current implementation of AVX does not have a way to distinguish between the register
281 // size specification (128 vs. 256 bits) and the operand size specification (32 vs. 64 bits), where both are
282 // required, the instruction must be created with the register size attribute (EA_16BYTE or EA_32BYTE),
283 // and here we must special case these by the opcode.
284 switch (ins)
285 {
286 case INS_vpermpd:
287 case INS_vpermq:
288 case INS_vpsrlvq:
289 case INS_vpsllvq:
290 case INS_pinsrq:
291 case INS_pextrq:
292 case INS_vfmadd132pd:
293 case INS_vfmadd213pd:
294 case INS_vfmadd231pd:
295 case INS_vfmadd132sd:
296 case INS_vfmadd213sd:
297 case INS_vfmadd231sd:
298 case INS_vfmaddsub132pd:
299 case INS_vfmaddsub213pd:
300 case INS_vfmaddsub231pd:
301 case INS_vfmsubadd132pd:
302 case INS_vfmsubadd213pd:
303 case INS_vfmsubadd231pd:
304 case INS_vfmsub132pd:
305 case INS_vfmsub213pd:
306 case INS_vfmsub231pd:
307 case INS_vfmsub132sd:
308 case INS_vfmsub213sd:
309 case INS_vfmsub231sd:
310 case INS_vfnmadd132pd:
311 case INS_vfnmadd213pd:
312 case INS_vfnmadd231pd:
313 case INS_vfnmadd132sd:
314 case INS_vfnmadd213sd:
315 case INS_vfnmadd231sd:
316 case INS_vfnmsub132pd:
317 case INS_vfnmsub213pd:
318 case INS_vfnmsub231pd:
319 case INS_vfnmsub132sd:
320 case INS_vfnmsub213sd:
321 case INS_vfnmsub231sd:
322 case INS_vpmaskmovq:
323 case INS_vpgatherdq:
324 case INS_vpgatherqq:
325 case INS_vgatherdpd:
326 case INS_vgatherqpd:
327 return true;
328 default:
329 break;
330 }
331
332#ifdef _TARGET_AMD64_
333 // movsx should always sign extend out to 8 bytes just because we don't track
334 // whether the dest should be 4 bytes or 8 bytes (attr indicates the size
335 // of the source, not the dest).
336 // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special
337 // cased here.
338 //
339 // Rex_jmp = jmp with rex prefix always requires rex.w prefix.
340 if (ins == INS_movsx || ins == INS_rex_jmp)
341 {
342 return true;
343 }
344
345 if (EA_SIZE(attr) != EA_8BYTE)
346 {
347 return false;
348 }
349
350 if (IsSSEOrAVXInstruction(ins))
351 {
352 switch (ins)
353 {
354 case INS_andn:
355 case INS_bextr:
356 case INS_blsi:
357 case INS_blsmsk:
358 case INS_blsr:
359 case INS_bzhi:
360 case INS_cvttsd2si:
361 case INS_cvttss2si:
362 case INS_cvtsd2si:
363 case INS_cvtss2si:
364 case INS_cvtsi2sd:
365 case INS_cvtsi2ss:
366 case INS_mov_xmm2i:
367 case INS_mov_i2xmm:
368 case INS_movnti:
369 case INS_mulx:
370 case INS_pdep:
371 case INS_pext:
372 return true;
373 default:
374 return false;
375 }
376 }
377
378 // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
379 // opcodes...
380 // These are all the instructions that default to 8-byte operand without the REX.W bit
381 // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
382 // so we never need it
383 if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
384 (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
385 {
386 return true;
387 }
388 else
389 {
390 return false;
391 }
392#else //!_TARGET_AMD64 = _TARGET_X86_
393 return false;
394#endif //!_TARGET_AMD64_
395}
396
397// Returns true if using this register will require a REX.* prefix.
398// Since XMM registers overlap with YMM registers, this routine
399// can also be used to know whether a YMM register if the
400// instruction in question is AVX.
401bool IsExtendedReg(regNumber reg)
402{
403#ifdef _TARGET_AMD64_
404 return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15));
405#else
406 // X86 JIT operates in 32-bit mode and hence extended reg are not available.
407 return false;
408#endif
409}
410
411// Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix
412bool IsExtendedReg(regNumber reg, emitAttr attr)
413{
414#ifdef _TARGET_AMD64_
415 // Not a register, so doesn't need a prefix
416 if (reg > REG_XMM15)
417 {
418 return false;
419 }
420
421 // Opcode field only has 3 bits for the register, these high registers
422 // need a 4th bit, that comes from the REX prefix (eiter REX.X, REX.R, or REX.B)
423 if (IsExtendedReg(reg))
424 {
425 return true;
426 }
427
428 if (EA_SIZE(attr) != EA_1BYTE)
429 {
430 return false;
431 }
432
433 // There are 12 one byte registers addressible 'below' r8b:
434 // al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
435 // The first 4 are always addressible, the last 8 are divided into 2 sets:
436 // ah, ch, dh, bh
437 // -- or --
438 // spl, bpl, sil, dil
439 // Both sets are encoded exactly the same, the difference is the presence
440 // of a REX prefix, even a REX prefix with no other bits set (0x40).
441 // So in order to get to the second set we need a REX prefix (but no bits).
442 //
443 // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
444 // encoding/tracking/encoding registers.
445 return (reg >= REG_RSP);
446#else
447 // X86 JIT operates in 32-bit mode and hence extended reg are not available.
448 return false;
449#endif
450}
451
452// Since XMM registers overlap with YMM registers, this routine
453// can also used to know whether a YMM register in case of AVX instructions.
454bool IsXMMReg(regNumber reg)
455{
456#ifdef _TARGET_AMD64_
457 return (reg >= REG_XMM0) && (reg <= REG_XMM15);
458#else // !_TARGET_AMD64_
459 return (reg >= REG_XMM0) && (reg <= REG_XMM7);
460#endif // !_TARGET_AMD64_
461}
462
463// Returns bits to be encoded in instruction for the given register.
464unsigned RegEncoding(regNumber reg)
465{
466 static_assert((REG_XMM0 & 0x7) == 0, "bad XMMBASE");
467 return (unsigned)(reg & 0x7);
468}
469
470// Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
471// SSE2: separate 1-byte prefix gets added before opcode.
472// AVX: specific bits within VEX prefix need to be set in bit-inverted form.
473emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
474{
475 if (UseVEXEncoding() && IsAVXInstruction(ins))
476 {
477 if (TakesVexPrefix(ins))
478 {
479 // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
480 assert(hasVexPrefix(code));
481
482 // W-bit is the only bit that is added in non bit-inverted form.
483 return emitter::code_t(code | 0x00008000000000ULL);
484 }
485 }
486#ifdef _TARGET_AMD64_
487 return emitter::code_t(code | 0x4800000000ULL);
488#else
489 assert(!"UNREACHED");
490 return code;
491#endif
492}
493
494#ifdef _TARGET_AMD64_
495
496emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
497{
498 if (UseVEXEncoding() && IsAVXInstruction(ins))
499 {
500 if (TakesVexPrefix(ins))
501 {
502 // R-bit is supported by both 2-byte and 3-byte VEX prefix
503 assert(hasVexPrefix(code));
504
505 // R-bit is added in bit-inverted form.
506 return code & 0xFF7FFFFFFFFFFFULL;
507 }
508 }
509
510 return code | 0x4400000000ULL;
511}
512
513emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
514{
515 if (UseVEXEncoding() && IsAVXInstruction(ins))
516 {
517 if (TakesVexPrefix(ins))
518 {
519 // X-bit is available only in 3-byte VEX prefix that starts with byte C4.
520 assert(hasVexPrefix(code));
521
522 // X-bit is added in bit-inverted form.
523 return code & 0xFFBFFFFFFFFFFFULL;
524 }
525 }
526
527 return code | 0x4200000000ULL;
528}
529
530emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
531{
532 if (UseVEXEncoding() && IsAVXInstruction(ins))
533 {
534 if (TakesVexPrefix(ins))
535 {
536 // B-bit is available only in 3-byte VEX prefix that starts with byte C4.
537 assert(hasVexPrefix(code));
538
539 // B-bit is added in bit-inverted form.
540 return code & 0xFFDFFFFFFFFFFFULL;
541 }
542 }
543
544 return code | 0x4100000000ULL;
545}
546
547// Adds REX prefix (0x40) without W, R, X or B bits set
548emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
549{
550 assert(!UseVEXEncoding() || !IsAVXInstruction(ins));
551 return code | 0x4000000000ULL;
552}
553
554#endif //_TARGET_AMD64_
555
556bool isPrefix(BYTE b)
557{
558 assert(b != 0); // Caller should check this
559 assert(b != 0x67); // We don't use the address size prefix
560 assert(b != 0x65); // The GS segment override prefix is emitted separately
561 assert(b != 0x64); // The FS segment override prefix is emitted separately
562 assert(b != 0xF0); // The lock prefix is emitted separately
563 assert(b != 0x2E); // We don't use the CS segment override prefix
564 assert(b != 0x3E); // Or the DS segment override prefix
565 assert(b != 0x26); // Or the ES segment override prefix
566 assert(b != 0x36); // Or the SS segment override prefix
567
568 // That just leaves the size prefixes used in SSE opcodes:
569 // Scalar Double Scalar Single Packed Double
570 return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
571}
572
573// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
574unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code)
575{
576 if (hasVexPrefix(code))
577 {
578 // Only AVX instructions should have a VEX prefix
579 assert(UseVEXEncoding() && IsAVXInstruction(ins));
580 code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
581 code &= 0x00000000FFFFFFFFLL;
582
583 WORD leadingBytes = 0;
584 BYTE check = (code >> 24) & 0xFF;
585 if (check != 0)
586 {
587 // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
588 // 4-byte opcode: with the bytes ordered as 0x22114433
589 // check for a prefix in the 11 position
590 BYTE sizePrefix = (code >> 16) & 0xFF;
591 if ((sizePrefix != 0) && isPrefix(sizePrefix))
592 {
593 // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
594 //
595 // 00 - None (0F - packed float)
596 // 01 - 66 (66 0F - packed double)
597 // 10 - F3 (F3 0F - scalar float
598 // 11 - F2 (F2 0F - scalar double)
599 switch (sizePrefix)
600 {
601 case 0x66:
602 if (IsBMIInstruction(ins))
603 {
604 switch (ins)
605 {
606 case INS_pdep:
607 case INS_mulx:
608 {
609 vexPrefix |= 0x03;
610 break;
611 }
612
613 case INS_pext:
614 {
615 vexPrefix |= 0x02;
616 break;
617 }
618
619 default:
620 {
621 vexPrefix |= 0x00;
622 break;
623 }
624 }
625 }
626 else
627 {
628 vexPrefix |= 0x01;
629 }
630 break;
631 case 0xF3:
632 vexPrefix |= 0x02;
633 break;
634 case 0xF2:
635 vexPrefix |= 0x03;
636 break;
637 default:
638 assert(!"unrecognized SIMD size prefix");
639 unreached();
640 }
641
642 // Now the byte in the 22 position must be an escape byte 0F
643 leadingBytes = check;
644 assert(leadingBytes == 0x0F);
645
646 // Get rid of both sizePrefix and escape byte
647 code &= 0x0000FFFFLL;
648
649 // Check the byte in the 33 position to see if it is 3A or 38.
650 // In such a case escape bytes must be 0x0F3A or 0x0F38
651 check = code & 0xFF;
652 if (check == 0x3A || check == 0x38)
653 {
654 leadingBytes = (leadingBytes << 8) | check;
655 code &= 0x0000FF00LL;
656 }
657 }
658 }
659 else
660 {
661 // 2-byte opcode with the bytes ordered as 0x0011RM22
662 // the byte in position 11 must be an escape byte.
663 leadingBytes = (code >> 16) & 0xFF;
664 assert(leadingBytes == 0x0F || leadingBytes == 0x00);
665 code &= 0xFFFF;
666 }
667
668 // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
669 // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
670 // implied leading bytes. 0x0F is supported by both the 2-byte and
671 // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by
672 // the 3-byte version.
673
674 switch (leadingBytes)
675 {
676 case 0x00:
677 // there is no leading byte
678 break;
679 case 0x0F:
680 vexPrefix |= 0x0100;
681 break;
682 case 0x0F38:
683 vexPrefix |= 0x0200;
684 break;
685 case 0x0F3A:
686 vexPrefix |= 0x0300;
687 break;
688 default:
689 assert(!"encountered unknown leading bytes");
690 unreached();
691 }
692
693 // At this point
694 // VEX.2211RM33 got transformed as VEX.0000RM33
695 // VEX.0011RM22 got transformed as VEX.0000RM22
696 //
697 // Now output VEX prefix leaving the 4-byte opcode
698
699 // The 2-byte VEX encoding, requires that the X and B-bits are set (these
700 // bits are inverted from the REX values so set means off), the W-bit is
701 // not set (this bit is not inverted), and that the m-mmmm bits are 0-0001
702 // (the 2-byte VEX encoding only supports the 0x0F leading byte). When these
703 // conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then
704 // byte-1 is the logical-or of bit 7 from byte-1 and bits 0-6 from byte 2
705 // from the 3-byte VEX encoding.
706 //
707 // Given the above, the check can be reduced to a simple mask and comparison.
708 // * 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about:
709 // * R can be set or unset (0x7F ignores bit 7)
710 // * vvvv can be any value (0x80 ignores bits 3-6)
711 // * L can be set or unset (0x80 ignores bit 2)
712 // * pp can be any value (0x80 ignores bits 0-1)
713 // * 0x00C46100 is a value that signifies the requirements listed above were met:
714 // * We must be a three-byte VEX opcode (0x00C4)
715 // * X and B must be set (0x61 validates bits 5-6)
716 // * m-mmmm must be 0-00001 (0x61 validates bits 0-4)
717 // * W must be unset (0x00 validates bit 7)
718 if ((vexPrefix & 0xFFFF7F80) == 0x00C46100)
719 {
720 emitOutputByte(dst, 0xC5);
721 emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F));
722 return 2;
723 }
724
725 emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF));
726 emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF));
727 emitOutputByte(dst + 2, vexPrefix & 0xFF);
728 return 3;
729 }
730
731#ifdef _TARGET_AMD64_
732 if (code > 0x00FFFFFFFFLL)
733 {
734 BYTE prefix = (code >> 32) & 0xFF;
735 noway_assert(prefix >= 0x40 && prefix <= 0x4F);
736 code &= 0x00000000FFFFFFFFLL;
737
738 // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
739 // we can remove this code as well
740
741 // The REX prefix is required to come after all other prefixes.
742 // Some of our 'opcodes' actually include some prefixes, if that
743 // is the case, shift them over and place the REX prefix after
744 // the other prefixes, and emit any prefix that got moved out.
745 BYTE check = (code >> 24) & 0xFF;
746 if (check == 0)
747 {
748 // 3-byte opcode: with the bytes ordered as 0x00113322
749 // check for a prefix in the 11 position
750 check = (code >> 16) & 0xFF;
751 if (check != 0 && isPrefix(check))
752 {
753 // Swap the rex prefix and whatever this prefix is
754 code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL));
755 // and then emit the other prefix
756 return emitOutputByte(dst, check);
757 }
758 }
759 else
760 {
761 // 4-byte opcode with the bytes ordered as 0x22114433
762 // first check for a prefix in the 11 position
763 BYTE check2 = (code >> 16) & 0xFF;
764 if (isPrefix(check2))
765 {
766 assert(!isPrefix(check)); // We currently don't use this, so it is untested
767 if (isPrefix(check))
768 {
769 // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
770 // Change to c2rrc1XXXX, and emit check2 now
771 code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL));
772 }
773 else
774 {
775 // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
776 // Change to c2XXrrXXXX, and emit check2 now
777 code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL));
778 }
779 return emitOutputByte(dst, check2);
780 }
781 }
782
783 return emitOutputByte(dst, prefix);
784 }
785#endif // _TARGET_AMD64_
786
787 return 0;
788}
789
790#ifdef _TARGET_AMD64_
791/*****************************************************************************
792 * Is the last instruction emitted a call instruction?
793 */
794bool emitter::emitIsLastInsCall()
795{
796 if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
797 {
798 return true;
799 }
800
801 return false;
802}
803
804/*****************************************************************************
805 * We're about to create an epilog. If the last instruction we output was a 'call',
806 * then we need to insert a NOP, to allow for proper exception-handling behavior.
807 */
808void emitter::emitOutputPreEpilogNOP()
809{
810 if (emitIsLastInsCall())
811 {
812 emitIns(INS_nop);
813 }
814}
815
816#endif //_TARGET_AMD64_
817
818// Size of rex prefix in bytes
819unsigned emitter::emitGetRexPrefixSize(instruction ins)
820{
821 // In case of AVX instructions, REX prefixes are part of VEX prefix.
822 // And hence requires no additional byte to encode REX prefixes.
823 if (IsAVXInstruction(ins))
824 {
825 return 0;
826 }
827
828 // If not AVX, then we would need 1-byte to encode REX prefix.
829 return 1;
830}
831
832// Size of vex prefix in bytes
833unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
834{
835 if (IsAVXInstruction(ins))
836 {
837 return 3;
838 }
839
840 // If not AVX, then we don't need to encode vex prefix.
841 return 0;
842}
843
844// VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
845// Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always overstimate.
846// Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so that
847// instruction size estimate will be accurate.
848// Basically this function will decrease the vexPrefixSize,
849// so that opcodeSize + vexPrefixAdjustedSize will be the right size.
850// rightOpcodeSize + vexPrefixSize
851//=(opcodeSize - ExtrabytesSize) + vexPrefixSize
852//=opcodeSize + (vexPrefixSize - ExtrabytesSize)
853//=opcodeSize + vexPrefixAdjustedSize
854unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code)
855{
856 if (IsAVXInstruction(ins))
857 {
858 unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
859 assert(vexPrefixAdjustedSize == 3);
860
861 // In this case, opcode will contains escape prefix at least one byte,
862 // vexPrefixAdjustedSize should be minus one.
863 vexPrefixAdjustedSize -= 1;
864
865 // Get the fourth byte in Opcode.
866 // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
867 BYTE check = (code >> 24) & 0xFF;
868 if (check != 0)
869 {
870 // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
871 // 4-byte opcode: with the bytes ordered as 0x22114433
872 // Simd prefix is at the first byte.
873 BYTE sizePrefix = (code >> 16) & 0xFF;
874 if (sizePrefix != 0 && isPrefix(sizePrefix))
875 {
876 vexPrefixAdjustedSize -= 1;
877 }
878
879 // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
880 // But in this case the opcode has not counted R\M part.
881 // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
882 //=opcodeSize + VexPrefixAdjustedSize -1 + 1
883 //=opcodeSize + VexPrefixAdjustedSize
884 // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize.
885 }
886
887 return vexPrefixAdjustedSize;
888 }
889 return 0;
890}
891
892// Get size of rex or vex prefix emitted in code
893unsigned emitter::emitGetPrefixSize(code_t code)
894{
895 if (hasVexPrefix(code))
896 {
897 return 3;
898 }
899
900 if (hasRexPrefix(code))
901 {
902 return 1;
903 }
904
905 return 0;
906}
907
908#ifdef _TARGET_X86_
909/*****************************************************************************
910 *
911 * Record a non-empty stack
912 */
913
914void emitter::emitMarkStackLvl(unsigned stackLevel)
915{
916 assert(int(stackLevel) >= 0);
917 assert(emitCurStackLvl == 0);
918 assert(emitCurIG->igStkLvl == 0);
919 assert(emitCurIGfreeNext == emitCurIGfreeBase);
920
921 assert(stackLevel && stackLevel % sizeof(int) == 0);
922
923 emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
924
925 if (emitMaxStackDepth < emitCurStackLvl)
926 {
927 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
928 emitMaxStackDepth = emitCurStackLvl;
929 }
930}
931#endif
932
933/*****************************************************************************
934 *
935 * Get hold of the address mode displacement value for an indirect call.
936 */
937
938inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
939{
940 if (id->idIsLargeCall())
941 {
942 return ((instrDescCGCA*)id)->idcDisp;
943 }
944 else
945 {
946 assert(!id->idIsLargeDsp());
947 assert(!id->idIsLargeCns());
948
949 return id->idAddr()->iiaAddrMode.amDisp;
950 }
951}
952
953/** ***************************************************************************
954 *
955 * The following table is used by the instIsFP()/instUse/DefFlags() helpers.
956 */
957
958// clang-format off
959const insFlags CodeGenInterface::instInfo[] =
960{
961 #define INST0(id, nm, um, mr, flags) static_cast<insFlags>(flags),
962 #define INST1(id, nm, um, mr, flags) static_cast<insFlags>(flags),
963 #define INST2(id, nm, um, mr, mi, flags) static_cast<insFlags>(flags),
964 #define INST3(id, nm, um, mr, mi, rm, flags) static_cast<insFlags>(flags),
965 #define INST4(id, nm, um, mr, mi, rm, a4, flags) static_cast<insFlags>(flags),
966 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) static_cast<insFlags>(flags),
967 #include "instrs.h"
968 #undef INST0
969 #undef INST1
970 #undef INST2
971 #undef INST3
972 #undef INST4
973 #undef INST5
974};
975// clang-format on
976
977/*****************************************************************************
978 *
979 * Initialize the table used by emitInsModeFormat().
980 */
981
982// clang-format off
983const BYTE emitter::emitInsModeFmtTab[] =
984{
985 #define INST0(id, nm, um, mr, flags) um,
986 #define INST1(id, nm, um, mr, flags) um,
987 #define INST2(id, nm, um, mr, mi, flags) um,
988 #define INST3(id, nm, um, mr, mi, rm, flags) um,
989 #define INST4(id, nm, um, mr, mi, rm, a4, flags) um,
990 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) um,
991 #include "instrs.h"
992 #undef INST0
993 #undef INST1
994 #undef INST2
995 #undef INST3
996 #undef INST4
997 #undef INST5
998};
999// clang-format on
1000
1001#ifdef DEBUG
1002unsigned const emitter::emitInsModeFmtCnt = _countof(emitInsModeFmtTab);
1003#endif
1004
1005/*****************************************************************************
1006 *
1007 * Combine the given base format with the update mode of the instuction.
1008 */
1009
1010inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
1011{
1012 assert(IF_RRD + IUM_RD == IF_RRD);
1013 assert(IF_RRD + IUM_WR == IF_RWR);
1014 assert(IF_RRD + IUM_RW == IF_RRW);
1015
1016 return (insFormat)(base + emitInsUpdateMode(ins));
1017}
1018
1019// This is a helper we need due to Vs Whidbey #254016 in order to distinguish
1020// if we can not possibly be updating an integer register. This is not the best
1021// solution, but the other ones (see bug) are going to be much more complicated.
1022bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
1023{
1024 instruction ins = id->idIns();
1025
1026 if (!IsSSEOrAVXInstruction(ins))
1027 {
1028 return false;
1029 }
1030
1031 switch (ins)
1032 {
1033 case INS_andn:
1034 case INS_bextr:
1035 case INS_blsi:
1036 case INS_blsmsk:
1037 case INS_blsr:
1038 case INS_bzhi:
1039 case INS_cvttsd2si:
1040 case INS_cvttss2si:
1041 case INS_cvtsd2si:
1042 case INS_cvtss2si:
1043 case INS_extractps:
1044 case INS_mov_xmm2i:
1045 case INS_movmskpd:
1046 case INS_movmskps:
1047 case INS_mulx:
1048 case INS_pdep:
1049 case INS_pext:
1050 case INS_pmovmskb:
1051 case INS_pextrb:
1052 case INS_pextrd:
1053 case INS_pextrq:
1054 case INS_pextrw:
1055 case INS_pextrw_sse41:
1056 {
1057 // These SSE instructions write to a general purpose integer register.
1058 return false;
1059 }
1060
1061 default:
1062 {
1063 return true;
1064 }
1065 }
1066}
1067
1068/*****************************************************************************
1069 *
1070 * Returns the base encoding of the given CPU instruction.
1071 */
1072
1073inline size_t insCode(instruction ins)
1074{
1075 // clang-format off
1076 const static
1077 size_t insCodes[] =
1078 {
1079 #define INST0(id, nm, um, mr, flags) mr,
1080 #define INST1(id, nm, um, mr, flags) mr,
1081 #define INST2(id, nm, um, mr, mi, flags) mr,
1082 #define INST3(id, nm, um, mr, mi, rm, flags) mr,
1083 #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
1084 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1085 #include "instrs.h"
1086 #undef INST0
1087 #undef INST1
1088 #undef INST2
1089 #undef INST3
1090 #undef INST4
1091 #undef INST5
1092 };
1093 // clang-format on
1094
1095 assert((unsigned)ins < _countof(insCodes));
1096 assert((insCodes[ins] != BAD_CODE));
1097
1098 return insCodes[ins];
1099}
1100
1101/*****************************************************************************
1102 *
1103 * Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
1104 */
1105
1106inline size_t insCodeACC(instruction ins)
1107{
1108 // clang-format off
1109 const static
1110 size_t insCodesACC[] =
1111 {
1112 #define INST0(id, nm, um, mr, flags)
1113 #define INST1(id, nm, um, mr, flags)
1114 #define INST2(id, nm, um, mr, mi, flags)
1115 #define INST3(id, nm, um, mr, mi, rm, flags)
1116 #define INST4(id, nm, um, mr, mi, rm, a4, flags) a4,
1117 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) a4,
1118 #include "instrs.h"
1119 #undef INST0
1120 #undef INST1
1121 #undef INST2
1122 #undef INST3
1123 #undef INST4
1124 #undef INST5
1125 };
1126 // clang-format on
1127
1128 assert((unsigned)ins < _countof(insCodesACC));
1129 assert((insCodesACC[ins] != BAD_CODE));
1130
1131 return insCodesACC[ins];
1132}
1133
1134/*****************************************************************************
1135 *
1136 * Returns the "register" encoding of the given CPU instruction.
1137 */
1138
1139inline size_t insCodeRR(instruction ins)
1140{
1141 // clang-format off
1142 const static
1143 size_t insCodesRR[] =
1144 {
1145 #define INST0(id, nm, um, mr, flags)
1146 #define INST1(id, nm, um, mr, flags)
1147 #define INST2(id, nm, um, mr, mi, flags)
1148 #define INST3(id, nm, um, mr, mi, rm, flags)
1149 #define INST4(id, nm, um, mr, mi, rm, a4, flags)
1150 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rr,
1151 #include "instrs.h"
1152 #undef INST0
1153 #undef INST1
1154 #undef INST2
1155 #undef INST3
1156 #undef INST4
1157 #undef INST5
1158 };
1159 // clang-format on
1160
1161 assert((unsigned)ins < _countof(insCodesRR));
1162 assert((insCodesRR[ins] != BAD_CODE));
1163
1164 return insCodesRR[ins];
1165}
1166
1167// clang-format off
1168const static
1169size_t insCodesRM[] =
1170{
1171 #define INST0(id, nm, um, mr, flags)
1172 #define INST1(id, nm, um, mr, flags)
1173 #define INST2(id, nm, um, mr, mi, flags)
1174 #define INST3(id, nm, um, mr, mi, rm, flags) rm,
1175 #define INST4(id, nm, um, mr, mi, rm, a4, flags) rm,
1176 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rm,
1177 #include "instrs.h"
1178 #undef INST0
1179 #undef INST1
1180 #undef INST2
1181 #undef INST3
1182 #undef INST4
1183 #undef INST5
1184};
1185// clang-format on
1186
1187// Returns true iff the give CPU instruction has an RM encoding.
1188inline bool hasCodeRM(instruction ins)
1189{
1190 assert((unsigned)ins < _countof(insCodesRM));
1191 return ((insCodesRM[ins] != BAD_CODE));
1192}
1193
1194/*****************************************************************************
1195 *
1196 * Returns the "reg, [r/m]" encoding of the given CPU instruction.
1197 */
1198
1199inline size_t insCodeRM(instruction ins)
1200{
1201 assert((unsigned)ins < _countof(insCodesRM));
1202 assert((insCodesRM[ins] != BAD_CODE));
1203
1204 return insCodesRM[ins];
1205}
1206
1207// clang-format off
1208const static
1209size_t insCodesMI[] =
1210{
1211 #define INST0(id, nm, um, mr, flags)
1212 #define INST1(id, nm, um, mr, flags)
1213 #define INST2(id, nm, um, mr, mi, flags) mi,
1214 #define INST3(id, nm, um, mr, mi, rm, flags) mi,
1215 #define INST4(id, nm, um, mr, mi, rm, a4, flags) mi,
1216 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mi,
1217 #include "instrs.h"
1218 #undef INST0
1219 #undef INST1
1220 #undef INST2
1221 #undef INST3
1222 #undef INST4
1223 #undef INST5
1224};
1225// clang-format on
1226
1227// Returns true iff the give CPU instruction has an MI encoding.
1228inline bool hasCodeMI(instruction ins)
1229{
1230 assert((unsigned)ins < _countof(insCodesMI));
1231 return ((insCodesMI[ins] != BAD_CODE));
1232}
1233
1234/*****************************************************************************
1235 *
1236 * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
1237 */
1238
1239inline size_t insCodeMI(instruction ins)
1240{
1241 assert((unsigned)ins < _countof(insCodesMI));
1242 assert((insCodesMI[ins] != BAD_CODE));
1243
1244 return insCodesMI[ins];
1245}
1246
1247// clang-format off
1248const static
1249size_t insCodesMR[] =
1250{
1251 #define INST0(id, nm, um, mr, flags)
1252 #define INST1(id, nm, um, mr, flags) mr,
1253 #define INST2(id, nm, um, mr, mi, flags) mr,
1254 #define INST3(id, nm, um, mr, mi, rm, flags) mr,
1255 #define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
1256 #define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1257 #include "instrs.h"
1258 #undef INST0
1259 #undef INST1
1260 #undef INST2
1261 #undef INST3
1262 #undef INST4
1263 #undef INST5
1264};
1265// clang-format on
1266
1267// Returns true iff the give CPU instruction has an MR encoding.
1268inline bool hasCodeMR(instruction ins)
1269{
1270 assert((unsigned)ins < _countof(insCodesMR));
1271 return ((insCodesMR[ins] != BAD_CODE));
1272}
1273
1274/*****************************************************************************
1275 *
1276 * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
1277 */
1278
1279inline size_t insCodeMR(instruction ins)
1280{
1281 assert((unsigned)ins < _countof(insCodesMR));
1282 assert((insCodesMR[ins] != BAD_CODE));
1283
1284 return insCodesMR[ins];
1285}
1286
1287// Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
1288bool emitter::EncodedBySSE38orSSE3A(instruction ins)
1289{
1290 const size_t SSE38 = 0x0F660038;
1291 const size_t SSE3A = 0x0F66003A;
1292 const size_t MASK = 0xFFFF00FF;
1293
1294 size_t insCode = 0;
1295
1296 if (!IsSSEOrAVXInstruction(ins))
1297 {
1298 return false;
1299 }
1300
1301 if (hasCodeRM(ins))
1302 {
1303 insCode = insCodeRM(ins);
1304 }
1305 else if (hasCodeMI(ins))
1306 {
1307 insCode = insCodeMI(ins);
1308 }
1309 else if (hasCodeMR(ins))
1310 {
1311 insCode = insCodeMR(ins);
1312 }
1313
1314 insCode &= MASK;
1315 return insCode == SSE38 || insCode == SSE3A;
1316}
1317
1318/*****************************************************************************
1319 *
1320 * Returns an encoding for the specified register to be used in the bit0-2
1321 * part of an opcode.
1322 */
1323
1324inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code)
1325{
1326 assert(reg < REG_STK);
1327
1328#ifdef _TARGET_AMD64_
1329 // Either code is not NULL or reg is not an extended reg.
1330 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1331 // which would require code != NULL.
1332 assert(code != nullptr || !IsExtendedReg(reg));
1333
1334 if (IsExtendedReg(reg))
1335 {
1336 *code = AddRexBPrefix(ins, *code); // REX.B
1337 }
1338 else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1339 {
1340 // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1341 // not the corresponding AH, CH, DH, or BH
1342 *code = AddRexPrefix(ins, *code); // REX
1343 }
1344#endif // _TARGET_AMD64_
1345
1346 unsigned regBits = RegEncoding(reg);
1347
1348 assert(regBits < 8);
1349 return regBits;
1350}
1351
1352/*****************************************************************************
1353 *
1354 * Returns an encoding for the specified register to be used in the bit3-5
1355 * part of an opcode.
1356 */
1357
1358inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code)
1359{
1360 assert(reg < REG_STK);
1361
1362#ifdef _TARGET_AMD64_
1363 // Either code is not NULL or reg is not an extended reg.
1364 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1365 // which would require code != NULL.
1366 assert(code != nullptr || !IsExtendedReg(reg));
1367
1368 if (IsExtendedReg(reg))
1369 {
1370 *code = AddRexRPrefix(ins, *code); // REX.R
1371 }
1372 else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1373 {
1374 // We are assuming that we only use/encode SPL, BPL, SIL and DIL
1375 // not the corresponding AH, CH, DH, or BH
1376 *code = AddRexPrefix(ins, *code); // REX
1377 }
1378#endif // _TARGET_AMD64_
1379
1380 unsigned regBits = RegEncoding(reg);
1381
1382 assert(regBits < 8);
1383 return (regBits << 3);
1384}
1385
1386/***********************************************************************************
1387 *
1388 * Returns modified AVX opcode with the specified register encoded in bits 3-6 of
1389 * byte 2 of VEX prefix.
1390 */
1391inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code)
1392{
1393 assert(reg < REG_STK);
1394 assert(IsAVXInstruction(ins));
1395 assert(hasVexPrefix(code));
1396
1397 // Get 4-bit register encoding
1398 // RegEncoding() gives lower 3 bits
1399 // IsExtendedReg() gives MSB.
1400 code_t regBits = RegEncoding(reg);
1401 if (IsExtendedReg(reg))
1402 {
1403 regBits |= 0x08;
1404 }
1405
1406 // VEX prefix encodes register operand in 1's complement form
1407 // Shift count = 4-bytes of opcode + 0-2 bits
1408 assert(regBits <= 0xF);
1409 regBits <<= 35;
1410 return code ^ regBits;
1411}
1412
1413/*****************************************************************************
1414 *
1415 * Returns an encoding for the specified register to be used in the bit3-5
1416 * part of an SIB byte (unshifted).
1417 * Used exclusively to generate the REX.X bit and truncate the register.
1418 */
1419
1420inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code)
1421{
1422 assert(reg < REG_STK);
1423
1424#ifdef _TARGET_AMD64_
1425 // Either code is not NULL or reg is not an extended reg.
1426 // If reg is an extended reg, instruction needs to be prefixed with 'REX'
1427 // which would require code != NULL.
1428 assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8));
1429
1430 if (IsExtendedReg(reg))
1431 {
1432 *code = AddRexXPrefix(ins, *code); // REX.X
1433 }
1434 unsigned regBits = RegEncoding(reg);
1435#else // !_TARGET_AMD64_
1436 unsigned regBits = reg;
1437#endif // !_TARGET_AMD64_
1438
1439 assert(regBits < 8);
1440 return regBits;
1441}
1442
1443/*****************************************************************************
1444 *
1445 * Returns the "[r/m]" opcode with the mod/RM field set to register.
1446 */
1447
1448inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code)
1449{
1450 // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1451 // Otherwise, it will be placed after the 4 byte encoding.
1452 if ((code & 0xFF00) == 0)
1453 {
1454 assert((code & 0xC000) == 0);
1455 code |= 0xC000;
1456 }
1457
1458 return code;
1459}
1460
1461/*****************************************************************************
1462 *
1463 * Returns the given "[r/m]" opcode with the mod/RM field set to register.
1464 */
1465
1466inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code)
1467{
1468 // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1469 // Otherwise, it will be placed after the 4 byte encoding.
1470 if ((code & 0xFF00) == 0)
1471 {
1472 assert((code & 0xC000) == 0);
1473 code |= 0xC000;
1474 }
1475 return code;
1476}
1477
1478/*****************************************************************************
1479 *
1480 * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
1481 * the given register.
1482 */
1483
1484inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1485{
1486 assert((code & 0xC000) == 0);
1487 code |= 0xC000;
1488 unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1489 code |= regcode;
1490 return code;
1491}
1492
1493/*****************************************************************************
1494 *
1495 * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
1496 * the given register.
1497 */
1498
1499inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1500{
1501 assert((code & 0xC000) == 0);
1502 code |= 0xC000;
1503 unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
1504 code |= regcode;
1505 return code;
1506}
1507
1508/*****************************************************************************
1509 *
1510 * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a
1511 * "reg,reg,imm8" form.
1512 */
1513inline bool insNeedsRRIb(instruction ins)
1514{
1515 // If this list gets longer, use a switch or a table.
1516 return ins == INS_imul;
1517}
1518
1519/*****************************************************************************
1520 *
1521 * Returns the "reg,reg,imm8" opcode with both the reg's set to the
1522 * the given register.
1523 */
1524inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
1525{
1526 assert(size == EA_4BYTE); // All we handle for now.
1527 assert(insNeedsRRIb(ins));
1528 // If this list gets longer, use a switch, or a table lookup.
1529 code_t code = 0x69c0;
1530 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1531 // We use the same register as source and destination. (Could have another version that does both regs...)
1532 code |= regcode;
1533 code |= (regcode << 3);
1534 return code;
1535}
1536
1537/*****************************************************************************
1538 *
1539 * Returns the "+reg" opcode with the the given register set into the low
1540 * nibble of the opcode
1541 */
1542
1543inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
1544{
1545 code_t code = insCodeRR(ins);
1546 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1547 code |= regcode;
1548 return code;
1549}
1550
1551/*****************************************************************************
1552 *
1553 * Return the 'SS' field value for the given index scale factor.
1554 */
1555
1556inline unsigned emitter::insSSval(unsigned scale)
1557{
1558 assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
1559
1560 const static BYTE scales[] = {
1561 0x00, // 1
1562 0x40, // 2
1563 0xFF, // 3
1564 0x80, // 4
1565 0xFF, // 5
1566 0xFF, // 6
1567 0xFF, // 7
1568 0xC0, // 8
1569 };
1570
1571 return scales[scale - 1];
1572}
1573
1574const instruction emitJumpKindInstructions[] = {INS_nop,
1575
1576#define JMP_SMALL(en, rev, ins) INS_##ins,
1577#include "emitjmps.h"
1578
1579 INS_call};
1580
1581const emitJumpKind emitReverseJumpKinds[] = {
1582 EJ_NONE,
1583
1584#define JMP_SMALL(en, rev, ins) EJ_##rev,
1585#include "emitjmps.h"
1586};
1587
1588/*****************************************************************************
1589 * Look up the instruction for a jump kind
1590 */
1591
1592/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
1593{
1594 assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
1595 return emitJumpKindInstructions[jumpKind];
1596}
1597
1598/*****************************************************************************
1599 * Reverse the conditional jump
1600 */
1601
1602/* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
1603{
1604 assert(jumpKind < EJ_COUNT);
1605 return emitReverseJumpKinds[jumpKind];
1606}
1607
1608/*****************************************************************************
1609 * The size for these instructions is less than EA_4BYTE,
1610 * but the target register need not be byte-addressable
1611 */
1612
1613inline bool emitInstHasNoCode(instruction ins)
1614{
1615 if (ins == INS_align)
1616 {
1617 return true;
1618 }
1619
1620 return false;
1621}
1622
1623/*****************************************************************************
1624 * When encoding instructions that operate on byte registers
1625 * we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
1626 * otherwise we will incorrectly encode the instruction
1627 */
1628
1629bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */)
1630{
1631#if CPU_HAS_BYTE_REGS
1632 if (size != EA_1BYTE) // Not operating on a byte register is fine
1633 {
1634 return true;
1635 }
1636
1637 if ((ins != INS_movsx) && // These three instructions support high register
1638 (ins != INS_movzx) // encodings for reg1
1639#ifdef FEATURE_HW_INTRINSICS
1640 && (ins != INS_crc32)
1641#endif
1642 )
1643 {
1644 // reg1 must be a byte-able register
1645 if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
1646 {
1647 return false;
1648 }
1649 }
1650 // if reg2 is not REG_NA then reg2 must be a byte-able register
1651 if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))
1652 {
1653 return false;
1654 }
1655#endif
1656 // The instruction can be encoded
1657 return true;
1658}
1659
1660/*****************************************************************************
1661 *
1662 * Estimate the size (in bytes of generated code) of the given instruction.
1663 */
1664
1665inline UNATIVE_OFFSET emitter::emitInsSize(code_t code)
1666{
1667 UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
1668#ifdef _TARGET_AMD64_
1669 size += emitGetPrefixSize(code);
1670#endif
1671 return size;
1672}
1673
1674inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
1675{
1676 return emitInsSize(insCodeRM(ins));
1677}
1678
1679inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
1680{
1681 emitAttr size = EA_SIZE(attr);
1682
1683 UNATIVE_OFFSET sz;
1684
1685 // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes.
1686 // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
1687 // This would probably be better expressed as a different format or something?
1688 code_t code = insCodeRM(ins);
1689
1690 if ((code & 0xFF00) != 0)
1691 {
1692 sz = 5;
1693 }
1694 else
1695 {
1696 sz = emitInsSize(insEncodeRMreg(ins, code));
1697 }
1698
1699 // Most 16-bit operand instructions will need a prefix
1700 if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
1701 {
1702 sz += 1;
1703 }
1704
1705 // VEX prefix
1706 sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
1707
1708 // REX prefix
1709 if (!hasRexPrefix(code))
1710 {
1711 if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) ||
1712 IsExtendedReg(reg2, attr))
1713 {
1714 sz += emitGetRexPrefixSize(ins);
1715 }
1716 }
1717
1718 return sz;
1719}
1720
1721/*****************************************************************************/
1722
1723inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
1724{
1725 UNATIVE_OFFSET size = emitInsSize(code);
1726 UNATIVE_OFFSET offs;
1727 bool offsIsUpperBound = true;
1728 bool EBPbased = true;
1729
1730 /* Is this a temporary? */
1731
1732 if (var < 0)
1733 {
1734 /* An address off of ESP takes an extra byte */
1735
1736 if (!emitHasFramePtr)
1737 {
1738 size++;
1739 }
1740
1741 // The offset is already assigned. Find the temp.
1742 TempDsc* tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_USED);
1743 if (tmp == nullptr)
1744 {
1745 // It might be in the free lists, if we're working on zero initializing the temps.
1746 tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_FREE);
1747 }
1748 assert(tmp != nullptr);
1749 offs = tmp->tdTempOffs();
1750
1751 // We only care about the magnitude of the offset here, to determine instruction size.
1752 if (emitComp->isFramePointerUsed())
1753 {
1754 if ((int)offs < 0)
1755 {
1756 offs = -(int)offs;
1757 }
1758 }
1759 else
1760 {
1761 // SP-based offsets must already be positive.
1762 assert((int)offs >= 0);
1763 }
1764 }
1765 else
1766 {
1767
1768 /* Get the frame offset of the (non-temp) variable */
1769
1770 offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
1771
1772 /* An address off of ESP takes an extra byte */
1773
1774 if (!EBPbased)
1775 {
1776 ++size;
1777 }
1778
1779 /* Is this a stack parameter reference? */
1780
1781 if (emitComp->lvaIsParameter(var)
1782#if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
1783 && !emitComp->lvaIsRegArgument(var)
1784#endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI
1785 )
1786 {
1787 /* If no EBP frame, arguments are off of ESP, above temps */
1788
1789 if (!EBPbased)
1790 {
1791 assert((int)offs >= 0);
1792
1793 offsIsUpperBound = false; // since #temps can increase
1794 offs += emitMaxTmpSize;
1795 }
1796 }
1797 else
1798 {
1799 /* Locals off of EBP are at negative offsets */
1800
1801 if (EBPbased)
1802 {
1803#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
1804 // If localloc is not used, then ebp chaining is done and hence
1805 // offset of locals will be at negative offsets, Otherwise offsets
1806 // will be positive. In future, when RBP gets positioned in the
1807 // middle of the frame so as to optimize instruction encoding size,
1808 // the below asserts needs to be modified appropriately.
1809 // However, for Unix platforms, we always do frame pointer chaining,
1810 // so offsets from the frame pointer will always be negative.
1811 if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC)
1812 {
1813 noway_assert((int)offs >= 0);
1814 }
1815 else
1816#endif
1817 {
1818 // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
1819 CLANG_FORMAT_COMMENT_ANCHOR;
1820
1821#ifdef UNIX_AMD64_ABI
1822 LclVarDsc* varDsc = emitComp->lvaTable + var;
1823 bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
1824 // Register passed args could have a stack offset of 0.
1825 noway_assert((int)offs < 0 || isRegPassedArg);
1826#else // !UNIX_AMD64_ABI
1827 noway_assert((int)offs < 0);
1828#endif // !UNIX_AMD64_ABI
1829 }
1830
1831 assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
1832
1833 // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
1834 if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar ||
1835 unsigned(var) == emitComp->lvaStubArgumentVar)
1836 {
1837 offs -= emitMaxTmpSize;
1838 }
1839
1840 if ((int)offs < 0)
1841 {
1842 // offset is negative
1843 return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
1844 }
1845#ifdef _TARGET_AMD64_
1846 // This case arises for localloc frames
1847 else
1848 {
1849 return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
1850 }
1851#endif
1852 }
1853
1854 if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
1855 {
1856 offs += emitMaxTmpSize;
1857 }
1858 }
1859 }
1860
1861 assert((int)offs >= 0);
1862
1863#if !FEATURE_FIXED_OUT_ARGS
1864
1865 /* Are we addressing off of ESP? */
1866
1867 if (!emitHasFramePtr)
1868 {
1869 /* Adjust the effective offset if necessary */
1870
1871 if (emitCntStackDepth)
1872 offs += emitCurStackLvl;
1873
1874 // we could (and used to) check for the special case [sp] here but the stack offset
1875 // estimator was off, and there is very little harm in overestimating for such a
1876 // rare case.
1877 }
1878
1879#endif // !FEATURE_FIXED_OUT_ARGS
1880
1881// printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
1882// emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
1883
1884#ifdef _TARGET_AMD64_
1885 bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
1886#else
1887 bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
1888#endif
1889
1890 // If it is ESP based, and the offset is zero, we will not encode the disp part.
1891 if (!EBPbased && offs == 0)
1892 {
1893 return size;
1894 }
1895 else
1896 {
1897 return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
1898 }
1899}
1900
1901inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp)
1902{
1903 instruction ins = id->idIns();
1904 emitAttr attrSize = id->idOpSize();
1905 UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
1906 return prefix + emitInsSizeSV(code, var, dsp);
1907}
1908
1909inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val)
1910{
1911 instruction ins = id->idIns();
1912 emitAttr attrSize = id->idOpSize();
1913 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize);
1914 UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
1915 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
1916
1917#ifdef _TARGET_AMD64_
1918 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
1919 // all other opcodes take a sign-extended 4-byte immediate
1920 noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc());
1921#endif // _TARGET_AMD64_
1922
1923 if (valSize > sizeof(int))
1924 {
1925 valSize = sizeof(int);
1926 }
1927
1928 if (id->idIsCnsReloc())
1929 {
1930 valInByte = false; // relocs can't be placed in a byte
1931 assert(valSize == sizeof(int));
1932 }
1933
1934 if (valInByte)
1935 {
1936 valSize = sizeof(char);
1937 }
1938
1939 // 16-bit operand instructions need a prefix.
1940 // This referes to 66h size prefix override
1941 if (id->idOpSize() == EA_2BYTE)
1942 {
1943 prefix += 1;
1944 }
1945
1946 return prefix + valSize + emitInsSizeSV(code, var, dsp);
1947}
1948
1949/*****************************************************************************/
1950
1951static bool baseRegisterRequiresSibByte(regNumber base)
1952{
1953#ifdef _TARGET_AMD64_
1954 return base == REG_ESP || base == REG_R12;
1955#else
1956 return base == REG_ESP;
1957#endif
1958}
1959
1960static bool baseRegisterRequiresDisplacement(regNumber base)
1961{
1962#ifdef _TARGET_AMD64_
1963 return base == REG_EBP || base == REG_R13;
1964#else
1965 return base == REG_EBP;
1966#endif
1967}
1968
1969UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
1970{
1971 emitAttr attrSize = id->idOpSize();
1972 instruction ins = id->idIns();
1973 /* The displacement field is in an unusual place for calls */
1974 ssize_t dsp = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
1975 bool dspInByte = ((signed char)dsp == (ssize_t)dsp);
1976 bool dspIsZero = (dsp == 0);
1977 UNATIVE_OFFSET size;
1978
1979 // Note that the values in reg and rgx are used in this method to decide
1980 // how many bytes will be needed by the address [reg+rgx+cns]
1981 // this includes the prefix bytes when reg or rgx are registers R8-R15
1982 regNumber reg;
1983 regNumber rgx;
1984
1985 // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
1986 // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD
1987 // ideally these should really be the only idInsFmts that we see here
1988 // but we have some outliers to deal with:
1989 // emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
1990 // emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
1991
1992 switch (id->idInsFmt())
1993 {
1994 case IF_RWR_LABEL:
1995 case IF_MRW_CNS:
1996 case IF_MRW_RRD:
1997 case IF_MRW_SHF:
1998 reg = REG_NA;
1999 rgx = REG_NA;
2000 break;
2001
2002 default:
2003 reg = id->idAddr()->iiaAddrMode.amBaseReg;
2004 rgx = id->idAddr()->iiaAddrMode.amIndxReg;
2005 break;
2006 }
2007
2008 if (id->idIsDspReloc())
2009 {
2010 dspInByte = false; // relocs can't be placed in a byte
2011 dspIsZero = false; // relocs won't always be zero
2012 }
2013
2014 if (code & 0xFF000000)
2015 {
2016 size = 4;
2017 }
2018 else if (code & 0x00FF0000)
2019 {
2020 // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
2021 assert(ins != INS_bt);
2022
2023 assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
2024 || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64
2025 || (ins == INS_movzx) || (ins == INS_movsx)
2026 // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
2027 || isPrefetch(ins));
2028 size = 3;
2029 }
2030 else
2031 {
2032 size = 2;
2033
2034 // Most 16-bit operands will require a size prefix.
2035 // This refers to 66h size prefix override.
2036
2037 if (attrSize == EA_2BYTE)
2038 {
2039 size++;
2040 }
2041 }
2042
2043 size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
2044
2045 if (hasRexPrefix(code))
2046 {
2047 // REX prefix
2048 size += emitGetRexPrefixSize(ins);
2049 }
2050 else if (TakesRexWPrefix(ins, attrSize))
2051 {
2052 // REX.W prefix
2053 size += emitGetRexPrefixSize(ins);
2054 }
2055 else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) ||
2056 ((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize)))
2057 {
2058 // Should have a REX byte
2059 size += emitGetRexPrefixSize(ins);
2060 }
2061
2062 if (rgx == REG_NA)
2063 {
2064 /* The address is of the form "[reg+disp]" */
2065
2066 if (reg == REG_NA)
2067 {
2068 /* The address is of the form "[disp]" */
2069
2070 size += sizeof(INT32);
2071
2072#ifdef _TARGET_AMD64_
2073 // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
2074 if (!id->idIsDspReloc())
2075 {
2076 size++;
2077 }
2078#endif
2079 return size;
2080 }
2081
2082 // If this is just "call reg", we're done.
2083 if (id->idIsCallRegPtr())
2084 {
2085 assert(ins == INS_call);
2086 assert(dsp == 0);
2087 return size;
2088 }
2089
2090 // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
2091 if (baseRegisterRequiresSibByte(reg))
2092 {
2093 size++;
2094 }
2095
2096 // If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
2097 // Otherwise, the displacement can be elided if it is zero.
2098 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2099 {
2100 return size;
2101 }
2102
2103 /* Does the offset fit in a byte? */
2104
2105 if (dspInByte)
2106 {
2107 size += sizeof(char);
2108 }
2109 else
2110 {
2111 size += sizeof(INT32);
2112 }
2113 }
2114 else
2115 {
2116 /* An index register is present */
2117
2118 size++;
2119
2120 /* Is the index value scaled? */
2121
2122 if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1)
2123 {
2124 /* Is there a base register? */
2125
2126 if (reg != REG_NA)
2127 {
2128 /* The address is "[reg + {2/4/8} * rgx + icon]" */
2129
2130 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2131 {
2132 /* The address is "[reg + {2/4/8} * rgx]" */
2133 }
2134 else
2135 {
2136 /* The address is "[reg + {2/4/8} * rgx + disp]" */
2137
2138 if (dspInByte)
2139 {
2140 size += sizeof(char);
2141 }
2142 else
2143 {
2144 size += sizeof(int);
2145 }
2146 }
2147 }
2148 else
2149 {
2150 /* The address is "[{2/4/8} * rgx + icon]" */
2151
2152 size += sizeof(INT32);
2153 }
2154 }
2155 else
2156 {
2157 if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
2158 {
2159 /* Swap reg and rgx, such that reg is not EBP/R13 */
2160 regNumber tmp = reg;
2161 id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
2162 id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
2163 }
2164
2165 /* The address is "[reg+rgx+dsp]" */
2166
2167 if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2168 {
2169 /* This is [reg+rgx]" */
2170 }
2171 else
2172 {
2173 /* This is [reg+rgx+dsp]" */
2174
2175 if (dspInByte)
2176 {
2177 size += sizeof(char);
2178 }
2179 else
2180 {
2181 size += sizeof(int);
2182 }
2183 }
2184 }
2185 }
2186
2187 return size;
2188}
2189
2190inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val)
2191{
2192 instruction ins = id->idIns();
2193 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
2194 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2195
2196 // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful
2197 // but it requires special handling of the immediate value (it is always encoded in a byte).
2198 // Let's not complicate things until this is needed.
2199 assert(ins != INS_bt);
2200
2201#ifdef _TARGET_AMD64_
2202 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2203 // all other opcodes take a sign-extended 4-byte immediate
2204 noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc());
2205#endif // _TARGET_AMD64_
2206
2207 if (valSize > sizeof(INT32))
2208 {
2209 valSize = sizeof(INT32);
2210 }
2211
2212 if (id->idIsCnsReloc())
2213 {
2214 valInByte = false; // relocs can't be placed in a byte
2215 assert(valSize == sizeof(INT32));
2216 }
2217
2218 if (valInByte)
2219 {
2220 valSize = sizeof(char);
2221 }
2222
2223 return valSize + emitInsSizeAM(id, code);
2224}
2225
2226inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
2227{
2228 instruction ins = id->idIns();
2229
2230 // fgMorph changes any statics that won't fit into 32-bit addresses
2231 // into constants with an indir, rather than GT_CLS_VAR
2232 // so we should only hit this path for statics that are RIP-relative
2233 UNATIVE_OFFSET size = sizeof(INT32);
2234
2235 size += emitGetVexPrefixAdjustedSize(ins, id->idOpSize(), code);
2236
2237 // Most 16-bit operand instructions will need a prefix.
2238 // This refers to 66h size prefix override.
2239
2240 if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
2241 {
2242 size++;
2243 }
2244
2245 return size + emitInsSize(code);
2246}
2247
2248inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val)
2249{
2250 instruction ins = id->idIns();
2251 UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
2252 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2253
2254#ifndef _TARGET_AMD64_
2255 // occasionally longs get here on x86
2256 if (valSize > sizeof(INT32))
2257 valSize = sizeof(INT32);
2258#endif // !_TARGET_AMD64_
2259
2260 if (id->idIsCnsReloc())
2261 {
2262 valInByte = false; // relocs can't be placed in a byte
2263 assert(valSize == sizeof(INT32));
2264 }
2265
2266 if (valInByte)
2267 {
2268 valSize = sizeof(char);
2269 }
2270
2271 return valSize + emitInsSizeCV(id, code);
2272}
2273
2274/*****************************************************************************
2275 *
2276 * Allocate instruction descriptors for instructions with address modes.
2277 */
2278
2279inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
2280{
2281 if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2282 {
2283 instrDescAmd* id = emitAllocInstrAmd(size);
2284
2285 id->idSetIsLargeDsp();
2286#ifdef DEBUG
2287 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2288#endif
2289 id->idaAmdVal = dsp;
2290
2291 return id;
2292 }
2293 else
2294 {
2295 instrDesc* id = emitAllocInstr(size);
2296
2297 id->idAddr()->iiaAddrMode.amDisp = dsp;
2298 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2299
2300 return id;
2301 }
2302}
2303
2304/*****************************************************************************
2305 *
2306 * Set the displacement field in an instruction. Only handles instrDescAmd type.
2307 */
2308
2309inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
2310{
2311 if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
2312 {
2313 id->idSetIsLargeDsp();
2314#ifdef DEBUG
2315 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2316#endif
2317 id->idaAmdVal = dsp;
2318 }
2319 else
2320 {
2321 id->idSetIsSmallDsp();
2322 id->idAddr()->iiaAddrMode.amDisp = dsp;
2323 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2324 }
2325}
2326
2327/*****************************************************************************
2328 *
2329 * Allocate an instruction descriptor for an instruction that uses both
2330 * an address mode displacement and a constant.
2331 */
2332
2333emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
2334{
2335 if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
2336 {
2337 if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2338 {
2339 instrDesc* id = emitAllocInstr(size);
2340
2341 id->idSmallCns(cns);
2342
2343 id->idAddr()->iiaAddrMode.amDisp = dsp;
2344 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2345
2346 return id;
2347 }
2348 else
2349 {
2350 instrDescCns* id = emitAllocInstrCns(size);
2351
2352 id->idSetIsLargeCns();
2353 id->idcCnsVal = cns;
2354
2355 id->idAddr()->iiaAddrMode.amDisp = dsp;
2356 assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2357
2358 return id;
2359 }
2360 }
2361 else
2362 {
2363 if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2364 {
2365 instrDescAmd* id = emitAllocInstrAmd(size);
2366
2367 id->idSetIsLargeDsp();
2368#ifdef DEBUG
2369 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2370#endif
2371 id->idaAmdVal = dsp;
2372
2373 id->idSmallCns(cns);
2374
2375 return id;
2376 }
2377 else
2378 {
2379 instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
2380
2381 id->idSetIsLargeCns();
2382 id->idacCnsVal = cns;
2383
2384 id->idSetIsLargeDsp();
2385#ifdef DEBUG
2386 id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2387#endif
2388 id->idacAmdVal = dsp;
2389
2390 return id;
2391 }
2392 }
2393}
2394
2395/*****************************************************************************
2396 *
2397 * The next instruction will be a loop head entry point
2398 * So insert a dummy instruction here to ensure that
2399 * the x86 I-cache alignment rule is followed.
2400 */
2401
2402void emitter::emitLoopAlign()
2403{
2404 /* Insert a pseudo-instruction to ensure that we align
2405 the next instruction properly */
2406
2407 instrDesc* id = emitNewInstrSmall(EA_1BYTE);
2408 id->idIns(INS_align);
2409 id->idCodeSize(15); // We may need to skip up to 15 bytes of code
2410 emitCurIGsize += 15;
2411}
2412
2413/*****************************************************************************
2414 *
2415 * Add a NOP instruction of the given size.
2416 */
2417
2418void emitter::emitIns_Nop(unsigned size)
2419{
2420 assert(size <= 15);
2421
2422 instrDesc* id = emitNewInstr();
2423 id->idIns(INS_nop);
2424 id->idInsFmt(IF_NONE);
2425 id->idCodeSize(size);
2426
2427 dispIns(id);
2428 emitCurIGsize += size;
2429}
2430
2431/*****************************************************************************
2432 *
2433 * Add an instruction with no operands.
2434 */
2435void emitter::emitIns(instruction ins)
2436{
2437 UNATIVE_OFFSET sz;
2438 instrDesc* id = emitNewInstr();
2439 code_t code = insCodeMR(ins);
2440
2441#ifdef DEBUG
2442 {
2443 // We cannot have #ifdef inside macro expansion.
2444 bool assertCond =
2445 (ins == INS_cdq || ins == INS_int3 || ins == INS_lock || ins == INS_leave || ins == INS_movsb ||
2446 ins == INS_movsd || ins == INS_movsp || ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd ||
2447 ins == INS_r_movsp || ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
2448 ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
2449 // These instructions take zero operands
2450 || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence);
2451
2452 assert(assertCond);
2453 }
2454#endif // DEBUG
2455
2456 assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
2457
2458 if (code & 0xFF000000)
2459 {
2460 sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
2461 }
2462 else if (code & 0x00FF0000)
2463 {
2464 sz = 3;
2465 }
2466 else if (code & 0x0000FF00)
2467 {
2468 sz = 2;
2469 }
2470 else
2471 {
2472 sz = 1;
2473 }
2474
2475 // vzeroupper includes its 2-byte VEX prefix in its MR code.
2476 assert((ins != INS_vzeroupper) || (sz == 3));
2477
2478 insFormat fmt = IF_NONE;
2479
2480 id->idIns(ins);
2481 id->idInsFmt(fmt);
2482 id->idCodeSize(sz);
2483
2484 dispIns(id);
2485 emitCurIGsize += sz;
2486}
2487
2488// Add an instruction with no operands, but whose encoding depends on the size
2489// (Only CDQ/CQO currently)
2490void emitter::emitIns(instruction ins, emitAttr attr)
2491{
2492 UNATIVE_OFFSET sz;
2493 instrDesc* id = emitNewInstr(attr);
2494 code_t code = insCodeMR(ins);
2495 assert(ins == INS_cdq);
2496 assert((code & 0xFFFFFF00) == 0);
2497 sz = 1;
2498
2499 insFormat fmt = IF_NONE;
2500
2501 sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
2502 if (TakesRexWPrefix(ins, attr))
2503 {
2504 sz += emitGetRexPrefixSize(ins);
2505 }
2506
2507 id->idIns(ins);
2508 id->idInsFmt(fmt);
2509 id->idCodeSize(sz);
2510
2511 dispIns(id);
2512 emitCurIGsize += sz;
2513}
2514
2515//------------------------------------------------------------------------
2516// emitMapFmtForIns: map the instruction format based on the instruction.
2517// Shift-by-a-constant instructions have a special format.
2518//
2519// Arguments:
2520// fmt - the instruction format to map
2521// ins - the instruction
2522//
2523// Returns:
2524// The mapped instruction format.
2525//
2526emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
2527{
2528 switch (ins)
2529 {
2530 case INS_rol_N:
2531 case INS_ror_N:
2532 case INS_rcl_N:
2533 case INS_rcr_N:
2534 case INS_shl_N:
2535 case INS_shr_N:
2536 case INS_sar_N:
2537 {
2538 switch (fmt)
2539 {
2540 case IF_RRW_CNS:
2541 return IF_RRW_SHF;
2542 case IF_MRW_CNS:
2543 return IF_MRW_SHF;
2544 case IF_SRW_CNS:
2545 return IF_SRW_SHF;
2546 case IF_ARW_CNS:
2547 return IF_ARW_SHF;
2548 default:
2549 unreached();
2550 }
2551 }
2552
2553 default:
2554 return fmt;
2555 }
2556}
2557
2558//------------------------------------------------------------------------
2559// emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
2560//
2561// Arguments:
2562// fmt - the instruction format to map
2563//
2564// Returns:
2565// The mapped instruction format.
2566//
2567emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
2568{
2569 switch (fmt)
2570 {
2571 case IF_ARD:
2572 return IF_MRD;
2573 case IF_AWR:
2574 return IF_MWR;
2575 case IF_ARW:
2576 return IF_MRW;
2577
2578 case IF_RRD_ARD:
2579 return IF_RRD_MRD;
2580 case IF_RWR_ARD:
2581 return IF_RWR_MRD;
2582 case IF_RWR_ARD_CNS:
2583 return IF_RWR_MRD_CNS;
2584 case IF_RRW_ARD:
2585 return IF_RRW_MRD;
2586 case IF_RRW_ARD_CNS:
2587 return IF_RRW_MRD_CNS;
2588 case IF_RWR_RRD_ARD:
2589 return IF_RWR_RRD_MRD;
2590 case IF_RWR_RRD_ARD_CNS:
2591 return IF_RWR_RRD_MRD_CNS;
2592 case IF_RWR_RRD_ARD_RRD:
2593 return IF_RWR_RRD_MRD_RRD;
2594
2595 case IF_ARD_RRD:
2596 return IF_MRD_RRD;
2597 case IF_AWR_RRD:
2598 return IF_MWR_RRD;
2599 case IF_ARW_RRD:
2600 return IF_MRW_RRD;
2601
2602 case IF_ARD_CNS:
2603 return IF_MRD_CNS;
2604 case IF_AWR_CNS:
2605 return IF_MWR_CNS;
2606 case IF_ARW_CNS:
2607 return IF_MRW_CNS;
2608
2609 case IF_AWR_RRD_CNS:
2610 return IF_MWR_RRD_CNS;
2611
2612 case IF_ARW_SHF:
2613 return IF_MRW_SHF;
2614
2615 default:
2616 unreached();
2617 }
2618}
2619
2620//------------------------------------------------------------------------
2621// emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
2622//
2623// Arguments:
2624// indir - the memory operand.
2625// id - the instrDesc to fill in.
2626// fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
2627// GT_CLS_VAR_ADDR), this function will map it to the correct format.
2628// ins - the instruction we are generating. This might affect the instruction format we choose.
2629//
2630// Assumptions:
2631// The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
2632//
2633// Post-conditions:
2634// For base address of int constant:
2635// -- the caller must have added the int constant base to the instrDesc when creating it via
2636// emitNewInstrAmdCns().
2637// For simple address modes (base + scale * index + offset):
2638// -- the base register, index register, and scale factor are set.
2639// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
2640// emitNewInstrAmdCns().
2641//
2642// The instruction format is set.
2643//
2644// idSetIsDspReloc() is called if necessary.
2645//
2646void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
2647{
2648 assert(fmt != IF_NONE);
2649
2650 GenTree* memBase = indir->Base();
2651
2652 if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
2653 {
2654 CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
2655
2656 // Static always need relocs
2657 if (!jitStaticFldIsGlobAddr(fldHnd))
2658 {
2659 // Contract:
2660 // fgMorphField() changes any statics that won't fit into 32-bit addresses into
2661 // constants with an indir, rather than GT_CLS_VAR, based on reloc type hint given
2662 // by VM. Hence emitter should always mark GT_CLS_VAR_ADDR as relocatable.
2663 //
2664 // Data section constants: these get allocated close to code block of the method and
2665 // always addressable IP relative. These too should be marked as relocatable.
2666
2667 id->idSetIsDspReloc();
2668 }
2669
2670 id->idAddr()->iiaFieldHnd = fldHnd;
2671 id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
2672 }
2673 else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
2674 {
2675 // Absolute addresses marked as contained should fit within the base of addr mode.
2676 assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
2677
2678 // Either not generating relocatable code, or addr must be an icon handle, or the
2679 // constant is zero (which we won't generate a relocation for).
2680 assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0));
2681
2682 if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
2683 {
2684 id->idSetIsDspReloc();
2685 }
2686
2687 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2688 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2689 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; // for completeness
2690
2691 id->idInsFmt(emitMapFmtForIns(fmt, ins));
2692
2693 // Absolute address must have already been set in the instrDesc constructor.
2694 assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
2695 }
2696 else
2697 {
2698 if (memBase != nullptr)
2699 {
2700 id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
2701 }
2702 else
2703 {
2704 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2705 }
2706
2707 if (indir->HasIndex())
2708 {
2709 id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
2710 }
2711 else
2712 {
2713 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2714 }
2715 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
2716
2717 id->idInsFmt(emitMapFmtForIns(fmt, ins));
2718
2719 // disp must have already been set in the instrDesc constructor.
2720 assert(emitGetInsAmdAny(id) == indir->Offset()); // make sure "disp" is stored properly
2721 }
2722}
2723
2724// Takes care of storing all incoming register parameters
2725// into its corresponding shadow space (defined by the x64 ABI)
2726void emitter::spillIntArgRegsToShadowSlots()
2727{
2728 unsigned argNum;
2729 instrDesc* id;
2730 UNATIVE_OFFSET sz;
2731
2732 assert(emitComp->compGeneratingProlog);
2733
2734 for (argNum = 0; argNum < MAX_REG_ARG; ++argNum)
2735 {
2736 regNumber argReg = intArgRegs[argNum];
2737
2738 // The offsets for the shadow space start at RSP + 8
2739 // (right before the caller return address)
2740 int offset = (argNum + 1) * EA_PTRSIZE;
2741
2742 id = emitNewInstrAmd(EA_PTRSIZE, offset);
2743 id->idIns(INS_mov);
2744 id->idInsFmt(IF_AWR_RRD);
2745 id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
2746 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2747 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
2748
2749 // The offset has already been set in the intrDsc ctor,
2750 // make sure we got it right.
2751 assert(emitGetInsAmdAny(id) == ssize_t(offset));
2752
2753 id->idReg1(argReg);
2754 sz = emitInsSizeAM(id, insCodeMR(INS_mov));
2755 id->idCodeSize(sz);
2756 emitCurIGsize += sz;
2757 }
2758}
2759
2760//------------------------------------------------------------------------
2761// emitInsLoadInd: Emits a "mov reg, [mem]" (or a variant such as "movzx" or "movss")
2762// instruction for a GT_IND node.
2763//
2764// Arguments:
2765// ins - the instruction to emit
2766// attr - the instruction operand size
2767// dstReg - the destination register
2768// mem - the GT_IND node
2769//
2770void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem)
2771{
2772 assert(mem->OperIs(GT_IND));
2773
2774 GenTree* addr = mem->Addr();
2775
2776 if (addr->OperGet() == GT_CLS_VAR_ADDR)
2777 {
2778 emitIns_R_C(ins, attr, dstReg, addr->gtClsVar.gtClsVarHnd, 0);
2779 return;
2780 }
2781
2782 if (addr->OperGet() == GT_LCL_VAR_ADDR)
2783 {
2784 GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2785 emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), 0);
2786 codeGen->genUpdateLife(varNode);
2787 return;
2788 }
2789
2790 assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained());
2791 ssize_t offset = mem->Offset();
2792 instrDesc* id = emitNewInstrAmd(attr, offset);
2793 id->idIns(ins);
2794 id->idReg1(dstReg);
2795 emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
2796 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
2797 id->idCodeSize(sz);
2798 dispIns(id);
2799 emitCurIGsize += sz;
2800}
2801
2802//------------------------------------------------------------------------
2803// emitInsStoreInd: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2804// instruction for a GT_STOREIND node.
2805//
2806// Arguments:
2807// ins - the instruction to emit
2808// attr - the instruction operand size
2809// mem - the GT_STOREIND node
2810//
2811void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem)
2812{
2813 assert(mem->OperIs(GT_STOREIND));
2814
2815 GenTree* addr = mem->Addr();
2816 GenTree* data = mem->Data();
2817
2818 if (addr->OperGet() == GT_CLS_VAR_ADDR)
2819 {
2820 if (data->isContainedIntOrIImmed())
2821 {
2822 emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue());
2823 }
2824 else
2825 {
2826 assert(!data->isContained());
2827 emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
2828 }
2829 return;
2830 }
2831
2832 if (addr->OperGet() == GT_LCL_VAR_ADDR)
2833 {
2834 GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2835 if (data->isContainedIntOrIImmed())
2836 {
2837 emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2838 }
2839 else
2840 {
2841 assert(!data->isContained());
2842 emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2843 }
2844 codeGen->genUpdateLife(varNode);
2845 return;
2846 }
2847
2848 ssize_t offset = mem->Offset();
2849 UNATIVE_OFFSET sz;
2850 instrDesc* id;
2851
2852 if (data->isContainedIntOrIImmed())
2853 {
2854 int icon = (int)data->AsIntConCommon()->IconValue();
2855 id = emitNewInstrAmdCns(attr, offset, icon);
2856 id->idIns(ins);
2857 emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
2858 sz = emitInsSizeAM(id, insCodeMI(ins), icon);
2859 id->idCodeSize(sz);
2860 }
2861 else
2862 {
2863 assert(!data->isContained());
2864 id = emitNewInstrAmd(attr, offset);
2865 id->idIns(ins);
2866 emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
2867 id->idReg1(data->gtRegNum);
2868 sz = emitInsSizeAM(id, insCodeMR(ins));
2869 id->idCodeSize(sz);
2870 }
2871
2872 dispIns(id);
2873 emitCurIGsize += sz;
2874}
2875
2876//------------------------------------------------------------------------
2877// emitInsStoreLcl: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2878// instruction for a GT_STORE_LCL_VAR node.
2879//
2880// Arguments:
2881// ins - the instruction to emit
2882// attr - the instruction operand size
2883// varNode - the GT_STORE_LCL_VAR node
2884//
2885void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode)
2886{
2887 assert(varNode->OperIs(GT_STORE_LCL_VAR));
2888 assert(varNode->gtRegNum == REG_NA); // stack store
2889
2890 GenTree* data = varNode->gtGetOp1();
2891 codeGen->inst_set_SV_var(varNode);
2892
2893 if (data->isContainedIntOrIImmed())
2894 {
2895 emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
2896 }
2897 else
2898 {
2899 assert(!data->isContained());
2900 emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
2901 }
2902 codeGen->genUpdateLife(varNode);
2903}
2904
2905//------------------------------------------------------------------------
2906// emitInsBinary: Emits an instruction for a node which takes two operands
2907//
2908// Arguments:
2909// ins - the instruction to emit
2910// attr - the instruction operand size
2911// dst - the destination and first source operand
2912// src - the second source operand
2913//
2914// Assumptions:
2915// i) caller of this routine needs to call genConsumeReg()
2916// ii) caller of this routine needs to call genProduceReg()
2917regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
2918{
2919 // We can only have one memory operand and only src can be a constant operand
2920 // However, the handling for a given operand type (mem, cns, or other) is fairly
2921 // consistent regardless of whether they are src or dst. As such, we will find
2922 // the type of each operand and only check them against src/dst where relevant.
2923
2924 GenTree* memOp = nullptr;
2925 GenTree* cnsOp = nullptr;
2926 GenTree* otherOp = nullptr;
2927
2928 if (dst->isContained() || (dst->isLclField() && (dst->gtRegNum == REG_NA)) || dst->isUsedFromSpillTemp())
2929 {
2930 // dst can only be a modrm
2931 // dst on 3opImul isn't really the dst
2932 assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) || instrIs3opImul(ins));
2933 assert(!src->isUsedFromMemory());
2934
2935 memOp = dst;
2936
2937 if (src->isContained())
2938 {
2939 assert(src->IsCnsIntOrI());
2940 cnsOp = src;
2941 }
2942 else
2943 {
2944 otherOp = src;
2945 }
2946 }
2947 else if (src->isContained() || src->isUsedFromSpillTemp())
2948 {
2949 assert(!dst->isUsedFromMemory());
2950 otherOp = dst;
2951
2952 if ((src->IsCnsIntOrI() || src->IsCnsFltOrDbl()) && !src->isUsedFromSpillTemp())
2953 {
2954 assert(!src->isUsedFromMemory() || src->IsCnsFltOrDbl());
2955 cnsOp = src;
2956 }
2957 else
2958 {
2959 assert(src->isUsedFromMemory());
2960 memOp = src;
2961 }
2962 }
2963
2964 // At this point, we either have a memory operand or we don't.
2965 //
2966 // If we don't then the logic is very simple and we will either be emitting a
2967 // `reg, immed` instruction (if src is a cns) or a `reg, reg` instruction otherwise.
2968 //
2969 // If we do have a memory operand, the logic is a bit more complicated as we need
2970 // to do different things depending on the type of memory operand. These types include:
2971 // * Spill temp
2972 // * Indirect access
2973 // * Local variable
2974 // * Class variable
2975 // * Addressing mode [base + index * scale + offset]
2976 // * Local field
2977 // * Local variable
2978 //
2979 // Most of these types (except Indirect: Class variable and Indirect: Addressing mode)
2980 // give us a a local variable number and an offset and access memory on the stack
2981 //
2982 // Indirect: Class variable is used for access static class variables and gives us a handle
2983 // to the memory location we read from
2984 //
2985 // Indirect: Addressing mode is used for the remaining memory accesses and will give us
2986 // a base address, an index, a scale, and an offset. These are combined to let us easily
2987 // access the given memory location.
2988 //
2989 // In all of the memory access cases, we determine which form to emit (e.g. `reg, [mem]`
2990 // or `[mem], reg`) by comparing memOp to src to determine which `emitIns_*` method needs
2991 // to be called. The exception is for the `[mem], immed` case (for Indirect: Class variable)
2992 // where only src can be the immediate.
2993
2994 if (memOp != nullptr)
2995 {
2996 TempDsc* tmpDsc = nullptr;
2997 unsigned varNum = BAD_VAR_NUM;
2998 unsigned offset = (unsigned)-1;
2999
3000 if (memOp->isUsedFromSpillTemp())
3001 {
3002 assert(memOp->IsRegOptional());
3003
3004 tmpDsc = codeGen->getSpillTempDsc(memOp);
3005 varNum = tmpDsc->tdTempNum();
3006 offset = 0;
3007
3008 codeGen->regSet.tmpRlsTemp(tmpDsc);
3009 }
3010 else if (memOp->isIndir())
3011 {
3012 GenTreeIndir* memIndir = memOp->AsIndir();
3013 GenTree* memBase = memIndir->gtOp1;
3014
3015 switch (memBase->OperGet())
3016 {
3017 case GT_LCL_VAR_ADDR:
3018 {
3019 varNum = memBase->AsLclVarCommon()->GetLclNum();
3020 offset = 0;
3021
3022 // Ensure that all the GenTreeIndir values are set to their defaults.
3023 assert(!memIndir->HasIndex());
3024 assert(memIndir->Scale() == 1);
3025 assert(memIndir->Offset() == 0);
3026
3027 break;
3028 }
3029
3030 case GT_CLS_VAR_ADDR:
3031 {
3032 if (memOp == src)
3033 {
3034 assert(otherOp == dst);
3035 assert(cnsOp == nullptr);
3036
3037 if (instrHasImplicitRegPairDest(ins))
3038 {
3039 // src is a class static variable
3040 // dst is implicit - RDX:RAX
3041 emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0);
3042 }
3043 else
3044 {
3045 // src is a class static variable
3046 // dst is a register
3047 emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0);
3048 }
3049 }
3050 else
3051 {
3052 assert(memOp == dst);
3053
3054 if (cnsOp != nullptr)
3055 {
3056 assert(cnsOp == src);
3057 assert(otherOp == nullptr);
3058 assert(src->IsCnsIntOrI());
3059
3060 // src is an contained immediate
3061 // dst is a class static variable
3062 emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0,
3063 (int)src->gtIntConCommon.IconValue());
3064 }
3065 else
3066 {
3067 assert(otherOp == src);
3068
3069 // src is a register
3070 // dst is a class static variable
3071 emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0);
3072 }
3073 }
3074
3075 return dst->gtRegNum;
3076 }
3077
3078 default: // Addressing mode [base + index * scale + offset]
3079 {
3080 instrDesc* id = nullptr;
3081
3082 if (cnsOp != nullptr)
3083 {
3084 assert(memOp == dst);
3085 assert(cnsOp == src);
3086 assert(otherOp == nullptr);
3087 assert(src->IsCnsIntOrI());
3088
3089 id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->gtIntConCommon.IconValue());
3090 }
3091 else
3092 {
3093 ssize_t offset = memIndir->Offset();
3094 id = emitNewInstrAmd(attr, offset);
3095 id->idIns(ins);
3096
3097 GenTree* regTree = (memOp == src) ? dst : src;
3098
3099 // there must be one non-contained op
3100 assert(!regTree->isContained());
3101 id->idReg1(regTree->gtRegNum);
3102 }
3103 assert(id != nullptr);
3104
3105 id->idIns(ins); // Set the instruction.
3106
3107 // Determine the instruction format
3108 insFormat fmt = IF_NONE;
3109
3110 if (memOp == src)
3111 {
3112 assert(cnsOp == nullptr);
3113 assert(otherOp == dst);
3114
3115 if (instrHasImplicitRegPairDest(ins))
3116 {
3117 fmt = emitInsModeFormat(ins, IF_ARD);
3118 }
3119 else
3120 {
3121 fmt = emitInsModeFormat(ins, IF_RRD_ARD);
3122 }
3123 }
3124 else
3125 {
3126 assert(memOp == dst);
3127
3128 if (cnsOp != nullptr)
3129 {
3130 assert(cnsOp == src);
3131 assert(otherOp == nullptr);
3132 assert(src->IsCnsIntOrI());
3133
3134 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
3135 }
3136 else
3137 {
3138 assert(otherOp == src);
3139 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
3140 }
3141 }
3142 assert(fmt != IF_NONE);
3143 emitHandleMemOp(memIndir, id, fmt, ins);
3144
3145 // Determine the instruction size
3146 UNATIVE_OFFSET sz = 0;
3147
3148 if (memOp == src)
3149 {
3150 assert(otherOp == dst);
3151 assert(cnsOp == nullptr);
3152
3153 if (instrHasImplicitRegPairDest(ins))
3154 {
3155 sz = emitInsSizeAM(id, insCode(ins));
3156 }
3157 else
3158 {
3159 sz = emitInsSizeAM(id, insCodeRM(ins));
3160 }
3161 }
3162 else
3163 {
3164 assert(memOp == dst);
3165
3166 if (cnsOp != nullptr)
3167 {
3168 assert(memOp == dst);
3169 assert(cnsOp == src);
3170 assert(otherOp == nullptr);
3171
3172 sz = emitInsSizeAM(id, insCodeMI(ins), (int)src->gtIntConCommon.IconValue());
3173 }
3174 else
3175 {
3176 assert(otherOp == src);
3177 sz = emitInsSizeAM(id, insCodeMR(ins));
3178 }
3179 }
3180 assert(sz != 0);
3181
3182 id->idCodeSize(sz);
3183
3184 dispIns(id);
3185 emitCurIGsize += sz;
3186
3187 return (memOp == src) ? dst->gtRegNum : REG_NA;
3188 }
3189 }
3190 }
3191 else
3192 {
3193 switch (memOp->OperGet())
3194 {
3195 case GT_LCL_FLD:
3196 case GT_STORE_LCL_FLD:
3197 {
3198 GenTreeLclFld* lclField = memOp->AsLclFld();
3199 varNum = lclField->GetLclNum();
3200 offset = lclField->gtLclFld.gtLclOffs;
3201 break;
3202 }
3203
3204 case GT_LCL_VAR:
3205 {
3206 assert(memOp->IsRegOptional() || !emitComp->lvaTable[memOp->gtLclVar.gtLclNum].lvIsRegCandidate());
3207 varNum = memOp->AsLclVar()->GetLclNum();
3208 offset = 0;
3209 break;
3210 }
3211
3212 default:
3213 unreached();
3214 break;
3215 }
3216 }
3217
3218 // Ensure we got a good varNum and offset.
3219 // We also need to check for `tmpDsc != nullptr` since spill temp numbers
3220 // are negative and start with -1, which also happens to be BAD_VAR_NUM.
3221 assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
3222 assert(offset != (unsigned)-1);
3223
3224 if (memOp == src)
3225 {
3226 assert(otherOp == dst);
3227 assert(cnsOp == nullptr);
3228
3229 if (instrHasImplicitRegPairDest(ins))
3230 {
3231 // src is a stack based local variable
3232 // dst is implicit - RDX:RAX
3233 emitIns_S(ins, attr, varNum, offset);
3234 }
3235 else
3236 {
3237 // src is a stack based local variable
3238 // dst is a register
3239 emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
3240 }
3241 }
3242 else
3243 {
3244 assert(memOp == dst);
3245 assert((dst->gtRegNum == REG_NA) || dst->IsRegOptional());
3246
3247 if (cnsOp != nullptr)
3248 {
3249 assert(cnsOp == src);
3250 assert(otherOp == nullptr);
3251 assert(src->IsCnsIntOrI());
3252
3253 // src is an contained immediate
3254 // dst is a stack based local variable
3255 emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
3256 }
3257 else
3258 {
3259 assert(otherOp == src);
3260 assert(!src->isContained());
3261
3262 // src is a register
3263 // dst is a stack based local variable
3264 emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
3265 }
3266 }
3267 }
3268 else if (cnsOp != nullptr) // reg, immed
3269 {
3270 assert(cnsOp == src);
3271 assert(otherOp == dst);
3272
3273 if (src->IsCnsIntOrI())
3274 {
3275 assert(!dst->isContained());
3276 GenTreeIntConCommon* intCns = src->AsIntConCommon();
3277 emitIns_R_I(ins, attr, dst->gtRegNum, intCns->IconValue());
3278 }
3279 else
3280 {
3281 assert(src->IsCnsFltOrDbl());
3282 GenTreeDblCon* dblCns = src->AsDblCon();
3283
3284 CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns));
3285 emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0);
3286 }
3287 }
3288 else // reg, reg
3289 {
3290 assert(otherOp == nullptr);
3291 assert(!src->isContained() && !dst->isContained());
3292
3293 if (instrHasImplicitRegPairDest(ins))
3294 {
3295 emitIns_R(ins, attr, src->gtRegNum);
3296 }
3297 else
3298 {
3299 emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
3300 }
3301 }
3302
3303 return dst->gtRegNum;
3304}
3305
3306//------------------------------------------------------------------------
3307// emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
3308//
3309// Responsible for emitting a single instruction that will perform an operation of the form:
3310// *addr = *addr <BinOp> src
3311// For example:
3312// ADD [RAX], RCX
3313//
3314// Arguments:
3315// ins - instruction to generate
3316// attr - emitter attribute for instruction
3317// storeInd - indir for RMW addressing mode
3318// src - source operand of instruction
3319//
3320// Assumptions:
3321// Lowering has taken care of recognizing the StoreInd pattern of:
3322// StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
3323// The address to store is already sitting in a register.
3324//
3325// Notes:
3326// This is a no-produce operation, meaning that no register output will
3327// be produced for future use in the code stream.
3328//
3329void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
3330{
3331 GenTree* addr = storeInd->Addr();
3332 addr = addr->gtSkipReloadOrCopy();
3333 assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA ||
3334 addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT);
3335
3336 instrDesc* id = nullptr;
3337 UNATIVE_OFFSET sz;
3338
3339 ssize_t offset = 0;
3340 if (addr->OperGet() != GT_CLS_VAR_ADDR)
3341 {
3342 offset = storeInd->Offset();
3343 }
3344
3345 if (src->isContainedIntOrIImmed())
3346 {
3347 GenTreeIntConCommon* intConst = src->AsIntConCommon();
3348 int iconVal = (int)intConst->IconValue();
3349 switch (ins)
3350 {
3351 case INS_rcl_N:
3352 case INS_rcr_N:
3353 case INS_rol_N:
3354 case INS_ror_N:
3355 case INS_shl_N:
3356 case INS_shr_N:
3357 case INS_sar_N:
3358 iconVal &= 0x7F;
3359 break;
3360 default:
3361 break;
3362 }
3363
3364 id = emitNewInstrAmdCns(attr, offset, iconVal);
3365 emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
3366 id->idIns(ins);
3367 sz = emitInsSizeAM(id, insCodeMI(ins), iconVal);
3368 }
3369 else
3370 {
3371 assert(!src->isContained()); // there must be one non-contained src
3372
3373 // ind, reg
3374 id = emitNewInstrAmd(attr, offset);
3375 emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
3376 id->idReg1(src->gtRegNum);
3377 id->idIns(ins);
3378 sz = emitInsSizeAM(id, insCodeMR(ins));
3379 }
3380
3381 id->idCodeSize(sz);
3382
3383 dispIns(id);
3384 emitCurIGsize += sz;
3385}
3386
3387//------------------------------------------------------------------------
3388// emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
3389//
3390// Responsible for emitting a single instruction that will perform an operation of the form:
3391// *addr = UnaryOp *addr
3392// For example:
3393// NOT [RAX]
3394//
3395// Arguments:
3396// ins - instruction to generate
3397// attr - emitter attribute for instruction
3398// storeInd - indir for RMW addressing mode
3399//
3400// Assumptions:
3401// Lowering has taken care of recognizing the StoreInd pattern of:
3402// StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
3403// The address to store is already sitting in a register.
3404//
3405// Notes:
3406// This is a no-produce operation, meaning that no register output will
3407// be produced for future use in the code stream.
3408//
3409void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
3410{
3411 GenTree* addr = storeInd->Addr();
3412 addr = addr->gtSkipReloadOrCopy();
3413 assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR ||
3414 addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT);
3415
3416 ssize_t offset = 0;
3417 if (addr->OperGet() != GT_CLS_VAR_ADDR)
3418 {
3419 offset = storeInd->Offset();
3420 }
3421
3422 instrDesc* id = emitNewInstrAmd(attr, offset);
3423 emitHandleMemOp(storeInd, id, IF_ARW, ins);
3424 id->idIns(ins);
3425 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3426 id->idCodeSize(sz);
3427
3428 dispIns(id);
3429 emitCurIGsize += sz;
3430}
3431
3432/*****************************************************************************
3433 *
3434 * Add an instruction referencing a single register.
3435 */
3436
3437void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
3438{
3439 emitAttr size = EA_SIZE(attr);
3440
3441 assert(size <= EA_PTRSIZE);
3442 noway_assert(emitVerifyEncodable(ins, size, reg));
3443
3444 UNATIVE_OFFSET sz;
3445 instrDesc* id = emitNewInstrSmall(attr);
3446
3447 switch (ins)
3448 {
3449 case INS_inc:
3450 case INS_dec:
3451#ifdef _TARGET_AMD64_
3452
3453 sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
3454
3455#else // !_TARGET_AMD64_
3456
3457 if (size == EA_1BYTE)
3458 sz = 2; // Use the long form as the small one has no 'w' bit
3459 else
3460 sz = 1; // Use short form
3461
3462#endif // !_TARGET_AMD64_
3463
3464 break;
3465
3466 case INS_pop:
3467 case INS_pop_hide:
3468 case INS_push:
3469 case INS_push_hide:
3470
3471 /* We don't currently push/pop small values */
3472
3473 assert(size == EA_PTRSIZE);
3474
3475 sz = 1;
3476 break;
3477
3478 default:
3479
3480 /* All the sixteen INS_setCCs are contiguous. */
3481
3482 if (INS_seto <= ins && ins <= INS_setg)
3483 {
3484 // Rough check that we used the endpoints for the range check
3485
3486 assert(INS_seto + 0xF == INS_setg);
3487
3488 // The caller must specify EA_1BYTE for 'attr'
3489
3490 assert(attr == EA_1BYTE);
3491
3492 /* We expect this to always be a 'big' opcode */
3493
3494 assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000);
3495
3496 size = attr;
3497
3498 sz = 3;
3499 break;
3500 }
3501 else
3502 {
3503 sz = 2;
3504 break;
3505 }
3506 }
3507 insFormat fmt = emitInsModeFormat(ins, IF_RRD);
3508
3509 id->idIns(ins);
3510 id->idInsFmt(fmt);
3511 id->idReg1(reg);
3512
3513 // 16-bit operand instructions will need a prefix.
3514 // This refers to 66h size prefix override.
3515 if (size == EA_2BYTE)
3516 {
3517 sz += 1;
3518 }
3519
3520 // Vex bytes
3521 sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
3522
3523 // REX byte
3524 if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr))
3525 {
3526 sz += emitGetRexPrefixSize(ins);
3527 }
3528
3529 id->idCodeSize(sz);
3530
3531 dispIns(id);
3532 emitCurIGsize += sz;
3533
3534 emitAdjustStackDepthPushPop(ins);
3535}
3536
3537/*****************************************************************************
3538 *
3539 * Add an instruction referencing a register and a constant.
3540 */
3541
3542void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
3543{
3544 emitAttr size = EA_SIZE(attr);
3545
3546 // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
3547 assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins));
3548
3549 noway_assert(emitVerifyEncodable(ins, size, reg));
3550
3551#ifdef _TARGET_AMD64_
3552 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3553 // all other opcodes take a sign-extended 4-byte immediate
3554 noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr)));
3555#endif
3556
3557 UNATIVE_OFFSET sz;
3558 instrDesc* id;
3559 insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS);
3560 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
3561
3562 // BT reg,imm might be useful but it requires special handling of the immediate value
3563 // (it is always encoded in a byte). Let's not complicate things until this is needed.
3564 assert(ins != INS_bt);
3565
3566 // Figure out the size of the instruction
3567 switch (ins)
3568 {
3569 case INS_mov:
3570#ifdef _TARGET_AMD64_
3571 // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
3572 // and this isn't a reloc constant.
3573 if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr))
3574 {
3575 attr = size = EA_4BYTE;
3576 }
3577
3578 if (size > EA_4BYTE)
3579 {
3580 sz = 9; // Really it is 10, but we'll add one more later
3581 break;
3582 }
3583#endif // _TARGET_AMD64_
3584 sz = 5;
3585 break;
3586
3587 case INS_rcl_N:
3588 case INS_rcr_N:
3589 case INS_rol_N:
3590 case INS_ror_N:
3591 case INS_shl_N:
3592 case INS_shr_N:
3593 case INS_sar_N:
3594 assert(val != 1);
3595 fmt = IF_RRW_SHF;
3596 sz = 3;
3597 val &= 0x7F;
3598 valInByte = true; // shift amount always placed in a byte
3599 break;
3600
3601 default:
3602
3603 if (EA_IS_CNS_RELOC(attr))
3604 {
3605 valInByte = false; // relocs can't be placed in a byte
3606 }
3607
3608 if (valInByte)
3609 {
3610 if (IsSSEOrAVXInstruction(ins))
3611 {
3612 sz = 5;
3613 }
3614 else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins))
3615 {
3616 sz = 2;
3617 }
3618 else
3619 {
3620 sz = 3;
3621 }
3622 }
3623 else
3624 {
3625 if (reg == REG_EAX && !instrIs3opImul(ins))
3626 {
3627 sz = 1;
3628 }
3629 else
3630 {
3631 sz = 2;
3632 }
3633
3634#ifdef _TARGET_AMD64_
3635 if (size > EA_4BYTE)
3636 {
3637 // We special-case anything that takes a full 8-byte constant.
3638 sz += 4;
3639 }
3640 else
3641#endif // _TARGET_AMD64_
3642 {
3643 sz += EA_SIZE_IN_BYTES(attr);
3644 }
3645 }
3646 break;
3647 }
3648
3649 // Vex prefix size
3650 sz += emitGetVexPrefixSize(ins, attr);
3651
3652 // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
3653 // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
3654 // register. So we also need to check if that built-in register is an extended register.
3655 if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
3656 {
3657 sz += emitGetRexPrefixSize(ins);
3658 }
3659
3660 id = emitNewInstrSC(attr, val);
3661 id->idIns(ins);
3662 id->idInsFmt(fmt);
3663 id->idReg1(reg);
3664
3665 // 16-bit operand instructions will need a prefix
3666 if (size == EA_2BYTE)
3667 {
3668 sz += 1;
3669 }
3670
3671 id->idCodeSize(sz);
3672
3673 dispIns(id);
3674 emitCurIGsize += sz;
3675
3676 if (reg == REG_ESP)
3677 {
3678 emitAdjustStackDepth(ins, val);
3679 }
3680}
3681
3682/*****************************************************************************
3683 *
3684 * Add an instruction referencing an integer constant.
3685 */
3686
3687void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
3688{
3689 UNATIVE_OFFSET sz;
3690 instrDesc* id;
3691 bool valInByte = ((signed char)val == val);
3692
3693#ifdef _TARGET_AMD64_
3694 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3695 // all other opcodes take a sign-extended 4-byte immediate
3696 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3697#endif
3698
3699 if (EA_IS_CNS_RELOC(attr))
3700 {
3701 valInByte = false; // relocs can't be placed in a byte
3702 }
3703
3704 switch (ins)
3705 {
3706 case INS_loop:
3707 case INS_jge:
3708 sz = 2;
3709 break;
3710
3711 case INS_ret:
3712 sz = 3;
3713 break;
3714
3715 case INS_push_hide:
3716 case INS_push:
3717 sz = valInByte ? 2 : 5;
3718 break;
3719
3720 default:
3721 NO_WAY("unexpected instruction");
3722 }
3723
3724 id = emitNewInstrSC(attr, val);
3725 id->idIns(ins);
3726 id->idInsFmt(IF_CNS);
3727 id->idCodeSize(sz);
3728
3729 dispIns(id);
3730 emitCurIGsize += sz;
3731
3732 emitAdjustStackDepthPushPop(ins);
3733}
3734
3735/*****************************************************************************
3736 *
3737 * Add a "jump through a table" instruction.
3738 */
3739
3740void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
3741{
3742 assert(EA_SIZE(attr) == EA_4BYTE);
3743
3744 UNATIVE_OFFSET sz = 3 + 4;
3745 const instruction ins = INS_i_jmp;
3746
3747 if (IsExtendedReg(reg, attr))
3748 {
3749 sz += emitGetRexPrefixSize(ins);
3750 }
3751
3752 instrDesc* id = emitNewInstrAmd(attr, base);
3753
3754 id->idIns(ins);
3755 id->idInsFmt(IF_ARD);
3756 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
3757 id->idAddr()->iiaAddrMode.amIndxReg = reg;
3758 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP;
3759
3760#ifdef DEBUG
3761 id->idDebugOnlyInfo()->idMemCookie = base;
3762#endif
3763
3764 id->idCodeSize(sz);
3765
3766 dispIns(id);
3767 emitCurIGsize += sz;
3768}
3769
3770/*****************************************************************************
3771 *
3772 * Add an instruction with a static data member operand. If 'size' is 0, the
3773 * instruction operates on the address of the static member instead of its
3774 * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
3775 */
3776
3777void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
3778{
3779 // Static always need relocs
3780 if (!jitStaticFldIsGlobAddr(fldHnd))
3781 {
3782 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
3783 }
3784
3785 UNATIVE_OFFSET sz;
3786 instrDesc* id;
3787
3788 /* Are we pushing the offset of the class variable? */
3789
3790 if (EA_IS_OFFSET(attr))
3791 {
3792 assert(ins == INS_push);
3793 sz = 1 + TARGET_POINTER_SIZE;
3794
3795 id = emitNewInstrDsp(EA_1BYTE, offs);
3796 id->idIns(ins);
3797 id->idInsFmt(IF_MRD_OFF);
3798 }
3799 else
3800 {
3801 insFormat fmt = emitInsModeFormat(ins, IF_MRD);
3802
3803 id = emitNewInstrDsp(attr, offs);
3804 id->idIns(ins);
3805 id->idInsFmt(fmt);
3806 sz = emitInsSizeCV(id, insCodeMR(ins));
3807 }
3808
3809 // Vex prefix size
3810 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
3811
3812 if (TakesRexWPrefix(ins, attr))
3813 {
3814 // REX.W prefix
3815 sz += emitGetRexPrefixSize(ins);
3816 }
3817
3818 id->idAddr()->iiaFieldHnd = fldHnd;
3819
3820 id->idCodeSize(sz);
3821
3822 dispIns(id);
3823 emitCurIGsize += sz;
3824
3825 emitAdjustStackDepthPushPop(ins);
3826}
3827
3828/*****************************************************************************
3829 *
3830 * Add an instruction with two register operands.
3831 */
3832
3833void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
3834{
3835 emitAttr size = EA_SIZE(attr);
3836
3837 /* We don't want to generate any useless mov instructions! */
3838 CLANG_FORMAT_COMMENT_ANCHOR;
3839
3840#ifdef _TARGET_AMD64_
3841 // Same-reg 4-byte mov can be useful because it performs a
3842 // zero-extension to 8 bytes.
3843 assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE);
3844#else
3845 assert(ins != INS_mov || reg1 != reg2);
3846#endif // _TARGET_AMD64_
3847
3848 assert(size <= EA_32BYTE);
3849 noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
3850
3851 UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
3852
3853 if (Is4ByteSSEInstruction(ins))
3854 {
3855 // The 4-Byte SSE instructions require one additional byte
3856 sz += 1;
3857 }
3858
3859 /* Special case: "XCHG" uses a different format */
3860 insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
3861
3862 instrDesc* id = emitNewInstrSmall(attr);
3863 id->idIns(ins);
3864 id->idInsFmt(fmt);
3865 id->idReg1(reg1);
3866 id->idReg2(reg2);
3867 id->idCodeSize(sz);
3868
3869 dispIns(id);
3870 emitCurIGsize += sz;
3871}
3872
3873/*****************************************************************************
3874 *
3875 * Add an instruction with two register operands and an integer constant.
3876 */
3877
3878void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
3879{
3880 // SSE2 version requires 5 bytes and some SSE/AVX version 6 bytes
3881 UNATIVE_OFFSET sz = 4;
3882 if (IsSSEOrAVXInstruction(ins))
3883 {
3884 // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
3885 // SSE: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
3886 // SSE: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
3887 sz = (UseVEXEncoding() || Is4ByteSSEInstruction(ins)) ? 6 : 5;
3888 }
3889
3890#ifdef _TARGET_AMD64_
3891 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3892 // all other opcodes take a sign-extended 4-byte immediate
3893 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
3894#endif
3895
3896 instrDesc* id = emitNewInstrSC(attr, ival);
3897
3898 // REX prefix
3899 if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr))
3900 {
3901 sz += emitGetRexPrefixSize(ins);
3902 }
3903
3904 if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding())
3905 {
3906 sz += 1;
3907 }
3908
3909 id->idIns(ins);
3910 id->idInsFmt(IF_RRW_RRW_CNS);
3911 id->idReg1(reg1);
3912 id->idReg2(reg2);
3913 id->idCodeSize(sz);
3914
3915 dispIns(id);
3916 emitCurIGsize += sz;
3917}
3918
3919void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
3920{
3921 assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta);
3922
3923 instrDesc* id = emitNewInstrAmd(attr, offs);
3924
3925 id->idIns(ins);
3926
3927 id->idInsFmt(IF_ARD);
3928 id->idAddr()->iiaAddrMode.amBaseReg = base;
3929 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
3930
3931 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3932 id->idCodeSize(sz);
3933
3934 dispIns(id);
3935 emitCurIGsize += sz;
3936}
3937
3938//------------------------------------------------------------------------
3939// emitIns_AR_R_R: emits the code for an instruction that takes a base memory register, two register operands
3940// and that does not return a value
3941//
3942// Arguments:
3943// ins -- The instruction being emitted
3944// attr -- The emit attribute
3945// targetReg -- The target register
3946// op2Reg -- The register of the second operand
3947// op3Reg -- The register of the third operand
3948// base -- The base register used for the memory address (first operand)
3949// offs -- The offset from base
3950//
3951void emitter::emitIns_AR_R_R(
3952 instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs)
3953{
3954 assert(IsSSEOrAVXInstruction(ins));
3955 assert(IsThreeOperandAVXInstruction(ins));
3956
3957 instrDesc* id = emitNewInstrAmd(attr, offs);
3958
3959 id->idIns(ins);
3960 id->idReg1(op2Reg);
3961 id->idReg2(op3Reg);
3962
3963 id->idInsFmt(IF_AWR_RRD_RRD);
3964 id->idAddr()->iiaAddrMode.amBaseReg = base;
3965 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
3966
3967 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3968 id->idCodeSize(sz);
3969
3970 dispIns(id);
3971 emitCurIGsize += sz;
3972}
3973
3974void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir)
3975{
3976 ssize_t offs = indir->Offset();
3977 instrDesc* id = emitNewInstrAmd(attr, offs);
3978
3979 id->idIns(ins);
3980 id->idReg1(reg1);
3981
3982 emitHandleMemOp(indir, id, IF_RRW_ARD, ins);
3983
3984 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
3985 id->idCodeSize(sz);
3986
3987 dispIns(id);
3988 emitCurIGsize += sz;
3989}
3990
3991void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival)
3992{
3993 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
3994 assert(IsSSEOrAVXInstruction(ins));
3995
3996 ssize_t offs = indir->Offset();
3997 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
3998
3999 id->idIns(ins);
4000 id->idReg1(reg1);
4001
4002 emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
4003
4004 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4005
4006 if (Is4ByteSSEInstruction(ins))
4007 {
4008 // The 4-Byte SSE instructions require two additional bytes
4009 sz += 2;
4010 }
4011
4012 id->idCodeSize(sz);
4013
4014 dispIns(id);
4015 emitCurIGsize += sz;
4016}
4017
4018void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
4019{
4020 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4021 assert(IsSSEOrAVXInstruction(ins));
4022
4023 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4024
4025 id->idIns(ins);
4026 id->idReg1(reg1);
4027
4028 id->idInsFmt(IF_RRW_ARD_CNS);
4029 id->idAddr()->iiaAddrMode.amBaseReg = base;
4030 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4031
4032 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4033
4034 if (Is4ByteSSEInstruction(ins))
4035 {
4036 // The 4-Byte SSE instructions require two additional bytes
4037 sz += 2;
4038 }
4039
4040 id->idCodeSize(sz);
4041
4042 dispIns(id);
4043 emitCurIGsize += sz;
4044}
4045
4046void emitter::emitIns_R_C_I(
4047 instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4048{
4049 // Static always need relocs
4050 if (!jitStaticFldIsGlobAddr(fldHnd))
4051 {
4052 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4053 }
4054
4055 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4056 assert(IsSSEOrAVXInstruction(ins));
4057
4058 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4059
4060 id->idIns(ins);
4061 id->idInsFmt(IF_RRW_MRD_CNS);
4062 id->idReg1(reg1);
4063 id->idAddr()->iiaFieldHnd = fldHnd;
4064
4065 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4066
4067 if (Is4ByteSSEInstruction(ins))
4068 {
4069 // The 4-Byte SSE instructions require two additional bytes
4070 sz += 2;
4071 }
4072
4073 id->idCodeSize(sz);
4074
4075 dispIns(id);
4076 emitCurIGsize += sz;
4077}
4078
4079void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival)
4080{
4081 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4082 assert(IsSSEOrAVXInstruction(ins));
4083
4084 instrDesc* id = emitNewInstrCns(attr, ival);
4085
4086 id->idIns(ins);
4087 id->idInsFmt(IF_RRW_SRD_CNS);
4088 id->idReg1(reg1);
4089 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4090
4091#ifdef DEBUG
4092 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4093#endif
4094
4095 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4096
4097 if (Is4ByteSSEInstruction(ins))
4098 {
4099 // The 4-Byte SSE instructions require two additional bytes
4100 sz += 2;
4101 }
4102
4103 id->idCodeSize(sz);
4104
4105 dispIns(id);
4106 emitCurIGsize += sz;
4107}
4108
4109void emitter::emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir)
4110{
4111 assert(IsSSEOrAVXInstruction(ins));
4112 assert(IsThreeOperandAVXInstruction(ins));
4113
4114 ssize_t offs = indir->Offset();
4115 instrDesc* id = emitNewInstrAmd(attr, offs);
4116
4117 id->idIns(ins);
4118 id->idReg1(reg1);
4119 id->idReg2(reg2);
4120
4121 emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins);
4122
4123 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4124 id->idCodeSize(sz);
4125
4126 dispIns(id);
4127 emitCurIGsize += sz;
4128}
4129
4130void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs)
4131{
4132 assert(IsSSEOrAVXInstruction(ins));
4133 assert(IsThreeOperandAVXInstruction(ins));
4134
4135 instrDesc* id = emitNewInstrAmd(attr, offs);
4136
4137 id->idIns(ins);
4138 id->idReg1(reg1);
4139 id->idReg2(reg2);
4140
4141 id->idInsFmt(IF_RWR_RRD_ARD);
4142 id->idAddr()->iiaAddrMode.amBaseReg = base;
4143 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4144
4145 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4146 id->idCodeSize(sz);
4147
4148 dispIns(id);
4149 emitCurIGsize += sz;
4150}
4151
4152//------------------------------------------------------------------------
4153// IsAVX2GatherInstruction: return true if the instruction is AVX2 Gather
4154//
4155// Arguments:
4156// ins - the instruction to check
4157// Return Value:
4158// true if the instruction is AVX2 Gather
4159//
4160bool IsAVX2GatherInstruction(instruction ins)
4161{
4162 switch (ins)
4163 {
4164 case INS_vpgatherdd:
4165 case INS_vpgatherdq:
4166 case INS_vpgatherqd:
4167 case INS_vpgatherqq:
4168 case INS_vgatherdps:
4169 case INS_vgatherdpd:
4170 case INS_vgatherqps:
4171 case INS_vgatherqpd:
4172 return true;
4173 default:
4174 return false;
4175 }
4176}
4177
4178//------------------------------------------------------------------------
4179// emitIns_R_AR_R: Emits an AVX2 Gather instructions
4180//
4181// Arguments:
4182// ins - the instruction to emit
4183// attr - the instruction operand size
4184// reg1 - the destination and first source operand
4185// reg2 - the mask operand (encoded in VEX.vvvv)
4186// base - the base register of address to load
4187// index - the index register of VSIB
4188// scale - the scale number of VSIB
4189// offs - the offset added to the memory address from base
4190//
4191void emitter::emitIns_R_AR_R(instruction ins,
4192 emitAttr attr,
4193 regNumber reg1,
4194 regNumber reg2,
4195 regNumber base,
4196 regNumber index,
4197 int scale,
4198 int offs)
4199{
4200 assert(IsAVX2GatherInstruction(ins));
4201
4202 instrDesc* id = emitNewInstrAmd(attr, offs);
4203
4204 id->idIns(ins);
4205 id->idReg1(reg1);
4206 id->idReg2(reg2);
4207
4208 id->idInsFmt(IF_RWR_ARD_RRD);
4209 id->idAddr()->iiaAddrMode.amBaseReg = base;
4210 id->idAddr()->iiaAddrMode.amIndxReg = index;
4211 id->idAddr()->iiaAddrMode.amScale = emitEncodeSize((emitAttr)scale);
4212
4213 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4214 id->idCodeSize(sz);
4215
4216 dispIns(id);
4217 emitCurIGsize += sz;
4218}
4219
4220void emitter::emitIns_R_R_C(
4221 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
4222{
4223 assert(IsSSEOrAVXInstruction(ins));
4224 assert(IsThreeOperandAVXInstruction(ins));
4225
4226 // Static always need relocs
4227 if (!jitStaticFldIsGlobAddr(fldHnd))
4228 {
4229 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4230 }
4231
4232 instrDesc* id = emitNewInstrDsp(attr, offs);
4233
4234 id->idIns(ins);
4235 id->idInsFmt(IF_RWR_RRD_MRD);
4236 id->idReg1(reg1);
4237 id->idReg2(reg2);
4238 id->idAddr()->iiaFieldHnd = fldHnd;
4239
4240 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
4241 id->idCodeSize(sz);
4242
4243 dispIns(id);
4244 emitCurIGsize += sz;
4245}
4246
4247/*****************************************************************************
4248*
4249* Add an instruction with three register operands.
4250*/
4251
4252void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
4253{
4254 assert(IsSSEOrAVXInstruction(ins));
4255 assert(IsThreeOperandAVXInstruction(ins));
4256 // Currently vex prefix only use three bytes mode.
4257 // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
4258 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4259 UNATIVE_OFFSET sz = 5;
4260
4261 instrDesc* id = emitNewInstr(attr);
4262 id->idIns(ins);
4263 id->idInsFmt(IF_RWR_RRD_RRD);
4264 id->idReg1(targetReg);
4265 id->idReg2(reg1);
4266 id->idReg3(reg2);
4267
4268 id->idCodeSize(sz);
4269 dispIns(id);
4270 emitCurIGsize += sz;
4271}
4272
4273void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
4274{
4275 assert(IsSSEOrAVXInstruction(ins));
4276 assert(IsThreeOperandAVXInstruction(ins));
4277
4278 instrDesc* id = emitNewInstr(attr);
4279
4280 id->idIns(ins);
4281 id->idInsFmt(IF_RWR_RRD_SRD);
4282 id->idReg1(reg1);
4283 id->idReg2(reg2);
4284 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4285
4286#ifdef DEBUG
4287 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4288#endif
4289
4290 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
4291 id->idCodeSize(sz);
4292
4293 dispIns(id);
4294 emitCurIGsize += sz;
4295}
4296
4297void emitter::emitIns_R_R_A_I(
4298 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt)
4299{
4300 assert(IsSSEOrAVXInstruction(ins));
4301 assert(IsThreeOperandAVXInstruction(ins));
4302
4303 ssize_t offs = indir->Offset();
4304 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4305
4306 id->idIns(ins);
4307 id->idReg1(reg1);
4308 id->idReg2(reg2);
4309
4310 emitHandleMemOp(indir, id, fmt, ins);
4311
4312 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4313 id->idCodeSize(sz);
4314
4315 dispIns(id);
4316 emitCurIGsize += sz;
4317}
4318
4319void emitter::emitIns_R_R_AR_I(
4320 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
4321{
4322 assert(IsSSEOrAVXInstruction(ins));
4323 assert(IsThreeOperandAVXInstruction(ins));
4324
4325 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4326
4327 id->idIns(ins);
4328 id->idReg1(reg1);
4329 id->idReg2(reg2);
4330
4331 id->idInsFmt(IF_RWR_RRD_ARD_CNS);
4332 id->idAddr()->iiaAddrMode.amBaseReg = base;
4333 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4334
4335 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4336 id->idCodeSize(sz);
4337
4338 dispIns(id);
4339 emitCurIGsize += sz;
4340}
4341
4342void emitter::emitIns_R_R_C_I(
4343 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4344{
4345 assert(IsSSEOrAVXInstruction(ins));
4346 assert(IsThreeOperandAVXInstruction(ins));
4347
4348 // Static always need relocs
4349 if (!jitStaticFldIsGlobAddr(fldHnd))
4350 {
4351 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4352 }
4353
4354 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4355
4356 id->idIns(ins);
4357 id->idInsFmt(IF_RWR_RRD_MRD_CNS);
4358 id->idReg1(reg1);
4359 id->idReg2(reg2);
4360 id->idAddr()->iiaFieldHnd = fldHnd;
4361
4362 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4363 id->idCodeSize(sz);
4364
4365 dispIns(id);
4366 emitCurIGsize += sz;
4367}
4368
4369/**********************************************************************************
4370* emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
4371*
4372* Arguments:
4373* ins - the instruction to add
4374* attr - the emitter attribute for instruction
4375* targetReg - the target (destination) register
4376* reg1 - the first source register
4377* reg2 - the second source register
4378* ival - the immediate value
4379*/
4380
4381void emitter::emitIns_R_R_R_I(
4382 instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival)
4383{
4384 assert(IsSSEOrAVXInstruction(ins));
4385 assert(IsThreeOperandAVXInstruction(ins));
4386 // Currently vex prefix only use three bytes mode.
4387 // size = vex + opcode + ModR/M + 1-byte-cns = 3 + 1 + 1 + 1 = 6
4388 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4389 UNATIVE_OFFSET sz = 6;
4390
4391 instrDesc* id = emitNewInstrCns(attr, ival);
4392 id->idIns(ins);
4393 id->idInsFmt(IF_RWR_RRD_RRD_CNS);
4394 id->idReg1(targetReg);
4395 id->idReg2(reg1);
4396 id->idReg3(reg2);
4397
4398 id->idCodeSize(sz);
4399 dispIns(id);
4400 emitCurIGsize += sz;
4401}
4402
4403void emitter::emitIns_R_R_S_I(
4404 instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival)
4405{
4406 assert(IsSSEOrAVXInstruction(ins));
4407 assert(IsThreeOperandAVXInstruction(ins));
4408
4409 instrDesc* id = emitNewInstrCns(attr, ival);
4410
4411 id->idIns(ins);
4412 id->idInsFmt(IF_RWR_RRD_SRD_CNS);
4413 id->idReg1(reg1);
4414 id->idReg2(reg2);
4415 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4416
4417#ifdef DEBUG
4418 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4419#endif
4420
4421 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4422 id->idCodeSize(sz);
4423
4424 dispIns(id);
4425 emitCurIGsize += sz;
4426}
4427
4428//------------------------------------------------------------------------
4429// encodeXmmRegAsIval: Encodes a XMM register into imm[7:4] for use by a SIMD instruction
4430//
4431// Arguments
4432// opReg -- The register being encoded
4433//
4434// Returns:
4435// opReg encoded in imm[7:4]
4436static int encodeXmmRegAsIval(regNumber opReg)
4437{
4438 // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
4439 // which encodes the fourth register into imm8[7:4]
4440 assert(opReg >= XMMBASE);
4441 int ival = (opReg - XMMBASE) << 4;
4442
4443 assert((ival >= 0) && (ival <= 255));
4444 return (int8_t)ival;
4445}
4446
4447//------------------------------------------------------------------------
4448// emitIns_R_R_A_R: emits the code for an instruction that takes a register operand, a GenTreeIndir address,
4449// another register operand, and that returns a value in register
4450//
4451// Arguments:
4452// ins -- The instruction being emitted
4453// attr -- The emit attribute
4454// targetReg -- The target register
4455// op1Reg -- The register of the first operand
4456// op3Reg -- The register of the third operand
4457// indir -- The GenTreeIndir used for the memory address
4458//
4459// Remarks:
4460// op2 is built from indir
4461//
4462void emitter::emitIns_R_R_A_R(
4463 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
4464{
4465 assert(isAvxBlendv(ins));
4466 assert(UseVEXEncoding());
4467
4468 int ival = encodeXmmRegAsIval(op3Reg);
4469 ssize_t offs = indir->Offset();
4470 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4471
4472 id->idIns(ins);
4473 id->idReg1(targetReg);
4474 id->idReg2(op1Reg);
4475
4476 emitHandleMemOp(indir, id, IF_RWR_RRD_ARD_RRD, ins);
4477
4478 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4479 id->idCodeSize(sz);
4480
4481 dispIns(id);
4482 emitCurIGsize += sz;
4483}
4484
4485//------------------------------------------------------------------------
4486// emitIns_R_R_AR_R: emits the code for an instruction that takes a register operand, a base memory
4487// register, another register operand, and that returns a value in register
4488//
4489// Arguments:
4490// ins -- The instruction being emitted
4491// attr -- The emit attribute
4492// targetReg -- The target register
4493// op1Reg -- The register of the first operands
4494// op3Reg -- The register of the third operand
4495// base -- The base register used for the memory address
4496// offs -- The offset added to the memory address from base
4497//
4498// Remarks:
4499// op2 is built from base + offs
4500//
4501void emitter::emitIns_R_R_AR_R(
4502 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base, int offs)
4503{
4504 assert(isAvxBlendv(ins));
4505 assert(UseVEXEncoding());
4506
4507 int ival = encodeXmmRegAsIval(op3Reg);
4508 instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4509
4510 id->idIns(ins);
4511 id->idReg1(targetReg);
4512 id->idReg2(op1Reg);
4513
4514 id->idInsFmt(IF_RWR_RRD_ARD_RRD);
4515 id->idAddr()->iiaAddrMode.amBaseReg = base;
4516 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4517
4518 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4519 id->idCodeSize(sz);
4520
4521 dispIns(id);
4522 emitCurIGsize += sz;
4523}
4524
4525//------------------------------------------------------------------------
4526// emitIns_R_R_C_R: emits the code for an instruction that takes a register operand, a field handle +
4527// offset, another register operand, and that returns a value in register
4528//
4529// Arguments:
4530// ins -- The instruction being emitted
4531// attr -- The emit attribute
4532// targetReg -- The target register
4533// op1Reg -- The register of the first operand
4534// op3Reg -- The register of the third operand
4535// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
4536// offs -- The offset added to the memory address from fldHnd
4537//
4538// Remarks:
4539// op2 is built from fldHnd + offs
4540//
4541void emitter::emitIns_R_R_C_R(instruction ins,
4542 emitAttr attr,
4543 regNumber targetReg,
4544 regNumber op1Reg,
4545 regNumber op3Reg,
4546 CORINFO_FIELD_HANDLE fldHnd,
4547 int offs)
4548{
4549 assert(isAvxBlendv(ins));
4550 assert(UseVEXEncoding());
4551
4552 // Static always need relocs
4553 if (!jitStaticFldIsGlobAddr(fldHnd))
4554 {
4555 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4556 }
4557
4558 int ival = encodeXmmRegAsIval(op3Reg);
4559 instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4560
4561 id->idIns(ins);
4562 id->idReg1(targetReg);
4563 id->idReg2(op1Reg);
4564
4565 id->idInsFmt(IF_RWR_RRD_MRD_RRD);
4566 id->idAddr()->iiaFieldHnd = fldHnd;
4567
4568 UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4569 id->idCodeSize(sz);
4570
4571 dispIns(id);
4572 emitCurIGsize += sz;
4573}
4574
4575//------------------------------------------------------------------------
4576// emitIns_R_R_R_S: emits the code for a instruction that takes a register operand, a variable index +
4577// offset, another register operand, and that returns a value in register
4578//
4579// Arguments:
4580// ins -- The instruction being emitted
4581// attr -- The emit attribute
4582// targetReg -- The target register
4583// op1Reg -- The register of the first operand
4584// op3Reg -- The register of the third operand
4585// varx -- The variable index used for the memory address
4586// offs -- The offset added to the memory address from varx
4587//
4588// Remarks:
4589// op2 is built from varx + offs
4590//
4591void emitter::emitIns_R_R_S_R(
4592 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
4593{
4594 assert(isAvxBlendv(ins));
4595 assert(UseVEXEncoding());
4596
4597 int ival = encodeXmmRegAsIval(op3Reg);
4598 instrDesc* id = emitNewInstrCns(attr, ival);
4599
4600 id->idIns(ins);
4601 id->idReg1(targetReg);
4602 id->idReg2(op1Reg);
4603
4604 id->idInsFmt(IF_RWR_RRD_SRD_RRD);
4605 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4606
4607 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4608 id->idCodeSize(sz);
4609
4610 dispIns(id);
4611 emitCurIGsize += sz;
4612}
4613
4614void emitter::emitIns_R_R_R_R(
4615 instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
4616{
4617 assert(isAvxBlendv(ins));
4618 assert(UseVEXEncoding());
4619 // Currently vex prefix only use three bytes mode.
4620 // size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
4621 // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4622 UNATIVE_OFFSET sz = 6;
4623
4624 int ival = encodeXmmRegAsIval(reg3);
4625 instrDesc* id = emitNewInstrCns(attr, ival);
4626
4627 id->idIns(ins);
4628 id->idInsFmt(IF_RWR_RRD_RRD_RRD);
4629 id->idReg1(targetReg);
4630 id->idReg2(reg1);
4631 id->idReg3(reg2);
4632 id->idReg4(reg3);
4633
4634 id->idCodeSize(sz);
4635 dispIns(id);
4636 emitCurIGsize += sz;
4637}
4638
4639/*****************************************************************************
4640 *
4641 * Add an instruction with a register + static member operands.
4642 */
4643void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
4644{
4645 // Static always need relocs
4646 if (!jitStaticFldIsGlobAddr(fldHnd))
4647 {
4648 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4649 }
4650
4651 emitAttr size = EA_SIZE(attr);
4652
4653 assert(size <= EA_32BYTE);
4654 noway_assert(emitVerifyEncodable(ins, size, reg));
4655
4656 UNATIVE_OFFSET sz;
4657 instrDesc* id;
4658
4659 // Are we MOV'ing the offset of the class variable into EAX?
4660 if (EA_IS_OFFSET(attr))
4661 {
4662 id = emitNewInstrDsp(EA_1BYTE, offs);
4663 id->idIns(ins);
4664 id->idInsFmt(IF_RWR_MRD_OFF);
4665
4666 assert(ins == INS_mov && reg == REG_EAX);
4667
4668 // Special case: "mov eax, [addr]" is smaller
4669 sz = 1 + TARGET_POINTER_SIZE;
4670 }
4671 else
4672 {
4673 insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);
4674
4675 id = emitNewInstrDsp(attr, offs);
4676 id->idIns(ins);
4677 id->idInsFmt(fmt);
4678
4679#ifdef _TARGET_X86_
4680 // Special case: "mov eax, [addr]" is smaller.
4681 // This case is not enabled for amd64 as it always uses RIP relative addressing
4682 // and it results in smaller instruction size than encoding 64-bit addr in the
4683 // instruction.
4684 if (ins == INS_mov && reg == REG_EAX)
4685 {
4686 sz = 1 + TARGET_POINTER_SIZE;
4687 if (size == EA_2BYTE)
4688 sz += 1;
4689 }
4690 else
4691#endif //_TARGET_X86_
4692 {
4693 sz = emitInsSizeCV(id, insCodeRM(ins));
4694 }
4695
4696 // Special case: mov reg, fs:[ddd]
4697 if (fldHnd == FLD_GLOBAL_FS)
4698 {
4699 sz += 1;
4700 }
4701 }
4702
4703 // VEX prefix
4704 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4705
4706 // REX prefix
4707 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4708 {
4709 sz += emitGetRexPrefixSize(ins);
4710 }
4711
4712 id->idReg1(reg);
4713 id->idCodeSize(sz);
4714
4715 id->idAddr()->iiaFieldHnd = fldHnd;
4716
4717 dispIns(id);
4718 emitCurIGsize += sz;
4719}
4720
4721/*****************************************************************************
4722 *
4723 * Add an instruction with a static member + register operands.
4724 */
4725
4726void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
4727{
4728 // Static always need relocs
4729 if (!jitStaticFldIsGlobAddr(fldHnd))
4730 {
4731 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4732 }
4733
4734 emitAttr size = EA_SIZE(attr);
4735
4736#if defined(_TARGET_X86_)
4737 // For x86 it is valid to storeind a double sized operand in an xmm reg to memory
4738 assert(size <= EA_8BYTE);
4739#else
4740 assert(size <= EA_PTRSIZE);
4741#endif
4742
4743 noway_assert(emitVerifyEncodable(ins, size, reg));
4744
4745 instrDesc* id = emitNewInstrDsp(attr, offs);
4746 insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD);
4747
4748 id->idIns(ins);
4749 id->idInsFmt(fmt);
4750
4751 UNATIVE_OFFSET sz;
4752
4753#ifdef _TARGET_X86_
4754 // Special case: "mov [addr], EAX" is smaller.
4755 // This case is not enable for amd64 as it always uses RIP relative addressing
4756 // and it will result in smaller instruction size than encoding 64-bit addr in
4757 // the instruction.
4758 if (ins == INS_mov && reg == REG_EAX)
4759 {
4760 sz = 1 + TARGET_POINTER_SIZE;
4761 if (size == EA_2BYTE)
4762 sz += 1;
4763 }
4764 else
4765#endif //_TARGET_X86_
4766 {
4767 sz = emitInsSizeCV(id, insCodeMR(ins));
4768 }
4769
4770 // Special case: mov reg, fs:[ddd]
4771 if (fldHnd == FLD_GLOBAL_FS)
4772 {
4773 sz += 1;
4774 }
4775
4776 // VEX prefix
4777 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
4778
4779 // REX prefix
4780 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
4781 {
4782 sz += emitGetRexPrefixSize(ins);
4783 }
4784
4785 id->idReg1(reg);
4786 id->idCodeSize(sz);
4787
4788 id->idAddr()->iiaFieldHnd = fldHnd;
4789
4790 dispIns(id);
4791 emitCurIGsize += sz;
4792}
4793
4794/*****************************************************************************
4795 *
4796 * Add an instruction with a static member + constant.
4797 */
4798
4799void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
4800{
4801 // Static always need relocs
4802 if (!jitStaticFldIsGlobAddr(fldHnd))
4803 {
4804 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4805 }
4806
4807 insFormat fmt;
4808
4809 switch (ins)
4810 {
4811 case INS_rcl_N:
4812 case INS_rcr_N:
4813 case INS_rol_N:
4814 case INS_ror_N:
4815 case INS_shl_N:
4816 case INS_shr_N:
4817 case INS_sar_N:
4818 assert(val != 1);
4819 fmt = IF_MRW_SHF;
4820 val &= 0x7F;
4821 break;
4822
4823 default:
4824 fmt = emitInsModeFormat(ins, IF_MRD_CNS);
4825 break;
4826 }
4827
4828 instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
4829 id->idIns(ins);
4830 id->idInsFmt(fmt);
4831
4832 code_t code = insCodeMI(ins);
4833 UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
4834
4835 // Vex prefix
4836 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
4837
4838 // REX prefix, if not already included in "code"
4839 if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
4840 {
4841 sz += emitGetRexPrefixSize(ins);
4842 }
4843
4844 id->idAddr()->iiaFieldHnd = fldHnd;
4845 id->idCodeSize(sz);
4846
4847 dispIns(id);
4848 emitCurIGsize += sz;
4849}
4850
4851void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
4852{
4853 assert(ins == INS_mov);
4854 assert(dst->bbFlags & BBF_JMP_TARGET);
4855
4856 instrDescLbl* id = emitNewInstrLbl();
4857
4858 id->idIns(ins);
4859 id->idInsFmt(IF_SWR_LABEL);
4860 id->idAddr()->iiaBBlabel = dst;
4861
4862 /* The label reference is always long */
4863
4864 id->idjShort = 0;
4865 id->idjKeepLong = 1;
4866
4867 /* Record the current IG and offset within it */
4868
4869 id->idjIG = emitCurIG;
4870 id->idjOffs = emitCurIGsize;
4871
4872 /* Append this instruction to this IG's jump list */
4873
4874 id->idjNext = emitCurIGjmpList;
4875 emitCurIGjmpList = id;
4876
4877 UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(id, insCodeMI(ins), varx, offs);
4878 id->dstLclVar.initLclVarAddr(varx, offs);
4879#ifdef DEBUG
4880 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4881#endif
4882
4883#if EMITTER_STATS
4884 emitTotalIGjmps++;
4885#endif
4886
4887#ifndef _TARGET_AMD64_
4888 // Storing the address of a basicBlock will need a reloc
4889 // as the instruction uses the absolute address,
4890 // not a relative address.
4891 //
4892 // On Amd64, Absolute code addresses should always go through a reloc to
4893 // to be encoded as RIP rel32 offset.
4894 if (emitComp->opts.compReloc)
4895#endif
4896 {
4897 id->idSetIsDspReloc();
4898 }
4899
4900 id->idCodeSize(sz);
4901
4902 dispIns(id);
4903 emitCurIGsize += sz;
4904}
4905
4906/*****************************************************************************
4907 *
4908 * Add a label instruction.
4909 */
4910void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
4911{
4912 assert(ins == INS_lea);
4913 assert(dst->bbFlags & BBF_JMP_TARGET);
4914
4915 instrDescJmp* id = emitNewInstrJmp();
4916
4917 id->idIns(ins);
4918 id->idReg1(reg);
4919 id->idInsFmt(IF_RWR_LABEL);
4920 id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
4921 id->idAddr()->iiaBBlabel = dst;
4922
4923 /* The label reference is always long */
4924
4925 id->idjShort = 0;
4926 id->idjKeepLong = 1;
4927
4928 /* Record the current IG and offset within it */
4929
4930 id->idjIG = emitCurIG;
4931 id->idjOffs = emitCurIGsize;
4932
4933 /* Append this instruction to this IG's jump list */
4934
4935 id->idjNext = emitCurIGjmpList;
4936 emitCurIGjmpList = id;
4937
4938#ifdef DEBUG
4939 // Mark the catch return
4940 if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
4941 {
4942 id->idDebugOnlyInfo()->idCatchRet = true;
4943 }
4944#endif // DEBUG
4945
4946#if EMITTER_STATS
4947 emitTotalIGjmps++;
4948#endif
4949
4950 // Set the relocation flags - these give hint to zap to perform
4951 // relocation of the specified 32bit address.
4952 //
4953 // Note the relocation flags influence the size estimate.
4954 id->idSetRelocFlags(attr);
4955
4956 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4957 id->idCodeSize(sz);
4958
4959 dispIns(id);
4960 emitCurIGsize += sz;
4961}
4962
4963/*****************************************************************************
4964 *
4965 * The following adds instructions referencing address modes.
4966 */
4967
4968void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp)
4969{
4970 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
4971
4972#ifdef _TARGET_AMD64_
4973 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4974 // all other opcodes take a sign-extended 4-byte immediate
4975 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
4976#endif
4977
4978 insFormat fmt;
4979
4980 switch (ins)
4981 {
4982 case INS_rcl_N:
4983 case INS_rcr_N:
4984 case INS_rol_N:
4985 case INS_ror_N:
4986 case INS_shl_N:
4987 case INS_shr_N:
4988 case INS_sar_N:
4989 assert(val != 1);
4990 fmt = IF_ARW_SHF;
4991 val &= 0x7F;
4992 break;
4993
4994 default:
4995 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
4996 break;
4997 }
4998
4999 /*
5000 Useful if you want to trap moves with 0 constant
5001 if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5002 {
5003 printf("MOV 0\n");
5004 }
5005 */
5006
5007 UNATIVE_OFFSET sz;
5008 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5009 id->idIns(ins);
5010 id->idInsFmt(fmt);
5011
5012 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5013 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5014
5015 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5016
5017 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5018 id->idCodeSize(sz);
5019
5020 dispIns(id);
5021 emitCurIGsize += sz;
5022}
5023
5024void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
5025{
5026 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5027
5028#ifdef _TARGET_AMD64_
5029 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5030 // all other opcodes take a sign-extended 4-byte immediate
5031 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5032#endif
5033
5034 insFormat fmt;
5035
5036 switch (ins)
5037 {
5038 case INS_rcl_N:
5039 case INS_rcr_N:
5040 case INS_rol_N:
5041 case INS_ror_N:
5042 case INS_shl_N:
5043 case INS_shr_N:
5044 case INS_sar_N:
5045 assert(val != 1);
5046 fmt = IF_ARW_SHF;
5047 val &= 0x7F;
5048 break;
5049
5050 default:
5051 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5052 break;
5053 }
5054
5055 /*
5056 Useful if you want to trap moves with 0 constant
5057 if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5058 {
5059 printf("MOV 0\n");
5060 }
5061 */
5062
5063 UNATIVE_OFFSET sz;
5064 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5065 id->idIns(ins);
5066 id->idInsFmt(fmt);
5067
5068 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5069 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5070
5071 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5072
5073 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5074 id->idCodeSize(sz);
5075
5076 dispIns(id);
5077 emitCurIGsize += sz;
5078}
5079
5080void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5081{
5082 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
5083 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5084
5085 if (ins == INS_lea)
5086 {
5087 if (ireg == base && disp == 0)
5088 {
5089 // Maybe the emitter is not the common place for this optimization, but it's a better choke point
5090 // for all the emitIns(ins, tree), we would have to be analyzing at each call site
5091 //
5092 return;
5093 }
5094 }
5095
5096 UNATIVE_OFFSET sz;
5097 instrDesc* id = emitNewInstrAmd(attr, disp);
5098 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5099
5100 id->idIns(ins);
5101 id->idInsFmt(fmt);
5102 id->idReg1(ireg);
5103
5104 id->idAddr()->iiaAddrMode.amBaseReg = base;
5105 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5106
5107 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5108
5109 sz = emitInsSizeAM(id, insCodeRM(ins));
5110
5111 if (Is4ByteSSEInstruction(ins))
5112 {
5113 // The 4-Byte SSE instructions require two additional bytes
5114 sz += 2;
5115 }
5116
5117 id->idCodeSize(sz);
5118
5119 dispIns(id);
5120 emitCurIGsize += sz;
5121}
5122
5123void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5124{
5125 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5126 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5127
5128 UNATIVE_OFFSET sz;
5129 instrDesc* id = emitNewInstrAmd(attr, disp);
5130 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5131
5132 id->idIns(ins);
5133 id->idInsFmt(fmt);
5134 id->idReg1(ireg);
5135
5136 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5137 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5138
5139 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5140
5141 sz = emitInsSizeAM(id, insCodeRM(ins));
5142 id->idCodeSize(sz);
5143
5144 dispIns(id);
5145 emitCurIGsize += sz;
5146}
5147
5148void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5149{
5150 UNATIVE_OFFSET sz;
5151 instrDesc* id = emitNewInstrAmd(attr, disp);
5152 insFormat fmt;
5153
5154 if (ireg == REG_NA)
5155 {
5156 fmt = emitInsModeFormat(ins, IF_ARD);
5157 }
5158 else
5159 {
5160 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5161
5162 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
5163 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5164
5165 id->idReg1(ireg);
5166 }
5167
5168 id->idIns(ins);
5169 id->idInsFmt(fmt);
5170
5171 id->idAddr()->iiaAddrMode.amBaseReg = base;
5172 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5173
5174 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5175
5176 sz = emitInsSizeAM(id, insCodeMR(ins));
5177 id->idCodeSize(sz);
5178
5179 dispIns(id);
5180 emitCurIGsize += sz;
5181
5182 emitAdjustStackDepthPushPop(ins);
5183}
5184
5185void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival)
5186{
5187 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
5188 assert(base != REG_NA);
5189 assert(ireg != REG_NA);
5190 instrDesc* id = emitNewInstrAmdCns(attr, disp, ival);
5191
5192 id->idIns(ins);
5193 id->idInsFmt(IF_AWR_RRD_CNS);
5194 id->idAddr()->iiaAddrMode.amBaseReg = base;
5195 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5196 id->idReg1(ireg);
5197
5198 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5199
5200 UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins), ival);
5201 id->idCodeSize(sz);
5202
5203 dispIns(id);
5204 emitCurIGsize += sz;
5205}
5206
5207void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5208{
5209 UNATIVE_OFFSET sz;
5210 instrDesc* id = emitNewInstrAmd(attr, disp);
5211 insFormat fmt;
5212
5213 if (ireg == REG_NA)
5214 {
5215 fmt = emitInsModeFormat(ins, IF_ARD);
5216 }
5217 else
5218 {
5219 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5220
5221 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5222 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5223
5224 id->idReg1(ireg);
5225 }
5226
5227 id->idIns(ins);
5228 id->idInsFmt(fmt);
5229
5230 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5231 id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5232
5233 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5234
5235 sz = emitInsSizeAM(id, insCodeMR(ins));
5236 id->idCodeSize(sz);
5237
5238 dispIns(id);
5239 emitCurIGsize += sz;
5240
5241 emitAdjustStackDepthPushPop(ins);
5242}
5243
5244void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
5245{
5246 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5247
5248#ifdef _TARGET_AMD64_
5249 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5250 // all other opcodes take a sign-extended 4-byte immediate
5251 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5252#endif
5253
5254 insFormat fmt;
5255
5256 switch (ins)
5257 {
5258 case INS_rcl_N:
5259 case INS_rcr_N:
5260 case INS_rol_N:
5261 case INS_ror_N:
5262 case INS_shl_N:
5263 case INS_shr_N:
5264 case INS_sar_N:
5265 assert(val != 1);
5266 fmt = IF_ARW_SHF;
5267 val &= 0x7F;
5268 break;
5269
5270 default:
5271 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5272 break;
5273 }
5274
5275 UNATIVE_OFFSET sz;
5276 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5277 id->idIns(ins);
5278 id->idInsFmt(fmt);
5279
5280 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5281 id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5282 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5283
5284 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5285
5286 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5287 id->idCodeSize(sz);
5288
5289 dispIns(id);
5290 emitCurIGsize += sz;
5291}
5292
5293void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
5294{
5295 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5296 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5297
5298 UNATIVE_OFFSET sz;
5299 instrDesc* id = emitNewInstrAmd(attr, disp);
5300 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5301
5302 id->idIns(ins);
5303 id->idInsFmt(fmt);
5304 id->idReg1(ireg);
5305
5306 id->idAddr()->iiaAddrMode.amBaseReg = base;
5307 id->idAddr()->iiaAddrMode.amIndxReg = index;
5308 id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5309
5310 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5311
5312 sz = emitInsSizeAM(id, insCodeRM(ins));
5313 id->idCodeSize(sz);
5314
5315 dispIns(id);
5316 emitCurIGsize += sz;
5317}
5318
5319void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
5320{
5321 UNATIVE_OFFSET sz;
5322 instrDesc* id = emitNewInstrAmd(attr, disp);
5323 insFormat fmt;
5324
5325 if (ireg == REG_NA)
5326 {
5327 fmt = emitInsModeFormat(ins, IF_ARD);
5328 }
5329 else
5330 {
5331 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5332
5333 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5334 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5335
5336 id->idReg1(ireg);
5337 }
5338
5339 id->idIns(ins);
5340 id->idInsFmt(fmt);
5341
5342 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5343 id->idAddr()->iiaAddrMode.amIndxReg = index;
5344 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
5345
5346 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5347
5348 sz = emitInsSizeAM(id, insCodeMR(ins));
5349 id->idCodeSize(sz);
5350
5351 dispIns(id);
5352 emitCurIGsize += sz;
5353
5354 emitAdjustStackDepthPushPop(ins);
5355}
5356
5357void emitter::emitIns_I_ARX(
5358 instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
5359{
5360 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5361
5362#ifdef _TARGET_AMD64_
5363 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5364 // all other opcodes take a sign-extended 4-byte immediate
5365 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5366#endif
5367
5368 insFormat fmt;
5369
5370 switch (ins)
5371 {
5372 case INS_rcl_N:
5373 case INS_rcr_N:
5374 case INS_rol_N:
5375 case INS_ror_N:
5376 case INS_shl_N:
5377 case INS_shr_N:
5378 case INS_sar_N:
5379 assert(val != 1);
5380 fmt = IF_ARW_SHF;
5381 val &= 0x7F;
5382 break;
5383
5384 default:
5385 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5386 break;
5387 }
5388
5389 UNATIVE_OFFSET sz;
5390 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5391
5392 id->idIns(ins);
5393 id->idInsFmt(fmt);
5394
5395 id->idAddr()->iiaAddrMode.amBaseReg = reg;
5396 id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5397 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5398
5399 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5400
5401 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5402 id->idCodeSize(sz);
5403
5404 dispIns(id);
5405 emitCurIGsize += sz;
5406}
5407
5408void emitter::emitIns_R_ARX(
5409 instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5410{
5411 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5412 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5413
5414 UNATIVE_OFFSET sz;
5415 instrDesc* id = emitNewInstrAmd(attr, disp);
5416 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5417
5418 id->idIns(ins);
5419 id->idInsFmt(fmt);
5420 id->idReg1(ireg);
5421
5422 id->idAddr()->iiaAddrMode.amBaseReg = base;
5423 id->idAddr()->iiaAddrMode.amIndxReg = index;
5424 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5425
5426 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5427
5428 sz = emitInsSizeAM(id, insCodeRM(ins));
5429 id->idCodeSize(sz);
5430
5431 dispIns(id);
5432 emitCurIGsize += sz;
5433}
5434
5435void emitter::emitIns_ARX_R(
5436 instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5437{
5438 UNATIVE_OFFSET sz;
5439 instrDesc* id = emitNewInstrAmd(attr, disp);
5440 insFormat fmt;
5441
5442 if (ireg == REG_NA)
5443 {
5444 fmt = emitInsModeFormat(ins, IF_ARD);
5445 }
5446 else
5447 {
5448 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5449
5450 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5451 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5452
5453 id->idReg1(ireg);
5454 }
5455
5456 id->idIns(ins);
5457 id->idInsFmt(fmt);
5458
5459 id->idAddr()->iiaAddrMode.amBaseReg = base;
5460 id->idAddr()->iiaAddrMode.amIndxReg = index;
5461 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5462
5463 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5464
5465 sz = emitInsSizeAM(id, insCodeMR(ins));
5466 id->idCodeSize(sz);
5467
5468 dispIns(id);
5469 emitCurIGsize += sz;
5470
5471 emitAdjustStackDepthPushPop(ins);
5472}
5473
5474void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
5475{
5476 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5477
5478#ifdef _TARGET_AMD64_
5479 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5480 // all other opcodes take a sign-extended 4-byte immediate
5481 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
5482#endif
5483
5484 insFormat fmt;
5485
5486 switch (ins)
5487 {
5488 case INS_rcl_N:
5489 case INS_rcr_N:
5490 case INS_rol_N:
5491 case INS_ror_N:
5492 case INS_shl_N:
5493 case INS_shr_N:
5494 case INS_sar_N:
5495 assert(val != 1);
5496 fmt = IF_ARW_SHF;
5497 val &= 0x7F;
5498 break;
5499
5500 default:
5501 fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5502 break;
5503 }
5504
5505 UNATIVE_OFFSET sz;
5506 instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5507 id->idIns(ins);
5508 id->idInsFmt(fmt);
5509
5510 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5511 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5512 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5513
5514 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5515
5516 sz = emitInsSizeAM(id, insCodeMI(ins), val);
5517 id->idCodeSize(sz);
5518
5519 dispIns(id);
5520 emitCurIGsize += sz;
5521}
5522
5523void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5524{
5525 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5526 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5527
5528 UNATIVE_OFFSET sz;
5529 instrDesc* id = emitNewInstrAmd(attr, disp);
5530 insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5531
5532 id->idIns(ins);
5533 id->idInsFmt(fmt);
5534 id->idReg1(ireg);
5535
5536 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5537 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5538 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5539
5540 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5541
5542 sz = emitInsSizeAM(id, insCodeRM(ins));
5543 id->idCodeSize(sz);
5544
5545 dispIns(id);
5546 emitCurIGsize += sz;
5547}
5548
5549void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5550{
5551 UNATIVE_OFFSET sz;
5552 instrDesc* id = emitNewInstrAmd(attr, disp);
5553 insFormat fmt;
5554
5555 if (ireg == REG_NA)
5556 {
5557 fmt = emitInsModeFormat(ins, IF_ARD);
5558 }
5559 else
5560 {
5561 fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5562 noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5563 assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5564
5565 id->idReg1(ireg);
5566 }
5567
5568 id->idIns(ins);
5569 id->idInsFmt(fmt);
5570
5571 id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5572 id->idAddr()->iiaAddrMode.amIndxReg = reg;
5573 id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5574
5575 assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5576
5577 sz = emitInsSizeAM(id, insCodeMR(ins));
5578 id->idCodeSize(sz);
5579
5580 dispIns(id);
5581 emitCurIGsize += sz;
5582
5583 emitAdjustStackDepthPushPop(ins);
5584}
5585
5586#ifdef FEATURE_HW_INTRINSICS
5587//------------------------------------------------------------------------
5588// emitIns_SIMD_R_R_I: emits the code for a SIMD instruction that takes a register operand, an immediate operand
5589// and that returns a value in register
5590//
5591// Arguments:
5592// ins -- The instruction being emitted
5593// attr -- The emit attribute
5594// targetReg -- The target register
5595// op1Reg -- The register of the first operand
5596// ival -- The immediate value
5597//
5598void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival)
5599{
5600 if (UseVEXEncoding() || IsDstSrcImmAvxInstruction(ins))
5601 {
5602 emitIns_R_R_I(ins, attr, targetReg, op1Reg, ival);
5603 }
5604 else
5605 {
5606 if (op1Reg != targetReg)
5607 {
5608 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5609 }
5610 emitIns_R_I(ins, attr, targetReg, ival);
5611 }
5612}
5613
5614//------------------------------------------------------------------------
5615// emitIns_SIMD_R_R_A: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5616// and that returns a value in register
5617//
5618// Arguments:
5619// ins -- The instruction being emitted
5620// attr -- The emit attribute
5621// targetReg -- The target register
5622// op1Reg -- The register of the first operand
5623// indir -- The GenTreeIndir used for the memory address
5624//
5625void emitter::emitIns_SIMD_R_R_A(
5626 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir)
5627{
5628 if (UseVEXEncoding())
5629 {
5630 emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir);
5631 }
5632 else
5633 {
5634 if (op1Reg != targetReg)
5635 {
5636 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5637 }
5638 emitIns_R_A(ins, attr, targetReg, indir);
5639 }
5640}
5641
5642//------------------------------------------------------------------------
5643// emitIns_SIMD_R_R_AR: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5644// and that returns a value in register
5645//
5646// Arguments:
5647// ins -- The instruction being emitted
5648// attr -- The emit attribute
5649// targetReg -- The target register
5650// op1Reg -- The register of the first operand
5651// base -- The base register used for the memory address
5652//
5653void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base)
5654{
5655 if (UseVEXEncoding())
5656 {
5657 emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, 0);
5658 }
5659 else
5660 {
5661 if (op1Reg != targetReg)
5662 {
5663 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5664 }
5665 emitIns_R_AR(ins, attr, targetReg, base, 0);
5666 }
5667}
5668
5669//------------------------------------------------------------------------
5670// emitIns_SIMD_R_R_C: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5671// and that returns a value in register
5672//
5673// Arguments:
5674// ins -- The instruction being emitted
5675// attr -- The emit attribute
5676// targetReg -- The target register
5677// op1Reg -- The register of the first operand
5678// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5679// offs -- The offset added to the memory address from fldHnd
5680//
5681void emitter::emitIns_SIMD_R_R_C(
5682 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
5683{
5684 if (UseVEXEncoding())
5685 {
5686 emitIns_R_R_C(ins, attr, targetReg, op1Reg, fldHnd, offs);
5687 }
5688 else
5689 {
5690 if (op1Reg != targetReg)
5691 {
5692 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5693 }
5694 emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
5695 }
5696}
5697
5698//------------------------------------------------------------------------
5699// emitIns_SIMD_R_R_R: emits the code for a SIMD instruction that takes two register operands, and that returns a
5700// value in register
5701//
5702// Arguments:
5703// ins -- The instruction being emitted
5704// attr -- The emit attribute
5705// targetReg -- The target register
5706// op1Reg -- The register of the first operand
5707// op2Reg -- The register of the second operand
5708//
5709void emitter::emitIns_SIMD_R_R_R(
5710 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg)
5711{
5712 if (UseVEXEncoding())
5713 {
5714 emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg);
5715 }
5716 else
5717 {
5718 if (op1Reg != targetReg)
5719 {
5720 // Ensure we aren't overwriting op2
5721 assert(op2Reg != targetReg);
5722
5723 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5724 }
5725 emitIns_R_R(ins, attr, targetReg, op2Reg);
5726 }
5727}
5728
5729//------------------------------------------------------------------------
5730// emitIns_SIMD_R_R_S: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5731// and that returns a value in register
5732//
5733// Arguments:
5734// ins -- The instruction being emitted
5735// attr -- The emit attribute
5736// targetReg -- The target register
5737// op1Reg -- The register of the first operand
5738// varx -- The variable index used for the memory address
5739// offs -- The offset added to the memory address from varx
5740//
5741void emitter::emitIns_SIMD_R_R_S(
5742 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs)
5743{
5744 if (UseVEXEncoding())
5745 {
5746 emitIns_R_R_S(ins, attr, targetReg, op1Reg, varx, offs);
5747 }
5748 else
5749 {
5750 if (op1Reg != targetReg)
5751 {
5752 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5753 }
5754 emitIns_R_S(ins, attr, targetReg, varx, offs);
5755 }
5756}
5757
5758//------------------------------------------------------------------------
5759// emitIns_SIMD_R_R_A_I: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5760// an immediate operand, and that returns a value in register
5761//
5762// Arguments:
5763// ins -- The instruction being emitted
5764// attr -- The emit attribute
5765// targetReg -- The target register
5766// op1Reg -- The register of the first operand
5767// indir -- The GenTreeIndir used for the memory address
5768// ival -- The immediate value
5769//
5770void emitter::emitIns_SIMD_R_R_A_I(
5771 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival)
5772{
5773 if (UseVEXEncoding())
5774 {
5775 emitIns_R_R_A_I(ins, attr, targetReg, op1Reg, indir, ival, IF_RWR_RRD_ARD_CNS);
5776 }
5777 else
5778 {
5779 if (op1Reg != targetReg)
5780 {
5781 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5782 }
5783 emitIns_R_A_I(ins, attr, targetReg, indir, ival);
5784 }
5785}
5786
5787//------------------------------------------------------------------------
5788// emitIns_SIMD_R_R_AR_I: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5789// an immediate operand, and that returns a value in register
5790//
5791// Arguments:
5792// ins -- The instruction being emitted
5793// attr -- The emit attribute
5794// targetReg -- The target register
5795// op1Reg -- The register of the first operand
5796// base -- The base register used for the memory address
5797// ival -- The immediate value
5798//
5799void emitter::emitIns_SIMD_R_R_AR_I(
5800 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int ival)
5801{
5802 if (UseVEXEncoding())
5803 {
5804 emitIns_R_R_AR_I(ins, attr, targetReg, op1Reg, base, 0, ival);
5805 }
5806 else
5807 {
5808 if (op1Reg != targetReg)
5809 {
5810 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5811 }
5812 emitIns_R_AR_I(ins, attr, targetReg, base, 0, ival);
5813 }
5814}
5815
5816//------------------------------------------------------------------------
5817// emitIns_SIMD_R_R_C_I: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5818// an immediate operand, and that returns a value in register
5819//
5820// Arguments:
5821// ins -- The instruction being emitted
5822// attr -- The emit attribute
5823// targetReg -- The target register
5824// op1Reg -- The register of the first operand
5825// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5826// offs -- The offset added to the memory address from fldHnd
5827// ival -- The immediate value
5828//
5829void emitter::emitIns_SIMD_R_R_C_I(instruction ins,
5830 emitAttr attr,
5831 regNumber targetReg,
5832 regNumber op1Reg,
5833 CORINFO_FIELD_HANDLE fldHnd,
5834 int offs,
5835 int ival)
5836{
5837 if (UseVEXEncoding())
5838 {
5839 emitIns_R_R_C_I(ins, attr, targetReg, op1Reg, fldHnd, offs, ival);
5840 }
5841 else
5842 {
5843 if (op1Reg != targetReg)
5844 {
5845 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5846 }
5847 emitIns_R_C_I(ins, attr, targetReg, fldHnd, offs, ival);
5848 }
5849}
5850
5851//------------------------------------------------------------------------
5852// emitIns_SIMD_R_R_R_I: emits the code for a SIMD instruction that takes two register operands, an immediate operand,
5853// and that returns a value in register
5854//
5855// Arguments:
5856// ins -- The instruction being emitted
5857// attr -- The emit attribute
5858// targetReg -- The target register
5859// op1Reg -- The register of the first operand
5860// op2Reg -- The register of the second operand
5861// ival -- The immediate value
5862//
5863void emitter::emitIns_SIMD_R_R_R_I(
5864 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int ival)
5865{
5866 if (UseVEXEncoding())
5867 {
5868 emitIns_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, ival);
5869 }
5870 else
5871 {
5872 if (op1Reg != targetReg)
5873 {
5874 // Ensure we aren't overwriting op2
5875 assert(op2Reg != targetReg);
5876
5877 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5878 }
5879 emitIns_R_R_I(ins, attr, targetReg, op2Reg, ival);
5880 }
5881}
5882
5883//------------------------------------------------------------------------
5884// emitIns_SIMD_R_R_S_I: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5885// an imediate operand, and that returns a value in register
5886//
5887// Arguments:
5888// ins -- The instruction being emitted
5889// attr -- The emit attribute
5890// targetReg -- The target register
5891// op1Reg -- The register of the first operand
5892// varx -- The variable index used for the memory address
5893// offs -- The offset added to the memory address from varx
5894// ival -- The immediate value
5895//
5896void emitter::emitIns_SIMD_R_R_S_I(
5897 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, int ival)
5898{
5899 if (UseVEXEncoding())
5900 {
5901 emitIns_R_R_S_I(ins, attr, targetReg, op1Reg, varx, offs, ival);
5902 }
5903 else
5904 {
5905 if (op1Reg != targetReg)
5906 {
5907 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5908 }
5909 emitIns_R_S_I(ins, attr, targetReg, varx, offs, ival);
5910 }
5911}
5912
5913//------------------------------------------------------------------------
5914// emitIns_SIMD_R_R_R_A: emits the code for a SIMD instruction that takes two register operands, a GenTreeIndir address,
5915// and that returns a value in register
5916//
5917// Arguments:
5918// ins -- The instruction being emitted
5919// attr -- The emit attribute
5920// targetReg -- The target register
5921// op1Reg -- The register of the first operand
5922// op2Reg -- The register of the second operand
5923// indir -- The GenTreeIndir used for the memory address
5924//
5925void emitter::emitIns_SIMD_R_R_R_A(
5926 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
5927{
5928 assert(IsFMAInstruction(ins));
5929 assert(UseVEXEncoding());
5930
5931 if (op1Reg != targetReg)
5932 {
5933 // Ensure we aren't overwriting op2
5934 assert(op2Reg != targetReg);
5935
5936 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5937 }
5938
5939 emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir);
5940}
5941
5942//------------------------------------------------------------------------
5943// emitIns_SIMD_R_R_R_AR: emits the code for a SIMD instruction that takes two register operands, a base memory
5944// register, and that returns a value in register
5945//
5946// Arguments:
5947// ins -- The instruction being emitted
5948// attr -- The emit attribute
5949// targetReg -- The target register
5950// op1Reg -- The register of the first operands
5951// op2Reg -- The register of the second operand
5952// base -- The base register used for the memory address
5953//
5954void emitter::emitIns_SIMD_R_R_R_AR(
5955 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base)
5956{
5957 assert(IsFMAInstruction(ins));
5958 assert(UseVEXEncoding());
5959
5960 if (op1Reg != targetReg)
5961 {
5962 // Ensure we aren't overwriting op2
5963 assert(op2Reg != targetReg);
5964
5965 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5966 }
5967
5968 emitIns_R_R_AR(ins, attr, targetReg, op2Reg, base, 0);
5969}
5970
5971//------------------------------------------------------------------------
5972// emitIns_SIMD_R_R_R_C: emits the code for a SIMD instruction that takes two register operands, a field handle +
5973// offset, and that returns a value in register
5974//
5975// Arguments:
5976// ins -- The instruction being emitted
5977// attr -- The emit attribute
5978// targetReg -- The target register
5979// op1Reg -- The register of the first operand
5980// op2Reg -- The register of the second operand
5981// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5982// offs -- The offset added to the memory address from fldHnd
5983//
5984void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
5985 emitAttr attr,
5986 regNumber targetReg,
5987 regNumber op1Reg,
5988 regNumber op2Reg,
5989 CORINFO_FIELD_HANDLE fldHnd,
5990 int offs)
5991{
5992 assert(IsFMAInstruction(ins));
5993 assert(UseVEXEncoding());
5994
5995 if (op1Reg != targetReg)
5996 {
5997 // Ensure we aren't overwriting op2
5998 assert(op2Reg != targetReg);
5999
6000 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6001 }
6002
6003 emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs);
6004}
6005
6006//------------------------------------------------------------------------
6007// emitIns_SIMD_R_R_R_R: emits the code for a SIMD instruction that takes three register operands, and that returns a
6008// value in register
6009//
6010// Arguments:
6011// ins -- The instruction being emitted
6012// attr -- The emit attribute
6013// targetReg -- The target register
6014// op1Reg -- The register of the first operand
6015// op2Reg -- The register of the second operand
6016// op3Reg -- The register of the second operand
6017//
6018void emitter::emitIns_SIMD_R_R_R_R(
6019 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
6020{
6021 if (IsFMAInstruction(ins))
6022 {
6023 assert(UseVEXEncoding());
6024
6025 if (op1Reg != targetReg)
6026 {
6027 // Ensure we aren't overwriting op2 or op3
6028
6029 assert(op2Reg != targetReg);
6030 assert(op3Reg != targetReg);
6031
6032 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6033 }
6034
6035 emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg);
6036 }
6037 else if (UseVEXEncoding())
6038 {
6039 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6040
6041 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6042 switch (ins)
6043 {
6044 case INS_blendvps:
6045 ins = INS_vblendvps;
6046 break;
6047 case INS_blendvpd:
6048 ins = INS_vblendvpd;
6049 break;
6050 case INS_pblendvb:
6051 ins = INS_vpblendvb;
6052 break;
6053 default:
6054 break;
6055 }
6056 emitIns_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Reg);
6057 }
6058 else
6059 {
6060 assert(isSse41Blendv(ins));
6061 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6062 if (op3Reg != REG_XMM0)
6063 {
6064 // Ensure we aren't overwriting op1 or op2
6065 assert(op1Reg != REG_XMM0);
6066 assert(op2Reg != REG_XMM0);
6067
6068 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6069 }
6070 if (op1Reg != targetReg)
6071 {
6072 // Ensure we aren't overwriting op2 or oop3 (which should be REG_XMM0)
6073 assert(op2Reg != targetReg);
6074 assert(targetReg != REG_XMM0);
6075
6076 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6077 }
6078 emitIns_R_R(ins, attr, targetReg, op2Reg);
6079 }
6080}
6081
6082//------------------------------------------------------------------------
6083// emitIns_SIMD_R_R_R_S: emits the code for a SIMD instruction that takes two register operands, a variable index +
6084// offset, and that returns a value in register
6085//
6086// Arguments:
6087// ins -- The instruction being emitted
6088// attr -- The emit attribute
6089// targetReg -- The target register
6090// op1Reg -- The register of the first operand
6091// op2Reg -- The register of the second operand
6092// varx -- The variable index used for the memory address
6093// offs -- The offset added to the memory address from varx
6094//
6095void emitter::emitIns_SIMD_R_R_R_S(
6096 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
6097{
6098 assert(IsFMAInstruction(ins));
6099 assert(UseVEXEncoding());
6100
6101 if (op1Reg != targetReg)
6102 {
6103 // Ensure we aren't overwriting op2
6104 assert(op2Reg != targetReg);
6105
6106 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6107 }
6108
6109 emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs);
6110}
6111
6112//------------------------------------------------------------------------
6113// emitIns_SIMD_R_R_A_R: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
6114// another register operand, and that returns a value in register
6115//
6116// Arguments:
6117// ins -- The instruction being emitted
6118// attr -- The emit attribute
6119// targetReg -- The target register
6120// op1Reg -- The register of the first operand
6121// op3Reg -- The register of the third operand
6122// indir -- The GenTreeIndir used for the memory address
6123//
6124void emitter::emitIns_SIMD_R_R_A_R(
6125 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
6126{
6127 if (UseVEXEncoding())
6128 {
6129 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6130
6131 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6132 switch (ins)
6133 {
6134 case INS_blendvps:
6135 {
6136 ins = INS_vblendvps;
6137 break;
6138 }
6139
6140 case INS_blendvpd:
6141 {
6142 ins = INS_vblendvpd;
6143 break;
6144 }
6145
6146 case INS_pblendvb:
6147 {
6148 ins = INS_vpblendvb;
6149 break;
6150 }
6151
6152 default:
6153 {
6154 break;
6155 }
6156 }
6157
6158 emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir);
6159 }
6160 else
6161 {
6162 assert(isSse41Blendv(ins));
6163
6164 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6165 if (op3Reg != REG_XMM0)
6166 {
6167 // Ensure we aren't overwriting op1
6168 assert(op1Reg != REG_XMM0);
6169
6170 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6171 }
6172 if (op1Reg != targetReg)
6173 {
6174 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6175 assert(targetReg != REG_XMM0);
6176
6177 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6178 }
6179
6180 emitIns_R_A(ins, attr, targetReg, indir);
6181 }
6182}
6183
6184//------------------------------------------------------------------------
6185// emitIns_SIMD_R_R_AR_R: emits the code for a SIMD instruction that takes a register operand, a base memory
6186// register, another register operand, and that returns a value in register
6187//
6188// Arguments:
6189// ins -- The instruction being emitted
6190// attr -- The emit attribute
6191// targetReg -- The target register
6192// op1Reg -- The register of the first operands
6193// op3Reg -- The register of the third operand
6194// base -- The base register used for the memory address
6195//
6196void emitter::emitIns_SIMD_R_R_AR_R(
6197 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base)
6198{
6199 if (UseVEXEncoding())
6200 {
6201 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6202
6203 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6204 switch (ins)
6205 {
6206 case INS_blendvps:
6207 {
6208 ins = INS_vblendvps;
6209 break;
6210 }
6211
6212 case INS_blendvpd:
6213 {
6214 ins = INS_vblendvpd;
6215 break;
6216 }
6217
6218 case INS_pblendvb:
6219 {
6220 ins = INS_vpblendvb;
6221 break;
6222 }
6223
6224 default:
6225 {
6226 break;
6227 }
6228 }
6229
6230 emitIns_R_R_AR_R(ins, attr, targetReg, op1Reg, op3Reg, base, 0);
6231 }
6232 else
6233 {
6234 assert(isSse41Blendv(ins));
6235
6236 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6237 if (op3Reg != REG_XMM0)
6238 {
6239 // Ensure we aren't overwriting op1
6240 assert(op1Reg != REG_XMM0);
6241
6242 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6243 }
6244 if (op1Reg != targetReg)
6245 {
6246 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6247 assert(targetReg != REG_XMM0);
6248
6249 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6250 }
6251
6252 emitIns_R_AR(ins, attr, targetReg, base, 0);
6253 }
6254}
6255
6256//------------------------------------------------------------------------
6257// emitIns_SIMD_R_R_C_R: emits the code for a SIMD instruction that takes a register operand, a field handle +
6258// offset, another register operand, and that returns a value in register
6259//
6260// Arguments:
6261// ins -- The instruction being emitted
6262// attr -- The emit attribute
6263// targetReg -- The target register
6264// op1Reg -- The register of the first operand
6265// op3Reg -- The register of the third operand
6266// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
6267// offs -- The offset added to the memory address from fldHnd
6268//
6269void emitter::emitIns_SIMD_R_R_C_R(instruction ins,
6270 emitAttr attr,
6271 regNumber targetReg,
6272 regNumber op1Reg,
6273 regNumber op3Reg,
6274 CORINFO_FIELD_HANDLE fldHnd,
6275 int offs)
6276{
6277 if (UseVEXEncoding())
6278 {
6279 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6280
6281 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6282 switch (ins)
6283 {
6284 case INS_blendvps:
6285 {
6286 ins = INS_vblendvps;
6287 break;
6288 }
6289
6290 case INS_blendvpd:
6291 {
6292 ins = INS_vblendvpd;
6293 break;
6294 }
6295
6296 case INS_pblendvb:
6297 {
6298 ins = INS_vpblendvb;
6299 break;
6300 }
6301
6302 default:
6303 {
6304 break;
6305 }
6306 }
6307
6308 emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs);
6309 }
6310 else
6311 {
6312 assert(isSse41Blendv(ins));
6313
6314 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6315 if (op3Reg != REG_XMM0)
6316 {
6317 // Ensure we aren't overwriting op1
6318 assert(op1Reg != REG_XMM0);
6319
6320 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6321 }
6322 if (op1Reg != targetReg)
6323 {
6324 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6325 assert(targetReg != REG_XMM0);
6326
6327 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6328 }
6329
6330 emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
6331 }
6332}
6333
6334//------------------------------------------------------------------------
6335// emitIns_SIMD_R_R_S_R: emits the code for a SIMD instruction that takes a register operand, a variable index +
6336// offset, another register operand, and that returns a value in register
6337//
6338// Arguments:
6339// ins -- The instruction being emitted
6340// attr -- The emit attribute
6341// targetReg -- The target register
6342// op1Reg -- The register of the first operand
6343// op3Reg -- The register of the third operand
6344// varx -- The variable index used for the memory address
6345// offs -- The offset added to the memory address from varx
6346//
6347void emitter::emitIns_SIMD_R_R_S_R(
6348 instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
6349{
6350 if (UseVEXEncoding())
6351 {
6352 assert(isAvxBlendv(ins) || isSse41Blendv(ins));
6353
6354 // convert SSE encoding of SSE4.1 instructions to VEX encoding
6355 switch (ins)
6356 {
6357 case INS_blendvps:
6358 {
6359 ins = INS_vblendvps;
6360 break;
6361 }
6362
6363 case INS_blendvpd:
6364 {
6365 ins = INS_vblendvpd;
6366 break;
6367 }
6368
6369 case INS_pblendvb:
6370 {
6371 ins = INS_vpblendvb;
6372 break;
6373 }
6374
6375 default:
6376 {
6377 break;
6378 }
6379 }
6380
6381 emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs);
6382 }
6383 else
6384 {
6385 assert(isSse41Blendv(ins));
6386
6387 // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
6388 if (op3Reg != REG_XMM0)
6389 {
6390 // Ensure we aren't overwriting op1
6391 assert(op1Reg != REG_XMM0);
6392
6393 emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6394 }
6395 if (op1Reg != targetReg)
6396 {
6397 // Ensure we aren't overwriting op3 (which should be REG_XMM0)
6398 assert(targetReg != REG_XMM0);
6399
6400 emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6401 }
6402
6403 emitIns_R_S(ins, attr, targetReg, varx, offs);
6404 }
6405}
6406#endif // FEATURE_HW_INTRINSICS
6407
6408/*****************************************************************************
6409 *
6410 * The following add instructions referencing stack-based local variables.
6411 */
6412
6413void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
6414{
6415 instrDesc* id = emitNewInstr(attr);
6416 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6417 insFormat fmt = emitInsModeFormat(ins, IF_SRD);
6418
6419 // 16-bit operand instructions will need a prefix
6420 if (EA_SIZE(attr) == EA_2BYTE)
6421 {
6422 sz += 1;
6423 }
6424
6425 // VEX prefix
6426 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6427
6428 // 64-bit operand instructions will need a REX.W prefix
6429 if (TakesRexWPrefix(ins, attr))
6430 {
6431 sz += emitGetRexPrefixSize(ins);
6432 }
6433
6434 id->idIns(ins);
6435 id->idInsFmt(fmt);
6436 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6437 id->idCodeSize(sz);
6438
6439#ifdef DEBUG
6440 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6441#endif
6442 dispIns(id);
6443 emitCurIGsize += sz;
6444
6445 emitAdjustStackDepthPushPop(ins);
6446}
6447
6448void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6449{
6450 instrDesc* id = emitNewInstr(attr);
6451 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6452 insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD);
6453
6454#ifdef _TARGET_X86_
6455 if (attr == EA_1BYTE)
6456 {
6457 assert(isByteReg(ireg));
6458 }
6459#endif
6460 // 16-bit operand instructions will need a prefix
6461 if (EA_SIZE(attr) == EA_2BYTE)
6462 {
6463 sz++;
6464 }
6465
6466 // VEX prefix
6467 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6468
6469 // 64-bit operand instructions will need a REX.W prefix
6470 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
6471 {
6472 sz += emitGetRexPrefixSize(ins);
6473 }
6474
6475 id->idIns(ins);
6476 id->idInsFmt(fmt);
6477 id->idReg1(ireg);
6478 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6479 id->idCodeSize(sz);
6480#ifdef DEBUG
6481 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6482#endif
6483 dispIns(id);
6484 emitCurIGsize += sz;
6485}
6486
6487void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6488{
6489 emitAttr size = EA_SIZE(attr);
6490 noway_assert(emitVerifyEncodable(ins, size, ireg));
6491
6492 instrDesc* id = emitNewInstr(attr);
6493 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
6494 insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD);
6495
6496 // Most 16-bit operand instructions need a prefix
6497 if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
6498 {
6499 sz++;
6500 }
6501
6502 // VEX prefix
6503 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
6504
6505 // 64-bit operand instructions will need a REX.W prefix
6506 if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
6507 {
6508 sz += emitGetRexPrefixSize(ins);
6509 }
6510
6511 if (ins == INS_crc32)
6512 {
6513 sz += 1;
6514 }
6515
6516 id->idIns(ins);
6517 id->idInsFmt(fmt);
6518 id->idReg1(ireg);
6519 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6520 id->idCodeSize(sz);
6521#ifdef DEBUG
6522 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6523#endif
6524 dispIns(id);
6525 emitCurIGsize += sz;
6526}
6527
6528void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
6529{
6530#ifdef _TARGET_AMD64_
6531 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
6532 // all other opcodes take a sign-extended 4-byte immediate
6533 noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
6534#endif
6535
6536 insFormat fmt;
6537
6538 switch (ins)
6539 {
6540 case INS_rcl_N:
6541 case INS_rcr_N:
6542 case INS_rol_N:
6543 case INS_ror_N:
6544 case INS_shl_N:
6545 case INS_shr_N:
6546 case INS_sar_N:
6547 assert(val != 1);
6548 fmt = IF_SRW_SHF;
6549 val &= 0x7F;
6550 break;
6551
6552 default:
6553 fmt = emitInsModeFormat(ins, IF_SRD_CNS);
6554 break;
6555 }
6556
6557 instrDesc* id = emitNewInstrCns(attr, val);
6558 id->idIns(ins);
6559 id->idInsFmt(fmt);
6560 UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val);
6561
6562 // VEX prefix
6563 sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
6564
6565 // 64-bit operand instructions will need a REX.W prefix
6566 if (TakesRexWPrefix(ins, attr))
6567 {
6568 sz += emitGetRexPrefixSize(ins);
6569 }
6570
6571 id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6572 id->idCodeSize(sz);
6573#ifdef DEBUG
6574 id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6575#endif
6576 dispIns(id);
6577 emitCurIGsize += sz;
6578}
6579
6580/*****************************************************************************
6581 *
6582 * Record that a jump instruction uses the short encoding
6583 *
6584 */
6585void emitter::emitSetShortJump(instrDescJmp* id)
6586{
6587 if (id->idjKeepLong)
6588 {
6589 return;
6590 }
6591
6592 id->idjShort = true;
6593}
6594
6595/*****************************************************************************
6596 *
6597 * Add a jmp instruction.
6598 */
6599
6600void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
6601{
6602 UNATIVE_OFFSET sz;
6603 instrDescJmp* id = emitNewInstrJmp();
6604
6605 assert(dst->bbFlags & BBF_JMP_TARGET);
6606
6607 id->idIns(ins);
6608 id->idInsFmt(IF_LABEL);
6609 id->idAddr()->iiaBBlabel = dst;
6610
6611#ifdef DEBUG
6612 // Mark the finally call
6613 if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
6614 {
6615 id->idDebugOnlyInfo()->idFinallyCall = true;
6616 }
6617#endif // DEBUG
6618
6619 /* Assume the jump will be long */
6620
6621 id->idjShort = 0;
6622 id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
6623
6624 /* Record the jump's IG and offset within it */
6625
6626 id->idjIG = emitCurIG;
6627 id->idjOffs = emitCurIGsize;
6628
6629 /* Append this jump to this IG's jump list */
6630
6631 id->idjNext = emitCurIGjmpList;
6632 emitCurIGjmpList = id;
6633
6634#if EMITTER_STATS
6635 emitTotalIGjmps++;
6636#endif
6637
6638 /* Figure out the max. size of the jump/call instruction */
6639
6640 if (ins == INS_call)
6641 {
6642 sz = CALL_INST_SIZE;
6643 }
6644 else if (ins == INS_push || ins == INS_push_hide)
6645 {
6646 // Pushing the address of a basicBlock will need a reloc
6647 // as the instruction uses the absolute address,
6648 // not a relative address
6649 if (emitComp->opts.compReloc)
6650 {
6651 id->idSetIsDspReloc();
6652 }
6653 sz = PUSH_INST_SIZE;
6654 }
6655 else
6656 {
6657 insGroup* tgt;
6658
6659 /* This is a jump - assume the worst */
6660
6661 sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
6662
6663 /* Can we guess at the jump distance? */
6664
6665 tgt = (insGroup*)emitCodeGetCookie(dst);
6666
6667 if (tgt)
6668 {
6669 int extra;
6670 UNATIVE_OFFSET srcOffs;
6671 int jmpDist;
6672
6673 assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
6674
6675 /* This is a backward jump - figure out the distance */
6676
6677 srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
6678
6679 /* Compute the distance estimate */
6680
6681 jmpDist = srcOffs - tgt->igOffs;
6682 assert((int)jmpDist > 0);
6683
6684 /* How much beyond the max. short distance does the jump go? */
6685
6686 extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
6687
6688#if DEBUG_EMIT
6689 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6690 {
6691 if (INTERESTING_JUMP_NUM == 0)
6692 {
6693 printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6694 }
6695 printf("[0] Jump source is at %08X\n", srcOffs);
6696 printf("[0] Label block is at %08X\n", tgt->igOffs);
6697 printf("[0] Jump distance - %04X\n", jmpDist);
6698 if (extra > 0)
6699 {
6700 printf("[0] Distance excess = %d \n", extra);
6701 }
6702 }
6703#endif
6704
6705 if (extra <= 0 && !id->idjKeepLong)
6706 {
6707 /* Wonderful - this jump surely will be short */
6708
6709 emitSetShortJump(id);
6710 sz = JMP_SIZE_SMALL;
6711 }
6712 }
6713#if DEBUG_EMIT
6714 else
6715 {
6716 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
6717 {
6718 if (INTERESTING_JUMP_NUM == 0)
6719 {
6720 printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6721 }
6722 printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
6723 emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
6724 printf("[0] Label block is unknown\n");
6725 }
6726 }
6727#endif
6728 }
6729
6730 id->idCodeSize(sz);
6731
6732 dispIns(id);
6733 emitCurIGsize += sz;
6734
6735 emitAdjustStackDepthPushPop(ins);
6736}
6737
6738#if !FEATURE_FIXED_OUT_ARGS
6739
6740//------------------------------------------------------------------------
6741// emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth.
6742//
6743// Arguments:
6744// ins - the instruction. Only INS_push and INS_pop adjust the stack depth.
6745//
6746// Notes:
6747// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6748// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6749//
6750void emitter::emitAdjustStackDepthPushPop(instruction ins)
6751{
6752 if (ins == INS_push)
6753 {
6754 emitCurStackLvl += emitCntStackDepth;
6755
6756 if (emitMaxStackDepth < emitCurStackLvl)
6757 {
6758 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6759 emitMaxStackDepth = emitCurStackLvl;
6760 }
6761 }
6762 else if (ins == INS_pop)
6763 {
6764 emitCurStackLvl -= emitCntStackDepth;
6765 assert((int)emitCurStackLvl >= 0);
6766 }
6767}
6768
6769//------------------------------------------------------------------------
6770// emitAdjustStackDepth: Adjust the current and maximum stack depth.
6771//
6772// Arguments:
6773// ins - the instruction. Only INS_add and INS_sub adjust the stack depth.
6774// It is assumed that the add/sub is on the stack pointer.
6775// val - the number of bytes to add to or subtract from the stack pointer.
6776//
6777// Notes:
6778// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6779// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6780//
6781void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
6782{
6783 // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return.
6784 if (emitCntStackDepth == 0)
6785 return;
6786
6787 if (ins == INS_sub)
6788 {
6789 S_UINT32 newStackLvl(emitCurStackLvl);
6790 newStackLvl += S_UINT32(val);
6791 noway_assert(!newStackLvl.IsOverflow());
6792
6793 emitCurStackLvl = newStackLvl.Value();
6794
6795 if (emitMaxStackDepth < emitCurStackLvl)
6796 {
6797 JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6798 emitMaxStackDepth = emitCurStackLvl;
6799 }
6800 }
6801 else if (ins == INS_add)
6802 {
6803 S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
6804 noway_assert(!newStackLvl.IsOverflow());
6805
6806 emitCurStackLvl = newStackLvl.Value();
6807 }
6808}
6809
#endif // !FEATURE_FIXED_OUT_ARGS
6811
6812/*****************************************************************************
6813 *
6814 * Add a call instruction (direct or indirect).
6815 * argSize<0 means that the caller will pop the arguments
6816 *
6817 * The other arguments are interpreted depending on callType as shown:
6818 * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
6819 *
6820 * EC_FUNC_TOKEN : addr is the method address
6821 * EC_FUNC_TOKEN_INDIR : addr is the indirect method address
6822 * EC_FUNC_ADDR : addr is the absolute address of the function
6823 * EC_FUNC_VIRTUAL : "call [ireg+disp]"
6824 *
6825 * If callType is one of these emitCallTypes, addr has to be NULL.
6826 * EC_INDIR_R : "call ireg".
6827 * EC_INDIR_SR : "call lcl<disp>" (eg. call [ebp-8]).
6828 * EC_INDIR_C : "call clsVar<disp>" (eg. call [clsVarAddr])
6829 * EC_INDIR_ARD : "call [ireg+xreg*xmul+disp]"
6830 *
6831 */
6832
6833// clang-format off
6834void emitter::emitIns_Call(EmitCallType callType,
6835 CORINFO_METHOD_HANDLE methHnd,
6836 INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
6837 void* addr,
6838 ssize_t argSize,
6839 emitAttr retSize
6840 MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
6841 VARSET_VALARG_TP ptrVars,
6842 regMaskTP gcrefRegs,
6843 regMaskTP byrefRegs,
6844 IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
6845 regNumber ireg, // = REG_NA
6846 regNumber xreg, // = REG_NA
6847 unsigned xmul, // = 0
6848 ssize_t disp, // = 0
6849 bool isJump) // = false
6850// clang-format on
6851{
6852 /* Sanity check the arguments depending on callType */
6853
6854 assert(callType < EC_COUNT);
6855 assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
6856 (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
6857 assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
6858 assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr);
6859 assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
6860 assert(callType != EC_INDIR_SR ||
6861 (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
6862 assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));
6863
6864 // Our stack level should be always greater than the bytes of arguments we push. Just
6865 // a sanity test.
6866 assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
6867
6868#if STACK_PROBES
6869 if (emitComp->opts.compNeedStackProbes)
6870 {
6871 // If we've pushed more than JIT_RESERVED_STACK allows, do an additional stack probe
6872 // Else, just make sure the prolog does a probe for us. Invariant we're trying
6873 // to get is that at any point we go out to unmanaged code, there is at least
6874 // CORINFO_STACKPROBE_DEPTH bytes of stack available.
6875 //
6876 // The reason why we are not doing one probe for the max size at the prolog
6877 // is that when don't have the max depth precomputed (it can depend on codegen),
6878 // and we need it at the time we generate locallocs
6879 //
6880 // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes in
6881 // account everything except for the arguments of a callee.
6882 //
6883 //
6884 //
6885 if ((TARGET_POINTER_SIZE + // return address for call
6886 emitComp->genStackLevel +
6887 // Current stack level. This gets resetted on every
6888 // localloc and on the prolog (invariant is that
6889 // genStackLevel is 0 on basic block entry and exit and
6890 // after any alloca). genStackLevel will include any arguments
6891 // to the call, so we will insert an additional probe if
6892 // we've consumed more than JIT_RESERVED_STACK bytes
6893 // of stack, which is what the prolog probe covers (in
6894 // addition to the EE requested size)
6895 (emitComp->compHndBBtabCount * TARGET_POINTER_SIZE)
6896 // Hidden slots for calling finallys
6897 ) >= JIT_RESERVED_STACK)
6898 {
6899 // This happens when you have a call with a lot of arguments or a call is done
6900 // when there's a lot of stuff pushed on the stack (for example a call whos returned
6901 // value is an argument of another call that has pushed stuff on the stack)
6902 // This should't be very frequent.
6903 // For different values of JIT_RESERVED_STACK
6904 //
6905 // For mscorlib (109605 calls)
6906 //
6907 // 14190 probes in prologs (56760 bytes of code)
6908 //
6909 // JIT_RESERVED_STACK = 16 : 5452 extra probes
6910 // JIT_RESERVED_STACK = 32 : 1084 extra probes
6911 // JIT_RESERVED_STACK = 64 : 1 extra probes
6912 // JIT_RESERVED_STACK = 96 : 0 extra probes
6913 emitComp->genGenerateStackProbe();
6914 }
6915 else
6916 {
6917 if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog)
6918 {
6919 if (emitComp->compStackProbePrologDone)
6920 {
6921 // We already generated a probe and this call is not happening
6922 // at a depth >= JIT_RESERVED_STACK, so nothing to do here
6923 }
6924 else
6925 {
6926 // 3 possible ways to get here:
6927 // - We are in an epilog and haven't generated a probe in the prolog.
6928 // This shouldn't happen as we don't generate any calls in epilog.
6929 // - We are in the prolog, but doing a call before generating the probe.
6930 // This shouldn't happen at all.
6931 // - We are in the prolog, did not generate a probe but now we need
6932 // to generate a probe because we need a call (eg: profiler). We'll
6933 // need a probe.
6934 //
6935 // In any case, we need a probe
6936
6937 // Ignore the profiler callback for now.
6938 if (!emitComp->compIsProfilerHookNeeded())
6939 {
6940 assert(!"We do not expect to get here");
6941 emitComp->genGenerateStackProbe();
6942 }
6943 }
6944 }
6945 else
6946 {
6947 // We will need a probe and will generate it in the prolog
6948 emitComp->genNeedPrologStackProbe = true;
6949 }
6950 }
6951 }
6952#endif // STACK_PROBES
6953
6954 // Trim out any callee-trashed registers from the live set.
6955 regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
6956 gcrefRegs &= savedSet;
6957 byrefRegs &= savedSet;
6958
6959#ifdef DEBUG
6960 if (EMIT_GC_VERBOSE)
6961 {
6962 printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
6963 dumpConvertedVarSet(emitComp, ptrVars);
6964 printf(", gcrefRegs=");
6965 printRegMaskInt(gcrefRegs);
6966 emitDispRegSet(gcrefRegs);
6967 printf(", byrefRegs=");
6968 printRegMaskInt(byrefRegs);
6969 emitDispRegSet(byrefRegs);
6970 printf("\n");
6971 }
6972#endif
6973
6974 /* Managed RetVal: emit sequence point for the call */
6975 if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
6976 {
6977 codeGen->genIPmappingAdd(ilOffset, false);
6978 }
6979
6980 /*
6981 We need to allocate the appropriate instruction descriptor based
6982 on whether this is a direct/indirect call, and whether we need to
6983 record an updated set of live GC variables.
6984
6985 The stats for a ton of classes is as follows:
6986
6987 Direct call w/o GC vars 220,216
6988 Indir. call w/o GC vars 144,781
6989
6990 Direct call with GC vars 9,440
6991 Indir. call with GC vars 5,768
6992 */
6993
6994 instrDesc* id;
6995
6996 assert(argSize % REGSIZE_BYTES == 0);
6997 int argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide
6998
6999 if (callType >= EC_FUNC_VIRTUAL)
7000 {
7001 /* Indirect call, virtual calls */
7002
7003 assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
7004 callType == EC_INDIR_C || callType == EC_INDIR_ARD);
7005
7006 id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
7007 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
7008 }
7009 else
7010 {
7011 // Helper/static/nonvirtual/function calls (direct or through handle),
7012 // and calls to an absolute addr.
7013
7014 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);
7015
7016 id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
7017 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
7018 }
7019
7020 /* Update the emitter's live GC ref sets */
7021
7022 VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
7023 emitThisGCrefRegs = gcrefRegs;
7024 emitThisByrefRegs = byrefRegs;
7025
7026 /* Set the instruction - special case jumping a function */
7027 instruction ins = INS_call;
7028
7029 if (isJump)
7030 {
7031 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
7032 if (callType == EC_FUNC_TOKEN)
7033 {
7034 ins = INS_l_jmp;
7035 }
7036 else
7037 {
7038 ins = INS_i_jmp;
7039 }
7040 }
7041 id->idIns(ins);
7042
7043 id->idSetIsNoGC(emitNoGChelper(methHnd));
7044
7045 UNATIVE_OFFSET sz;
7046
7047 // Record the address: method, indirection, or funcptr
7048 if (callType >= EC_FUNC_VIRTUAL)
7049 {
7050 // This is an indirect call (either a virtual call or func ptr call)
7051
7052 switch (callType)
7053 {
7054 case EC_INDIR_C:
7055 // Indirect call using an absolute code address.
7056 // Must be marked as relocatable and is done at the
7057 // branch target location.
7058 goto CALL_ADDR_MODE;
7059
7060 case EC_INDIR_R: // the address is in a register
7061
7062 id->idSetIsCallRegPtr();
7063
7064 __fallthrough;
7065
7066 case EC_INDIR_ARD: // the address is an indirection
7067
7068 goto CALL_ADDR_MODE;
7069
7070 case EC_INDIR_SR: // the address is in a lcl var
7071
7072 id->idInsFmt(IF_SRD);
7073 // disp is really a lclVarNum
7074 noway_assert((unsigned)disp == (size_t)disp);
7075 id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
7076 sz = emitInsSizeSV(id, insCodeMR(INS_call), (unsigned)disp, 0);
7077
7078 break;
7079
7080 case EC_FUNC_VIRTUAL:
7081
7082 CALL_ADDR_MODE:
7083
7084 // fall-through
7085
7086 // The function is "ireg" if id->idIsCallRegPtr(),
7087 // else [ireg+xmul*xreg+disp]
7088
7089 id->idInsFmt(IF_ARD);
7090
7091 id->idAddr()->iiaAddrMode.amBaseReg = ireg;
7092 id->idAddr()->iiaAddrMode.amIndxReg = xreg;
7093 id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;
7094
7095 sz = emitInsSizeAM(id, insCodeMR(INS_call));
7096
7097 if (ireg == REG_NA && xreg == REG_NA)
7098 {
7099 if (codeGen->genCodeIndirAddrNeedsReloc(disp))
7100 {
7101 id->idSetIsDspReloc();
7102 }
7103#ifdef _TARGET_AMD64_
7104 else
7105 {
7106 // An absolute indir address that doesn't need reloc should fit within 32-bits
7107 // to be encoded as offset relative to zero. This addr mode requires an extra
7108 // SIB byte
7109 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
7110 sz++;
7111 }
7112#endif //_TARGET_AMD64_
7113 }
7114
7115 break;
7116
7117 default:
7118 NO_WAY("unexpected instruction");
7119 break;
7120 }
7121 }
7122 else if (callType == EC_FUNC_TOKEN_INDIR)
7123 {
7124 /* "call [method_addr]" */
7125
7126 assert(addr != nullptr);
7127
7128 id->idInsFmt(IF_METHPTR);
7129 id->idAddr()->iiaAddr = (BYTE*)addr;
7130 sz = 6;
7131
7132 // Since this is an indirect call through a pointer and we don't
7133 // currently pass in emitAttr into this function, we query codegen
7134 // whether addr needs a reloc.
7135 if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
7136 {
7137 id->idSetIsDspReloc();
7138 }
7139#ifdef _TARGET_AMD64_
7140 else
7141 {
7142 // An absolute indir address that doesn't need reloc should fit within 32-bits
7143 // to be encoded as offset relative to zero. This addr mode requires an extra
7144 // SIB byte
7145 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
7146 sz++;
7147 }
7148#endif //_TARGET_AMD64_
7149 }
7150 else
7151 {
7152 /* This is a simple direct call: "call helper/method/addr" */
7153
7154 assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
7155
7156 assert(addr != nullptr);
7157
7158 id->idInsFmt(IF_METHOD);
7159 sz = 5;
7160
7161 id->idAddr()->iiaAddr = (BYTE*)addr;
7162
7163 if (callType == EC_FUNC_ADDR)
7164 {
7165 id->idSetIsCallAddr();
7166 }
7167
7168 // Direct call to a method and no addr indirection is needed.
7169 if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
7170 {
7171 id->idSetIsDspReloc();
7172 }
7173 }
7174
7175#ifdef DEBUG
7176 if (emitComp->verbose && 0)
7177 {
7178 if (id->idIsLargeCall())
7179 {
7180 if (callType >= EC_FUNC_VIRTUAL)
7181 {
7182 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
7183 VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
7184 }
7185 else
7186 {
7187 printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
7188 VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
7189 }
7190 }
7191 }
7192
7193 id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
7194 id->idDebugOnlyInfo()->idCallSig = sigInfo;
7195#endif // DEBUG
7196
7197#ifdef LATE_DISASM
7198 if (addr != nullptr)
7199 {
7200 codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
7201 }
7202#endif // LATE_DISASM
7203
7204 id->idCodeSize(sz);
7205
7206 dispIns(id);
7207 emitCurIGsize += sz;
7208
7209#if !FEATURE_FIXED_OUT_ARGS
7210
7211 /* The call will pop the arguments */
7212
7213 if (emitCntStackDepth && argSize > 0)
7214 {
7215 noway_assert((ssize_t)emitCurStackLvl >= argSize);
7216 emitCurStackLvl -= (int)argSize;
7217 assert((int)emitCurStackLvl >= 0);
7218 }
7219
7220#endif // !FEATURE_FIXED_OUT_ARGS
7221}
7222
7223#ifdef DEBUG
7224/*****************************************************************************
7225 *
7226 * The following called for each recorded instruction -- use for debugging.
7227 */
7228void emitter::emitInsSanityCheck(instrDesc* id)
7229{
7230 // make certain you only try to put relocs on things that can have them.
7231 ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7232 if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
7233 {
7234 idOp = ID_OP_CNS;
7235 }
7236
7237 if (id->idIsDspReloc())
7238 {
7239 assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
7240 idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
7241 idOp == ID_OP_LBL);
7242 }
7243
7244 if (id->idIsCnsReloc())
7245 {
7246 assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
7247 idOp == ID_OP_CALL || idOp == ID_OP_JMP);
7248 }
7249}
7250#endif
7251
7252/*****************************************************************************
7253 *
7254 * Return the allocated size (in bytes) of the given instruction descriptor.
7255 */
7256
7257size_t emitter::emitSizeOfInsDsc(instrDesc* id)
7258{
7259 if (emitIsScnsInsDsc(id))
7260 {
7261 return SMALL_IDSC_SIZE;
7262 }
7263
7264 assert((unsigned)id->idInsFmt() < emitFmtCount);
7265
7266 ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7267
7268 // An INS_call instruction may use a "fat" direct/indirect call descriptor
7269 // except for a local call to a label (i.e. call to a finally)
7270 // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
7271 // INS_call instruction always uses one of these idOps
7272
7273 if (id->idIns() == INS_call)
7274 {
7275 assert(idOp == ID_OP_CALL || // is a direct call
7276 idOp == ID_OP_SPEC || // is a indirect call
7277 idOp == ID_OP_JMP); // is a local call to finally clause
7278 }
7279
7280 switch (idOp)
7281 {
7282 case ID_OP_NONE:
7283 break;
7284
7285 case ID_OP_LBL:
7286 return sizeof(instrDescLbl);
7287
7288 case ID_OP_JMP:
7289 return sizeof(instrDescJmp);
7290
7291 case ID_OP_CALL:
7292 case ID_OP_SPEC:
7293 if (id->idIsLargeCall())
7294 {
7295 /* Must be a "fat" indirect call descriptor */
7296 return sizeof(instrDescCGCA);
7297 }
7298
7299 __fallthrough;
7300
7301 case ID_OP_SCNS:
7302 case ID_OP_CNS:
7303 case ID_OP_DSP:
7304 case ID_OP_DSP_CNS:
7305 case ID_OP_AMD:
7306 case ID_OP_AMD_CNS:
7307 if (id->idIsLargeCns())
7308 {
7309 if (id->idIsLargeDsp())
7310 {
7311 return sizeof(instrDescCnsDsp);
7312 }
7313 else
7314 {
7315 return sizeof(instrDescCns);
7316 }
7317 }
7318 else
7319 {
7320 if (id->idIsLargeDsp())
7321 {
7322 return sizeof(instrDescDsp);
7323 }
7324 else
7325 {
7326 return sizeof(instrDesc);
7327 }
7328 }
7329
7330 default:
7331 NO_WAY("unexpected instruction descriptor format");
7332 break;
7333 }
7334
7335 return sizeof(instrDesc);
7336}
7337
7338/*****************************************************************************/
7339#ifdef DEBUG
7340/*****************************************************************************
7341 *
7342 * Return a string that represents the given register.
7343 */
7344
7345const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
7346{
7347 static char rb[2][128];
7348 static unsigned char rbc = 0;
7349
7350 const char* rn = emitComp->compRegVarName(reg, varName);
7351
7352#ifdef _TARGET_AMD64_
7353 char suffix = '\0';
7354
7355 switch (EA_SIZE(attr))
7356 {
7357 case EA_32BYTE:
7358 return emitYMMregName(reg);
7359
7360 case EA_16BYTE:
7361 return emitXMMregName(reg);
7362
7363 case EA_8BYTE:
7364 if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
7365 {
7366 return emitXMMregName(reg);
7367 }
7368 break;
7369
7370 case EA_4BYTE:
7371 if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
7372 {
7373 return emitXMMregName(reg);
7374 }
7375
7376 if (reg > REG_R15)
7377 {
7378 break;
7379 }
7380
7381 if (reg > REG_RDI)
7382 {
7383 suffix = 'd';
7384 goto APPEND_SUFFIX;
7385 }
7386 rbc = (rbc + 1) % 2;
7387 rb[rbc][0] = 'e';
7388 rb[rbc][1] = rn[1];
7389 rb[rbc][2] = rn[2];
7390 rb[rbc][3] = 0;
7391 rn = rb[rbc];
7392 break;
7393
7394 case EA_2BYTE:
7395 if (reg > REG_RDI)
7396 {
7397 suffix = 'w';
7398 goto APPEND_SUFFIX;
7399 }
7400 rn++;
7401 break;
7402
7403 case EA_1BYTE:
7404 if (reg > REG_RDI)
7405 {
7406 suffix = 'b';
7407 APPEND_SUFFIX:
7408 rbc = (rbc + 1) % 2;
7409 rb[rbc][0] = rn[0];
7410 rb[rbc][1] = rn[1];
7411 if (rn[2])
7412 {
7413 assert(rn[3] == 0);
7414 rb[rbc][2] = rn[2];
7415 rb[rbc][3] = suffix;
7416 rb[rbc][4] = 0;
7417 }
7418 else
7419 {
7420 rb[rbc][2] = suffix;
7421 rb[rbc][3] = 0;
7422 }
7423 }
7424 else
7425 {
7426 rbc = (rbc + 1) % 2;
7427 rb[rbc][0] = rn[1];
7428 if (reg < 4)
7429 {
7430 rb[rbc][1] = 'l';
7431 rb[rbc][2] = 0;
7432 }
7433 else
7434 {
7435 rb[rbc][1] = rn[2];
7436 rb[rbc][2] = 'l';
7437 rb[rbc][3] = 0;
7438 }
7439 }
7440
7441 rn = rb[rbc];
7442 break;
7443
7444 default:
7445 break;
7446 }
7447#endif // _TARGET_AMD64_
7448
7449#ifdef _TARGET_X86_
7450 assert(strlen(rn) >= 3);
7451
7452 switch (EA_SIZE(attr))
7453 {
7454 case EA_32BYTE:
7455 return emitYMMregName(reg);
7456
7457 case EA_16BYTE:
7458 return emitXMMregName(reg);
7459
7460 case EA_8BYTE:
7461 if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
7462 {
7463 return emitXMMregName(reg);
7464 }
7465 break;
7466
7467 case EA_4BYTE:
7468 if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
7469 {
7470 return emitXMMregName(reg);
7471 }
7472 break;
7473
7474 case EA_2BYTE:
7475 rn++;
7476 break;
7477
7478 case EA_1BYTE:
7479 rbc = (rbc + 1) % 2;
7480 rb[rbc][0] = rn[1];
7481 rb[rbc][1] = 'l';
7482 strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3);
7483
7484 rn = rb[rbc];
7485 break;
7486
7487 default:
7488 break;
7489 }
7490#endif // _TARGET_X86_
7491
7492#if 0
7493 // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively,
7494 // however it's possibly not interesting most of the time.
7495 if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr))
7496 {
7497 if (rn != rb[rbc])
7498 {
7499 rbc = (rbc+1)%2;
7500 strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
7501 rn = rb[rbc];
7502 }
7503
7504 if (EA_IS_GCREF(attr))
7505 {
7506 strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
7507 }
7508 else if (EA_IS_BYREF(attr))
7509 {
7510 strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
7511 }
7512 }
7513#endif // 0
7514
7515 return rn;
7516}
7517
7518/*****************************************************************************
7519 *
7520 * Return a string that represents the given FP register.
7521 */
7522
7523const char* emitter::emitFPregName(unsigned reg, bool varName)
7524{
7525 assert(reg < REG_COUNT);
7526
7527 return emitComp->compFPregVarName((regNumber)(reg), varName);
7528}
7529
7530/*****************************************************************************
7531 *
7532 * Return a string that represents the given XMM register.
7533 */
7534
7535const char* emitter::emitXMMregName(unsigned reg)
7536{
7537 static const char* const regNames[] = {
7538#define REGDEF(name, rnum, mask, sname) "x" sname,
7539#include "register.h"
7540 };
7541
7542 assert(reg < REG_COUNT);
7543 assert(reg < _countof(regNames));
7544
7545 return regNames[reg];
7546}
7547
7548/*****************************************************************************
7549 *
7550 * Return a string that represents the given YMM register.
7551 */
7552
7553const char* emitter::emitYMMregName(unsigned reg)
7554{
7555 static const char* const regNames[] = {
7556#define REGDEF(name, rnum, mask, sname) "y" sname,
7557#include "register.h"
7558 };
7559
7560 assert(reg < REG_COUNT);
7561 assert(reg < _countof(regNames));
7562
7563 return regNames[reg];
7564}
7565
7566/*****************************************************************************
7567 *
7568 * Display a static data member reference.
7569 */
7570
7571void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
7572{
7573 int doffs;
7574
7575 /* Filter out the special case of fs:[offs] */
7576
7577 // Munge any pointers if we want diff-able disassembly
7578 if (emitComp->opts.disDiffable)
7579 {
7580 ssize_t top12bits = (offs >> 20);
7581 if ((top12bits != 0) && (top12bits != -1))
7582 {
7583 offs = 0xD1FFAB1E;
7584 }
7585 }
7586
7587 if (fldHnd == FLD_GLOBAL_FS)
7588 {
7589 printf("FS:[0x%04X]", offs);
7590 return;
7591 }
7592
7593 if (fldHnd == FLD_GLOBAL_DS)
7594 {
7595 printf("[0x%04X]", offs);
7596 return;
7597 }
7598
7599 printf("[");
7600
7601 doffs = Compiler::eeGetJitDataOffs(fldHnd);
7602
7603 if (reloc)
7604 {
7605 printf("reloc ");
7606 }
7607
7608 if (doffs >= 0)
7609 {
7610 if (doffs & 1)
7611 {
7612 printf("@CNS%02u", doffs - 1);
7613 }
7614 else
7615 {
7616 printf("@RWD%02u", doffs);
7617 }
7618
7619 if (offs)
7620 {
7621 printf("%+Id", offs);
7622 }
7623 }
7624 else
7625 {
7626 printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
7627
7628 if (offs)
7629 {
7630 printf("%+Id", offs);
7631 }
7632 }
7633
7634 printf("]");
7635
7636 if (emitComp->opts.varNames && offs < 0)
7637 {
7638 printf("'%s", emitComp->eeGetFieldName(fldHnd));
7639 if (offs)
7640 {
7641 printf("%+Id", offs);
7642 }
7643 printf("'");
7644 }
7645}
7646
7647/*****************************************************************************
7648 *
7649 * Display a stack frame reference.
7650 */
7651
7652void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
7653{
7654 int addr;
7655 bool bEBP;
7656
7657 printf("[");
7658
7659 if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
7660 {
7661 if (varx < 0)
7662 {
7663 printf("TEMP_%02u", -varx);
7664 }
7665 else
7666 {
7667 printf("V%02u", +varx);
7668 }
7669
7670 if (disp < 0)
7671 {
7672 printf("-0x%X", -disp);
7673 }
7674 else if (disp > 0)
7675 {
7676 printf("+0x%X", +disp);
7677 }
7678 }
7679
7680 if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7681 {
7682 if (!asmfm)
7683 {
7684 printf(" ");
7685 }
7686
7687 addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
7688
7689 if (bEBP)
7690 {
7691 printf(STR_FPBASE);
7692
7693 if (addr < 0)
7694 {
7695 printf("-%02XH", -addr);
7696 }
7697 else if (addr > 0)
7698 {
7699 printf("+%02XH", addr);
7700 }
7701 }
7702 else
7703 {
7704 /* Adjust the offset by amount currently pushed on the stack */
7705
7706 printf(STR_SPBASE);
7707
7708 if (addr < 0)
7709 {
7710 printf("-%02XH", -addr);
7711 }
7712 else if (addr > 0)
7713 {
7714 printf("+%02XH", addr);
7715 }
7716
7717#if !FEATURE_FIXED_OUT_ARGS
7718
7719 if (emitCurStackLvl)
7720 printf("+%02XH", emitCurStackLvl);
7721
7722#endif // !FEATURE_FIXED_OUT_ARGS
7723 }
7724 }
7725
7726 printf("]");
7727
7728 if (varx >= 0 && emitComp->opts.varNames)
7729 {
7730 LclVarDsc* varDsc;
7731 const char* varName;
7732
7733 assert((unsigned)varx < emitComp->lvaCount);
7734 varDsc = emitComp->lvaTable + varx;
7735 varName = emitComp->compLocalVarName(varx, offs);
7736
7737 if (varName)
7738 {
7739 printf("'%s", varName);
7740
7741 if (disp < 0)
7742 {
7743 printf("-%d", -disp);
7744 }
7745 else if (disp > 0)
7746 {
7747 printf("+%d", +disp);
7748 }
7749
7750 printf("'");
7751 }
7752 }
7753}
7754
7755/*****************************************************************************
7756 *
 * Display a reloc value
7758 * If we are formatting for an assembly listing don't print the hex value
7759 * since it will prevent us from doing assembly diffs
7760 */
7761void emitter::emitDispReloc(ssize_t value)
7762{
7763 if (emitComp->opts.disAsm)
7764 {
7765 printf("(reloc)");
7766 }
7767 else
7768 {
7769 printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
7770 }
7771}
7772
7773/*****************************************************************************
7774 *
7775 * Display an address mode.
7776 */
7777
7778void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
7779{
7780 bool nsep = false;
7781 ssize_t disp;
7782
7783 unsigned jtno = 0;
7784 dataSection* jdsc = nullptr;
7785
7786 /* The displacement field is in an unusual place for calls */
7787
7788 disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
7789
7790 /* Display a jump table label if this is a switch table jump */
7791
7792 if (id->idIns() == INS_i_jmp)
7793 {
7794 UNATIVE_OFFSET offs = 0;
7795
7796 /* Find the appropriate entry in the data section list */
7797
7798 for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
7799 {
7800 UNATIVE_OFFSET size = jdsc->dsSize;
7801
7802 /* Is this a label table? */
7803
7804 if (size & 1)
7805 {
7806 size--;
7807 jtno++;
7808
7809 if (offs == id->idDebugOnlyInfo()->idMemCookie)
7810 {
7811 break;
7812 }
7813 }
7814
7815 offs += size;
7816 }
7817
7818 /* If we've found a matching entry then is a table jump */
7819
7820 if (jdsc)
7821 {
7822 if (id->idIsDspReloc())
7823 {
7824 printf("reloc ");
7825 }
7826 printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
7827 }
7828
7829 disp -= id->idDebugOnlyInfo()->idMemCookie;
7830 }
7831
7832 bool frameRef = false;
7833
7834 printf("[");
7835
7836 if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
7837 {
7838 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
7839 nsep = true;
7840 if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
7841 {
7842 frameRef = true;
7843 }
7844 else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
7845 {
7846 frameRef = true;
7847 }
7848 }
7849
7850 if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
7851 {
7852 size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
7853
7854 if (nsep)
7855 {
7856 printf("+");
7857 }
7858 if (scale > 1)
7859 {
7860 printf("%u*", scale);
7861 }
7862 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
7863 nsep = true;
7864 }
7865
7866 if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
7867 {
7868 if (nsep)
7869 {
7870 printf("+");
7871 }
7872 emitDispReloc(disp);
7873 }
7874 else
7875 {
7876 // Munge any pointers if we want diff-able disassembly
7877 // It's assumed to be a pointer when disp is outside of the range (-1M, +1M); top bits are not 0 or -1
7878 if (!frameRef && emitComp->opts.disDiffable && (static_cast<size_t>((disp >> 20) + 1) > 1))
7879 {
7880 if (nsep)
7881 {
7882 printf("+");
7883 }
7884 printf("D1FFAB1EH");
7885 }
7886 else if (disp > 0)
7887 {
7888 if (nsep)
7889 {
7890 printf("+");
7891 }
7892 if (frameRef)
7893 {
7894 printf("%02XH", disp);
7895 }
7896 else if (disp < 1000)
7897 {
7898 printf("%d", disp);
7899 }
7900 else if (disp <= 0xFFFF)
7901 {
7902 printf("%04XH", disp);
7903 }
7904 else
7905 {
7906 printf("%08XH", disp);
7907 }
7908 }
7909 else if (disp < 0)
7910 {
7911 if (frameRef)
7912 {
7913 printf("-%02XH", -disp);
7914 }
7915 else if (disp > -1000)
7916 {
7917 printf("-%d", -disp);
7918 }
7919 else if (disp >= -0xFFFF)
7920 {
7921 printf("-%04XH", -disp);
7922 }
7923 else if (disp < -0xFFFFFF)
7924 {
7925 if (nsep)
7926 {
7927 printf("+");
7928 }
7929 printf("%08XH", disp);
7930 }
7931 else
7932 {
7933 printf("-%08XH", -disp);
7934 }
7935 }
7936 else if (!nsep)
7937 {
7938 printf("%04XH", disp);
7939 }
7940 }
7941
7942 printf("]");
7943
7944 // pretty print string if it looks like one
7945 if ((id->idGCref() == GCT_GCREF) && (id->idIns() == INS_mov) && (id->idAddr()->iiaAddrMode.amBaseReg == REG_NA))
7946 {
7947 const wchar_t* str = emitComp->eeGetCPString(disp);
7948 if (str != nullptr)
7949 {
7950 printf(" '%S'", str);
7951 }
7952 }
7953
7954 if (jdsc && !noDetail)
7955 {
7956 unsigned cnt = (jdsc->dsSize - 1) / TARGET_POINTER_SIZE;
7957 BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
7958
7959#ifdef _TARGET_AMD64_
7960#define SIZE_LETTER "Q"
7961#else
7962#define SIZE_LETTER "D"
7963#endif
7964 printf("\n\n J_M%03u_DS%02u LABEL " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);
7965
7966 /* Display the label table (it's stored as "BasicBlock*" values) */
7967
7968 do
7969 {
7970 insGroup* lab;
7971
7972 /* Convert the BasicBlock* value to an IG address */
7973
7974 lab = (insGroup*)emitCodeGetCookie(*bbp++);
7975 assert(lab);
7976
7977 printf("\n D" SIZE_LETTER " G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
7978 } while (--cnt);
7979 }
7980}
7981
7982/*****************************************************************************
7983 *
7984 * If the given instruction is a shift, display the 2nd operand.
7985 */
7986
7987void emitter::emitDispShift(instruction ins, int cnt)
7988{
7989 switch (ins)
7990 {
7991 case INS_rcl_1:
7992 case INS_rcr_1:
7993 case INS_rol_1:
7994 case INS_ror_1:
7995 case INS_shl_1:
7996 case INS_shr_1:
7997 case INS_sar_1:
7998 printf(", 1");
7999 break;
8000
8001 case INS_rcl:
8002 case INS_rcr:
8003 case INS_rol:
8004 case INS_ror:
8005 case INS_shl:
8006 case INS_shr:
8007 case INS_sar:
8008 printf(", cl");
8009 break;
8010
8011 case INS_rcl_N:
8012 case INS_rcr_N:
8013 case INS_rol_N:
8014 case INS_ror_N:
8015 case INS_shl_N:
8016 case INS_shr_N:
8017 case INS_sar_N:
8018 printf(", %d", cnt);
8019 break;
8020
8021 default:
8022 break;
8023 }
8024}
8025
8026/*****************************************************************************
8027 *
8028 * Display (optionally) the bytes for the instruction encoding in hex
8029 */
8030
8031void emitter::emitDispInsHex(BYTE* code, size_t sz)
8032{
8033 // We do not display the instruction hex if we want diff-able disassembly
8034 if (!emitComp->opts.disDiffable)
8035 {
8036#ifdef _TARGET_AMD64_
8037 // how many bytes per instruction we format for
8038 const size_t digits = 10;
8039#else // _TARGET_X86
8040 const size_t digits = 6;
8041#endif
8042 printf(" ");
8043 for (unsigned i = 0; i < sz; i++)
8044 {
8045 printf("%02X", (*((BYTE*)(code + i))));
8046 }
8047
8048 if (sz < digits)
8049 {
8050 printf("%.*s", 2 * (digits - sz), " ");
8051 }
8052 }
8053}
8054
8055/*****************************************************************************
8056 *
8057 * Display the given instruction.
8058 */
8059
8060void emitter::emitDispIns(
8061 instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
8062{
8063 emitAttr attr;
8064 const char* sstr;
8065
8066 instruction ins = id->idIns();
8067
8068 if (emitComp->verbose)
8069 {
8070 unsigned idNum = id->idDebugOnlyInfo()->idNum;
8071 printf("IN%04x: ", idNum);
8072 }
8073
8074#define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
8075
8076 /* Display a constant value if the instruction references one */
8077
8078 if (!isNew)
8079 {
8080 switch (id->idInsFmt())
8081 {
8082 int offs;
8083
8084 case IF_MRD_RRD:
8085 case IF_MWR_RRD:
8086 case IF_MRW_RRD:
8087
8088 case IF_RRD_MRD:
8089 case IF_RWR_MRD:
8090 case IF_RRW_MRD:
8091
8092 case IF_MRD_CNS:
8093 case IF_MWR_CNS:
8094 case IF_MRW_CNS:
8095 case IF_MRW_SHF:
8096
8097 case IF_MRD:
8098 case IF_MWR:
8099 case IF_MRW:
8100
8101 case IF_MRD_OFF:
8102
8103 /* Is this actually a reference to a data section? */
8104
8105 offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
8106
8107 if (offs >= 0)
8108 {
8109 void* addr;
8110
8111 /* Display a data section reference */
8112
8113 assert((unsigned)offs < emitConsDsc.dsdOffs);
8114 addr = emitConsBlock ? emitConsBlock + offs : nullptr;
8115
8116#if 0
8117 // TODO-XArch-Cleanup: Fix or remove this code.
8118 /* Is the operand an integer or floating-point value? */
8119
8120 bool isFP = false;
8121
8122 if (CodeGen::instIsFP(id->idIns()))
8123 {
8124 switch (id->idIns())
8125 {
8126 case INS_fild:
8127 case INS_fildl:
8128 break;
8129
8130 default:
8131 isFP = true;
8132 break;
8133 }
8134 }
8135
8136 if (offs & 1)
8137 printf("@CNS%02u", offs);
8138 else
8139 printf("@RWD%02u", offs);
8140
8141 printf(" ");
8142
8143 if (addr)
8144 {
8145 addr = 0;
8146 // TODO-XArch-Bug?:
8147 // This was busted by switching the order
8148 // in which we output the code block vs.
8149 // the data blocks -- when we get here,
8150 // the data block has not been filled in
8151 // yet, so we'll display garbage.
8152
8153 if (isFP)
8154 {
8155 if (id->idOpSize() == EA_4BYTE)
8156 printf("DF %f \n", addr ? *(float *)addr : 0);
8157 else
8158 printf("DQ %lf\n", addr ? *(double *)addr : 0);
8159 }
8160 else
8161 {
8162 if (id->idOpSize() <= EA_4BYTE)
8163 printf("DD %d \n", addr ? *(int *)addr : 0);
8164 else
8165 printf("DQ %D \n", addr ? *(__int64 *)addr : 0);
8166 }
8167 }
8168#endif
8169 }
8170 break;
8171
8172 default:
8173 break;
8174 }
8175 }
8176
8177 // printf("[F=%s] " , emitIfName(id->idInsFmt()));
8178 // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
8179 // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
8180 // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
8181 // printf("[A=%08X] " , emitSimpleStkMask);
8182 // printf("[A=%08X] " , emitSimpleByrefStkMask);
8183 // printf("[L=%02u] " , id->idCodeSize());
8184
8185 if (!emitComp->opts.dspEmit && !isNew && !asmfm)
8186 {
8187 doffs = true;
8188 }
8189
8190 /* Display the instruction offset */
8191
8192 emitDispInsOffs(offset, doffs);
8193
8194 if (code != nullptr)
8195 {
8196 /* Display the instruction hex code */
8197
8198 emitDispInsHex(code, sz);
8199 }
8200
8201 /* Display the instruction name */
8202
8203 sstr = codeGen->genInsName(ins);
8204
8205 if (IsAVXInstruction(ins) && !IsBMIInstruction(ins))
8206 {
8207 printf(" v%-8s", sstr);
8208 }
8209 else
8210 {
8211 printf(" %-9s", sstr);
8212 }
8213#ifndef FEATURE_PAL
8214 if (strnlen_s(sstr, 10) >= 8)
8215#else // FEATURE_PAL
8216 if (strnlen(sstr, 10) >= 8)
8217#endif // FEATURE_PAL
8218 {
8219 printf(" ");
8220 }
8221
8222 /* By now the size better be set to something */
8223
8224 assert(emitInstCodeSz(id) || emitInstHasNoCode(ins));
8225
8226 /* Figure out the operand size */
8227
8228 if (id->idGCref() == GCT_GCREF)
8229 {
8230 attr = EA_GCREF;
8231 sstr = "gword ptr ";
8232 }
8233 else if (id->idGCref() == GCT_BYREF)
8234 {
8235 attr = EA_BYREF;
8236 sstr = "bword ptr ";
8237 }
8238 else
8239 {
8240 attr = id->idOpSize();
8241 sstr = codeGen->genSizeStr(attr);
8242
8243 if (ins == INS_lea)
8244 {
8245#ifdef _TARGET_AMD64_
8246 assert((attr == EA_4BYTE) || (attr == EA_8BYTE));
8247#else
8248 assert(attr == EA_4BYTE);
8249#endif
8250 sstr = "";
8251 }
8252 }
8253
8254 /* Now see what instruction format we've got */
8255
8256 // First print the implicit register usage
8257 if (instrHasImplicitRegPairDest(ins))
8258 {
8259 printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
8260 }
8261 else if (instrIs3opImul(ins))
8262 {
8263 regNumber tgtReg = inst3opImulReg(ins);
8264 printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
8265 }
8266
8267 switch (id->idInsFmt())
8268 {
8269 ssize_t val;
8270 ssize_t offs;
8271 CnsVal cnsVal;
8272 const char* methodName;
8273
8274 case IF_CNS:
8275 val = emitGetInsSC(id);
8276#ifdef _TARGET_AMD64_
8277 // no 8-byte immediates allowed here!
8278 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8279#endif
8280 if (id->idIsCnsReloc())
8281 {
8282 emitDispReloc(val);
8283 }
8284 else
8285 {
8286 PRINT_CONSTANT:
8287 // Munge any pointers if we want diff-able disassembly
8288 if (emitComp->opts.disDiffable)
8289 {
8290 ssize_t top14bits = (val >> 18);
8291 if ((top14bits != 0) && (top14bits != -1))
8292 {
8293 val = 0xD1FFAB1E;
8294 }
8295 }
8296 if ((val > -1000) && (val < 1000))
8297 {
8298 printf("%d", val);
8299 }
8300 else if ((val > 0) || (val < -0xFFFFFF))
8301 {
8302 printf("0x%IX", val);
8303 }
8304 else
8305 { // (val < 0)
8306 printf("-0x%IX", -val);
8307 }
8308 }
8309 break;
8310
8311 case IF_ARD:
8312 case IF_AWR:
8313 case IF_ARW:
8314
8315 if (ins == INS_call && id->idIsCallRegPtr())
8316 {
8317 printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
8318 break;
8319 }
8320
8321 printf("%s", sstr);
8322 emitDispAddrMode(id, isNew);
8323 emitDispShift(ins);
8324
8325 if (ins == INS_call)
8326 {
8327 assert(id->idInsFmt() == IF_ARD);
8328
8329 /* Ignore indirect calls */
8330
8331 if (id->idDebugOnlyInfo()->idMemCookie == 0)
8332 {
8333 break;
8334 }
8335
8336 assert(id->idDebugOnlyInfo()->idMemCookie);
8337
8338 /* This is a virtual call */
8339
8340 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
8341 printf("%s", methodName);
8342 }
8343 break;
8344
8345 case IF_RRD_ARD:
8346 case IF_RWR_ARD:
8347 case IF_RRW_ARD:
8348#ifdef _TARGET_AMD64_
8349 if (ins == INS_movsxd)
8350 {
8351 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
8352 }
8353 else
8354#endif
8355 if (ins == INS_movsx || ins == INS_movzx)
8356 {
8357 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
8358 }
8359 else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8360 {
8361 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8362 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8363 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8364 }
8365 else
8366 {
8367 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8368 }
8369 emitDispAddrMode(id);
8370 break;
8371
8372 case IF_RRW_ARD_CNS:
8373 case IF_RWR_ARD_CNS:
8374 {
8375 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8376 emitDispAddrMode(id);
8377 emitGetInsAmdCns(id, &cnsVal);
8378
8379 val = cnsVal.cnsVal;
8380 printf(", ");
8381
8382 if (cnsVal.cnsReloc)
8383 {
8384 emitDispReloc(val);
8385 }
8386 else
8387 {
8388 goto PRINT_CONSTANT;
8389 }
8390
8391 break;
8392 }
8393
8394 case IF_AWR_RRD_CNS:
8395 {
8396 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8397 // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8398 sstr = codeGen->genSizeStr(EA_ATTR(16));
8399 printf(sstr);
8400 emitDispAddrMode(id);
8401 printf(", %s", emitRegName(id->idReg1(), attr));
8402
8403 emitGetInsAmdCns(id, &cnsVal);
8404
8405 val = cnsVal.cnsVal;
8406 printf(", ");
8407
8408 if (cnsVal.cnsReloc)
8409 {
8410 emitDispReloc(val);
8411 }
8412 else
8413 {
8414 goto PRINT_CONSTANT;
8415 }
8416
8417 break;
8418 }
8419
8420 case IF_RWR_RRD_ARD:
8421 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8422 emitDispAddrMode(id);
8423 break;
8424
8425 case IF_RWR_ARD_RRD:
8426 if (ins == INS_vpgatherqd || ins == INS_vgatherqps)
8427 {
8428 attr = EA_16BYTE;
8429 }
8430 sstr = codeGen->genSizeStr(EA_ATTR(4));
8431 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8432 emitDispAddrMode(id);
8433 printf(", %s", emitRegName(id->idReg2(), attr));
8434 break;
8435
8436 case IF_RWR_RRD_ARD_CNS:
8437 {
8438 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8439 emitDispAddrMode(id);
8440 emitGetInsAmdCns(id, &cnsVal);
8441
8442 val = cnsVal.cnsVal;
8443 printf(", ");
8444
8445 if (cnsVal.cnsReloc)
8446 {
8447 emitDispReloc(val);
8448 }
8449 else
8450 {
8451 goto PRINT_CONSTANT;
8452 }
8453
8454 break;
8455 }
8456
8457 case IF_RWR_RRD_ARD_RRD:
8458 {
8459 printf("%s, ", emitRegName(id->idReg1(), attr));
8460 printf("%s, ", emitRegName(id->idReg2(), attr));
8461 emitDispAddrMode(id);
8462
8463 emitGetInsAmdCns(id, &cnsVal);
8464 val = (cnsVal.cnsVal >> 4) + XMMBASE;
8465 printf(", %s", emitRegName((regNumber)val, attr));
8466 break;
8467 }
8468
8469 case IF_ARD_RRD:
8470 case IF_AWR_RRD:
8471 case IF_ARW_RRD:
8472
8473 printf("%s", sstr);
8474 emitDispAddrMode(id);
8475 printf(", %s", emitRegName(id->idReg1(), attr));
8476 break;
8477
8478 case IF_AWR_RRD_RRD:
8479 {
8480 printf("%s", sstr);
8481 emitDispAddrMode(id);
8482 printf(", %s", emitRegName(id->idReg1(), attr));
8483 printf(", %s", emitRegName(id->idReg2(), attr));
8484 break;
8485 }
8486
8487 case IF_ARD_CNS:
8488 case IF_AWR_CNS:
8489 case IF_ARW_CNS:
8490 case IF_ARW_SHF:
8491
8492 printf("%s", sstr);
8493 emitDispAddrMode(id);
8494 emitGetInsAmdCns(id, &cnsVal);
8495 val = cnsVal.cnsVal;
8496#ifdef _TARGET_AMD64_
8497 // no 8-byte immediates allowed here!
8498 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8499#endif
8500 if (id->idInsFmt() == IF_ARW_SHF)
8501 {
8502 emitDispShift(ins, (BYTE)val);
8503 }
8504 else
8505 {
8506 printf(", ");
8507 if (cnsVal.cnsReloc)
8508 {
8509 emitDispReloc(val);
8510 }
8511 else
8512 {
8513 goto PRINT_CONSTANT;
8514 }
8515 }
8516 break;
8517
8518 case IF_SRD:
8519 case IF_SWR:
8520 case IF_SRW:
8521
8522 printf("%s", sstr);
8523
8524#if !FEATURE_FIXED_OUT_ARGS
8525 if (ins == INS_pop)
8526 emitCurStackLvl -= sizeof(int);
8527#endif
8528
8529 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8530 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8531
8532#if !FEATURE_FIXED_OUT_ARGS
8533 if (ins == INS_pop)
8534 emitCurStackLvl += sizeof(int);
8535#endif
8536
8537 emitDispShift(ins);
8538 break;
8539
8540 case IF_SRD_RRD:
8541 case IF_SWR_RRD:
8542 case IF_SRW_RRD:
8543
8544 printf("%s", sstr);
8545
8546 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8547 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8548
8549 printf(", %s", emitRegName(id->idReg1(), attr));
8550 break;
8551
8552 case IF_SRD_CNS:
8553 case IF_SWR_CNS:
8554 case IF_SRW_CNS:
8555 case IF_SRW_SHF:
8556
8557 printf("%s", sstr);
8558
8559 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8560 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8561
8562 emitGetInsCns(id, &cnsVal);
8563 val = cnsVal.cnsVal;
8564#ifdef _TARGET_AMD64_
8565 // no 8-byte immediates allowed here!
8566 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8567#endif
8568 if (id->idInsFmt() == IF_SRW_SHF)
8569 {
8570 emitDispShift(ins, (BYTE)val);
8571 }
8572 else
8573 {
8574 printf(", ");
8575 if (cnsVal.cnsReloc)
8576 {
8577 emitDispReloc(val);
8578 }
8579 else
8580 {
8581 goto PRINT_CONSTANT;
8582 }
8583 }
8584 break;
8585
8586 case IF_RRD_SRD:
8587 case IF_RWR_SRD:
8588 case IF_RRW_SRD:
8589#ifdef _TARGET_AMD64_
8590 if (ins == INS_movsxd)
8591 {
8592 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
8593 }
8594 else
8595#endif
8596 if (ins == INS_movsx || ins == INS_movzx)
8597 {
8598 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
8599 }
8600 else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8601 {
8602 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8603 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8604 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8605 }
8606 else
8607 {
8608 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8609 }
8610
8611 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8612 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8613
8614 break;
8615
8616 case IF_RRW_SRD_CNS:
8617 case IF_RWR_SRD_CNS:
8618 {
8619 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8620 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8621 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8622 emitGetInsCns(id, &cnsVal);
8623
8624 val = cnsVal.cnsVal;
8625 printf(", ");
8626
8627 if (cnsVal.cnsReloc)
8628 {
8629 emitDispReloc(val);
8630 }
8631 else
8632 {
8633 goto PRINT_CONSTANT;
8634 }
8635 break;
8636 }
8637
8638 case IF_RWR_RRD_SRD:
8639 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8640 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8641 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8642 break;
8643
8644 case IF_RWR_RRD_SRD_CNS:
8645 {
8646 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8647 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8648 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8649 emitGetInsCns(id, &cnsVal);
8650
8651 val = cnsVal.cnsVal;
8652 printf(", ");
8653
8654 if (cnsVal.cnsReloc)
8655 {
8656 emitDispReloc(val);
8657 }
8658 else
8659 {
8660 goto PRINT_CONSTANT;
8661 }
8662 break;
8663 }
8664
8665 case IF_RWR_RRD_SRD_RRD:
8666 {
8667 printf("%s, ", emitRegName(id->idReg1(), attr));
8668 printf("%s, ", emitRegName(id->idReg2(), attr));
8669 emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8670 id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8671
8672 emitGetInsCns(id, &cnsVal);
8673 val = (cnsVal.cnsVal >> 4) + XMMBASE;
8674 printf(", %s", emitRegName((regNumber)val, attr));
8675 break;
8676 }
8677
8678 case IF_RRD_RRD:
8679 case IF_RWR_RRD:
8680 case IF_RRW_RRD:
8681 if (ins == INS_mov_i2xmm)
8682 {
8683 printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8684 }
8685 else if (ins == INS_mov_xmm2i)
8686 {
8687 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE));
8688 }
8689 else if (ins == INS_pmovmskb)
8690 {
8691 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8692 }
8693 else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
8694 {
8695 printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8696 }
8697 else if ((ins == INS_cvttsd2si) || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si))
8698 {
8699 printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
8700 }
8701#ifdef _TARGET_AMD64_
8702 else if (ins == INS_movsxd)
8703 {
8704 printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
8705 }
8706#endif // _TARGET_AMD64_
8707 else if (ins == INS_movsx || ins == INS_movzx)
8708 {
8709 printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
8710 }
8711 else if (ins == INS_bt)
8712 {
8713 // INS_bt operands are reversed. Display them in the normal order.
8714 printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr));
8715 }
8716#ifdef FEATURE_HW_INTRINSICS
8717 else if (ins == INS_crc32 && attr != EA_8BYTE)
8718 {
8719 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8720 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8721 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8722 }
8723#endif // FEATURE_HW_INTRINSICS
8724 else
8725 {
8726 printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
8727 }
8728 break;
8729
8730 case IF_RRW_RRW:
8731 assert(ins == INS_xchg);
8732 printf("%s,", emitRegName(id->idReg1(), attr));
8733 printf(" %s", emitRegName(id->idReg2(), attr));
8734 break;
8735
8736 case IF_RWR_RRD_RRD:
8737 {
8738 assert(IsAVXInstruction(ins));
8739 assert(IsThreeOperandAVXInstruction(ins));
8740 regNumber reg2 = id->idReg2();
8741 regNumber reg3 = id->idReg3();
8742 if (ins == INS_bextr || ins == INS_bzhi)
8743 {
8744 // BMI bextr and bzhi encodes the reg2 in VEX.vvvv and reg3 in modRM,
8745 // which is different from most of other instructions
8746 regNumber tmp = reg2;
8747 reg2 = reg3;
8748 reg3 = tmp;
8749 }
8750 printf("%s, ", emitRegName(id->idReg1(), attr));
8751 printf("%s, ", emitRegName(reg2, attr));
8752 printf("%s", emitRegName(reg3, attr));
8753 break;
8754 }
8755
8756 case IF_RWR_RRD_RRD_CNS:
8757 assert(IsAVXInstruction(ins));
8758 assert(IsThreeOperandAVXInstruction(ins));
8759 printf("%s, ", emitRegName(id->idReg1(), attr));
8760 printf("%s, ", emitRegName(id->idReg2(), attr));
8761 printf("%s, ", emitRegName(id->idReg3(), attr));
8762 val = emitGetInsSC(id);
8763 goto PRINT_CONSTANT;
8764 break;
8765 case IF_RWR_RRD_RRD_RRD:
8766 assert(IsAVXOnlyInstruction(ins));
8767 assert(UseVEXEncoding());
8768 printf("%s, ", emitRegName(id->idReg1(), attr));
8769 printf("%s, ", emitRegName(id->idReg2(), attr));
8770 printf("%s, ", emitRegName(id->idReg3(), attr));
8771 printf("%s", emitRegName(id->idReg4(), attr));
8772 break;
8773 case IF_RRW_RRW_CNS:
8774 printf("%s,", emitRegName(id->idReg1(), attr));
8775 printf(" %s", emitRegName(id->idReg2(), attr));
8776 val = emitGetInsSC(id);
8777#ifdef _TARGET_AMD64_
8778 // no 8-byte immediates allowed here!
8779 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8780#endif
8781 printf(", ");
8782 if (id->idIsCnsReloc())
8783 {
8784 emitDispReloc(val);
8785 }
8786 else
8787 {
8788 goto PRINT_CONSTANT;
8789 }
8790 break;
8791
8792 case IF_RRD:
8793 case IF_RWR:
8794 case IF_RRW:
8795 printf("%s", emitRegName(id->idReg1(), attr));
8796 emitDispShift(ins);
8797 break;
8798
8799 case IF_RRW_SHF:
8800 printf("%s", emitRegName(id->idReg1(), attr));
8801 emitDispShift(ins, (BYTE)emitGetInsSC(id));
8802 break;
8803
8804 case IF_RRD_MRD:
8805 case IF_RWR_MRD:
8806 case IF_RRW_MRD:
8807
8808 if (ins == INS_movsx || ins == INS_movzx)
8809 {
8810 attr = EA_PTRSIZE;
8811 }
8812#ifdef _TARGET_AMD64_
8813 else if (ins == INS_movsxd)
8814 {
8815 attr = EA_PTRSIZE;
8816 }
8817#endif
8818 else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8819 {
8820 // The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8821 // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8822 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8823 }
8824 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8825 offs = emitGetInsDsp(id);
8826 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8827 break;
8828
8829 case IF_RRW_MRD_CNS:
8830 case IF_RWR_MRD_CNS:
8831 {
8832 printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8833 offs = emitGetInsDsp(id);
8834 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8835 emitGetInsDcmCns(id, &cnsVal);
8836
8837 val = cnsVal.cnsVal;
8838 printf(", ");
8839
8840 if (cnsVal.cnsReloc)
8841 {
8842 emitDispReloc(val);
8843 }
8844 else
8845 {
8846 goto PRINT_CONSTANT;
8847 }
8848 break;
8849 }
8850
8851 case IF_MWR_RRD_CNS:
8852 {
8853 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
8854 // vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8855 sstr = codeGen->genSizeStr(EA_ATTR(16));
8856 printf(sstr);
8857 offs = emitGetInsDsp(id);
8858 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8859 printf(", %s", emitRegName(id->idReg1(), attr));
8860 emitGetInsDcmCns(id, &cnsVal);
8861
8862 val = cnsVal.cnsVal;
8863 printf(", ");
8864
8865 if (cnsVal.cnsReloc)
8866 {
8867 emitDispReloc(val);
8868 }
8869 else
8870 {
8871 goto PRINT_CONSTANT;
8872 }
8873
8874 break;
8875 }
8876
8877 case IF_RWR_RRD_MRD:
8878 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8879 offs = emitGetInsDsp(id);
8880 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8881 break;
8882
8883 case IF_RWR_RRD_MRD_CNS:
8884 {
8885 printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8886 offs = emitGetInsDsp(id);
8887 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8888 emitGetInsDcmCns(id, &cnsVal);
8889
8890 val = cnsVal.cnsVal;
8891 printf(", ");
8892
8893 if (cnsVal.cnsReloc)
8894 {
8895 emitDispReloc(val);
8896 }
8897 else
8898 {
8899 goto PRINT_CONSTANT;
8900 }
8901 break;
8902 }
8903
8904 case IF_RWR_RRD_MRD_RRD:
8905 {
8906 printf("%s, ", emitRegName(id->idReg1(), attr));
8907 printf("%s, ", emitRegName(id->idReg2(), attr));
8908
8909 offs = emitGetInsDsp(id);
8910 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8911
8912 emitGetInsDcmCns(id, &cnsVal);
8913 val = (cnsVal.cnsVal >> 4) + XMMBASE;
8914 printf(", %s", emitRegName((regNumber)val, attr));
8915 break;
8916 }
8917
8918 case IF_RWR_MRD_OFF:
8919
8920 printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
8921 offs = emitGetInsDsp(id);
8922 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8923 break;
8924
8925 case IF_MRD_RRD:
8926 case IF_MWR_RRD:
8927 case IF_MRW_RRD:
8928
8929 printf("%s", sstr);
8930 offs = emitGetInsDsp(id);
8931 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8932 printf(", %s", emitRegName(id->idReg1(), attr));
8933 break;
8934
8935 case IF_MRD_CNS:
8936 case IF_MWR_CNS:
8937 case IF_MRW_CNS:
8938 case IF_MRW_SHF:
8939
8940 printf("%s", sstr);
8941 offs = emitGetInsDsp(id);
8942 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8943 emitGetInsDcmCns(id, &cnsVal);
8944 val = cnsVal.cnsVal;
8945#ifdef _TARGET_AMD64_
8946 // no 8-byte immediates allowed here!
8947 assert((val >= (ssize_t)0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
8948#endif
8949 if (cnsVal.cnsReloc)
8950 {
8951 emitDispReloc(val);
8952 }
8953 else if (id->idInsFmt() == IF_MRW_SHF)
8954 {
8955 emitDispShift(ins, (BYTE)val);
8956 }
8957 else
8958 {
8959 printf(", ");
8960 goto PRINT_CONSTANT;
8961 }
8962 break;
8963
8964 case IF_MRD:
8965 case IF_MWR:
8966 case IF_MRW:
8967
8968 printf("%s", sstr);
8969 offs = emitGetInsDsp(id);
8970 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8971 emitDispShift(ins);
8972 break;
8973
8974 case IF_MRD_OFF:
8975
8976 printf("offset ");
8977 offs = emitGetInsDsp(id);
8978 emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8979 break;
8980
8981 case IF_RRD_CNS:
8982 case IF_RWR_CNS:
8983 case IF_RRW_CNS:
8984 printf("%s, ", emitRegName(id->idReg1(), attr));
8985 val = emitGetInsSC(id);
8986 if (id->idIsCnsReloc())
8987 {
8988 emitDispReloc(val);
8989 }
8990 else
8991 {
8992 goto PRINT_CONSTANT;
8993 }
8994 break;
8995
8996 case IF_LABEL:
8997 case IF_RWR_LABEL:
8998 case IF_SWR_LABEL:
8999
9000 if (ins == INS_lea)
9001 {
9002 printf("%s, ", emitRegName(id->idReg1(), attr));
9003 }
9004 else if (ins == INS_mov)
9005 {
9006 /* mov dword ptr [frame.callSiteReturnAddress], label */
9007 assert(id->idInsFmt() == IF_SWR_LABEL);
9008 instrDescLbl* idlbl = (instrDescLbl*)id;
9009
9010 emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm);
9011
9012 printf(", ");
9013 }
9014
9015 if (((instrDescJmp*)id)->idjShort)
9016 {
9017 printf("SHORT ");
9018 }
9019
9020 if (id->idIsBound())
9021 {
9022 printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
9023 }
9024 else
9025 {
9026 printf("L_M%03u_" FMT_BB, Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
9027 }
9028 break;
9029
9030 case IF_METHOD:
9031 case IF_METHPTR:
9032 if (id->idIsCallAddr())
9033 {
9034 offs = (ssize_t)id->idAddr()->iiaAddr;
9035 methodName = "";
9036 }
9037 else
9038 {
9039 offs = 0;
9040 methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
9041 }
9042
9043 if (id->idInsFmt() == IF_METHPTR)
9044 {
9045 printf("[");
9046 }
9047
9048 if (offs)
9049 {
9050 if (id->idIsDspReloc())
9051 {
9052 printf("reloc ");
9053 }
9054 printf("%08X", offs);
9055 }
9056 else
9057 {
9058 printf("%s", methodName);
9059 }
9060
9061 if (id->idInsFmt() == IF_METHPTR)
9062 {
9063 printf("]");
9064 }
9065
9066 break;
9067
9068 case IF_NONE:
9069 break;
9070
9071 default:
9072 printf("unexpected format %s", emitIfName(id->idInsFmt()));
9073 assert(!"unexpectedFormat");
9074 break;
9075 }
9076
9077 if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose))
9078 {
9079 // Code size in the instrDesc is different from the actual code size we've been given!
9080 printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
9081 }
9082
9083 printf("\n");
9084}
9085
9086/*****************************************************************************/
9087#endif
9088
9089/*****************************************************************************
9090 *
9091 * Output nBytes bytes of NOP instructions
9092 */
9093
9094static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
9095{
9096 assert(nBytes <= 15);
9097
9098#ifndef _TARGET_AMD64_
9099 // TODO-X86-CQ: when VIA C3 CPU's are out of circulation, switch to the
9100 // more efficient real NOP: 0x0F 0x1F +modR/M
9101 // Also can't use AMD recommended, multiple size prefixes (i.e. 0x66 0x66 0x90 for 3 byte NOP)
9102 // because debugger and msdis don't like it, so maybe VIA doesn't either
9103 // So instead just stick to repeating single byte nops
9104
9105 switch (nBytes)
9106 {
9107 case 15:
9108 *dst++ = 0x90;
9109 __fallthrough;
9110 case 14:
9111 *dst++ = 0x90;
9112 __fallthrough;
9113 case 13:
9114 *dst++ = 0x90;
9115 __fallthrough;
9116 case 12:
9117 *dst++ = 0x90;
9118 __fallthrough;
9119 case 11:
9120 *dst++ = 0x90;
9121 __fallthrough;
9122 case 10:
9123 *dst++ = 0x90;
9124 __fallthrough;
9125 case 9:
9126 *dst++ = 0x90;
9127 __fallthrough;
9128 case 8:
9129 *dst++ = 0x90;
9130 __fallthrough;
9131 case 7:
9132 *dst++ = 0x90;
9133 __fallthrough;
9134 case 6:
9135 *dst++ = 0x90;
9136 __fallthrough;
9137 case 5:
9138 *dst++ = 0x90;
9139 __fallthrough;
9140 case 4:
9141 *dst++ = 0x90;
9142 __fallthrough;
9143 case 3:
9144 *dst++ = 0x90;
9145 __fallthrough;
9146 case 2:
9147 *dst++ = 0x90;
9148 __fallthrough;
9149 case 1:
9150 *dst++ = 0x90;
9151 break;
9152 case 0:
9153 break;
9154 }
9155#else // _TARGET_AMD64_
9156 switch (nBytes)
9157 {
9158 case 2:
9159 *dst++ = 0x66;
9160 __fallthrough;
9161 case 1:
9162 *dst++ = 0x90;
9163 break;
9164 case 0:
9165 break;
9166 case 3:
9167 *dst++ = 0x0F;
9168 *dst++ = 0x1F;
9169 *dst++ = 0x00;
9170 break;
9171 case 4:
9172 *dst++ = 0x0F;
9173 *dst++ = 0x1F;
9174 *dst++ = 0x40;
9175 *dst++ = 0x00;
9176 break;
9177 case 6:
9178 *dst++ = 0x66;
9179 __fallthrough;
9180 case 5:
9181 *dst++ = 0x0F;
9182 *dst++ = 0x1F;
9183 *dst++ = 0x44;
9184 *dst++ = 0x00;
9185 *dst++ = 0x00;
9186 break;
9187 case 7:
9188 *dst++ = 0x0F;
9189 *dst++ = 0x1F;
9190 *dst++ = 0x80;
9191 *dst++ = 0x00;
9192 *dst++ = 0x00;
9193 *dst++ = 0x00;
9194 *dst++ = 0x00;
9195 break;
9196 case 15:
9197 // More than 3 prefixes is slower than just 2 NOPs
9198 dst = emitOutputNOP(emitOutputNOP(dst, 7), 8);
9199 break;
9200 case 14:
9201 // More than 3 prefixes is slower than just 2 NOPs
9202 dst = emitOutputNOP(emitOutputNOP(dst, 7), 7);
9203 break;
9204 case 13:
9205 // More than 3 prefixes is slower than just 2 NOPs
9206 dst = emitOutputNOP(emitOutputNOP(dst, 5), 8);
9207 break;
9208 case 12:
9209 // More than 3 prefixes is slower than just 2 NOPs
9210 dst = emitOutputNOP(emitOutputNOP(dst, 4), 8);
9211 break;
9212 case 11:
9213 *dst++ = 0x66;
9214 __fallthrough;
9215 case 10:
9216 *dst++ = 0x66;
9217 __fallthrough;
9218 case 9:
9219 *dst++ = 0x66;
9220 __fallthrough;
9221 case 8:
9222 *dst++ = 0x0F;
9223 *dst++ = 0x1F;
9224 *dst++ = 0x84;
9225 *dst++ = 0x00;
9226 *dst++ = 0x00;
9227 *dst++ = 0x00;
9228 *dst++ = 0x00;
9229 *dst++ = 0x00;
9230 break;
9231 }
9232#endif // _TARGET_AMD64_
9233
9234 return dst;
9235}
9236
9237/*****************************************************************************
9238 *
9239 * Output an instruction involving an address mode.
9240 */
9241
9242BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
9243{
9244 regNumber reg;
9245 regNumber rgx;
9246 ssize_t dsp;
9247 bool dspInByte;
9248 bool dspIsZero;
9249
9250 instruction ins = id->idIns();
9251 emitAttr size = id->idOpSize();
9252 size_t opsz = EA_SIZE_IN_BYTES(size);
9253
9254 // Get the base/index registers
9255 reg = id->idAddr()->iiaAddrMode.amBaseReg;
9256 rgx = id->idAddr()->iiaAddrMode.amIndxReg;
9257
9258 // For INS_call the instruction size is actually the return value size
9259 if (ins == INS_call)
9260 {
9261 // Special case: call via a register
9262 if (id->idIsCallRegPtr())
9263 {
9264 code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
9265
9266 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
9267 dst += emitOutputWord(dst, opcode);
9268 goto DONE;
9269 }
9270
9271 // The displacement field is in an unusual place for calls
9272 dsp = emitGetInsCIdisp(id);
9273
9274#ifdef _TARGET_AMD64_
9275
9276 // Compute the REX prefix if it exists
9277 if (IsExtendedReg(reg, EA_PTRSIZE))
9278 {
9279 insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9280 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9281 reg = (regNumber)RegEncoding(reg);
9282 }
9283
9284 if (IsExtendedReg(rgx, EA_PTRSIZE))
9285 {
9286 insEncodeRegSIB(ins, rgx, &code);
9287 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9288 rgx = (regNumber)RegEncoding(rgx);
9289 }
9290
9291 // And emit the REX prefix
9292 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9293
9294#endif // _TARGET_AMD64_
9295
9296 goto GOT_DSP;
9297 }
9298
9299 // Is there a large constant operand?
9300 if (addc && (size > EA_1BYTE))
9301 {
9302 ssize_t cval = addc->cnsVal;
9303
9304 // Does the constant fit in a byte?
9305 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9306 {
9307 if (id->idInsFmt() != IF_ARW_SHF)
9308 {
9309 code |= 2;
9310 }
9311
9312 opsz = 1;
9313 }
9314 }
9315
9316 // Emit VEX prefix if required
9317 // There are some callers who already add VEX prefix and call this routine.
9318 // Therefore, add VEX prefix is one is not already present.
9319 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9320
9321 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
9322 if (TakesVexPrefix(ins))
9323 {
9324 if (IsDstDstSrcAVXInstruction(ins))
9325 {
9326 regNumber src1 = REG_NA;
9327
9328 switch (id->idInsFmt())
9329 {
9330 case IF_RWR_RRD_ARD:
9331 case IF_RWR_ARD_RRD:
9332 case IF_RWR_RRD_ARD_CNS:
9333 case IF_RWR_RRD_ARD_RRD:
9334 {
9335 src1 = id->idReg2();
9336 break;
9337 }
9338
9339 default:
9340 {
9341 src1 = id->idReg1();
9342 break;
9343 }
9344 }
9345
9346 // encode source operand reg in 'vvvv' bits in 1's complement form
9347 code = insEncodeReg3456(ins, src1, size, code);
9348 }
9349 else if (IsDstSrcSrcAVXInstruction(ins))
9350 {
9351 code = insEncodeReg3456(ins, id->idReg2(), size, code);
9352 }
9353 }
9354
9355 // Emit the REX prefix if required
9356 if (TakesRexWPrefix(ins, size))
9357 {
9358 code = AddRexWPrefix(ins, code);
9359 }
9360
9361 if (IsExtendedReg(reg, EA_PTRSIZE))
9362 {
9363 insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9364 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9365 reg = (regNumber)RegEncoding(reg);
9366 }
9367
9368 if (IsExtendedReg(rgx, EA_PTRSIZE))
9369 {
9370 insEncodeRegSIB(ins, rgx, &code);
9371 // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9372 rgx = (regNumber)RegEncoding(rgx);
9373 }
9374
9375 // Special case emitting AVX instructions
9376 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9377 {
9378 if ((ins == INS_crc32) && (size > EA_1BYTE))
9379 {
9380 code |= 0x0100;
9381
9382 if (size == EA_2BYTE)
9383 {
9384 dst += emitOutputByte(dst, 0x66);
9385 }
9386 }
9387
9388 regNumber reg345 = REG_NA;
9389 if (IsBMIInstruction(ins))
9390 {
9391 reg345 = getBmiRegNumber(ins);
9392 }
9393 if (reg345 == REG_NA)
9394 {
9395 switch (id->idInsFmt())
9396 {
9397 case IF_AWR_RRD_RRD:
9398 {
9399 reg345 = id->idReg2();
9400 break;
9401 }
9402
9403 default:
9404 {
9405 reg345 = id->idReg1();
9406 break;
9407 }
9408 }
9409 }
9410 unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
9411
9412 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9413
9414 if (UseVEXEncoding() && (ins != INS_crc32))
9415 {
9416 // Emit last opcode byte
9417 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
9418 assert((code & 0xFF) == 0);
9419 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
9420 }
9421 else
9422 {
9423 dst += emitOutputWord(dst, code >> 16);
9424 dst += emitOutputWord(dst, code & 0xFFFF);
9425 }
9426
9427 code = regcode;
9428 }
9429 // Is this a 'big' opcode?
9430 else if (code & 0xFF000000)
9431 {
9432 // Output the REX prefix
9433 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9434
9435 // Output the highest word of the opcode
9436 // We need to check again as in case of AVX instructions leading opcode bytes are stripped off
9437 // and encoded as part of VEX prefix.
9438 if (code & 0xFF000000)
9439 {
9440 dst += emitOutputWord(dst, code >> 16);
9441 code &= 0x0000FFFF;
9442 }
9443 }
9444 else if (code & 0x00FF0000)
9445 {
9446 // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
9447 assert(ins != INS_bt);
9448
9449 // Output the REX prefix
9450 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9451
9452 // Output the highest byte of the opcode
9453 if (code & 0x00FF0000)
9454 {
9455 dst += emitOutputByte(dst, code >> 16);
9456 code &= 0x0000FFFF;
9457 }
9458
9459 // Use the large version if this is not a byte. This trick will not
9460 // work in case of SSE2 and AVX instructions.
9461 if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9462 {
9463 code++;
9464 }
9465 }
9466 else if (CodeGen::instIsFP(ins))
9467 {
9468 assert(size == EA_4BYTE || size == EA_8BYTE);
9469 if (size == EA_8BYTE)
9470 {
9471 code += 4;
9472 }
9473 }
9474 else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9475 {
9476 /* Is the operand size larger than a byte? */
9477
9478 switch (size)
9479 {
9480 case EA_1BYTE:
9481 break;
9482
9483 case EA_2BYTE:
9484
9485 /* Output a size prefix for a 16-bit operand */
9486
9487 dst += emitOutputByte(dst, 0x66);
9488
9489 __fallthrough;
9490
9491 case EA_4BYTE:
9492#ifdef _TARGET_AMD64_
9493 case EA_8BYTE:
9494#endif
9495
9496 /* Set the 'w' bit to get the large version */
9497
9498 code |= 0x1;
9499 break;
9500
9501#ifdef _TARGET_X86_
9502 case EA_8BYTE:
9503
9504 /* Double operand - set the appropriate bit */
9505
9506 code |= 0x04;
9507 break;
9508
9509#endif // _TARGET_X86_
9510
9511 default:
9512 NO_WAY("unexpected size");
9513 break;
9514 }
9515 }
9516
9517 // Output the REX prefix
9518 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9519
9520 // Get the displacement value
9521 dsp = emitGetInsAmdAny(id);
9522
9523GOT_DSP:
9524
9525 dspInByte = ((signed char)dsp == (ssize_t)dsp);
9526 dspIsZero = (dsp == 0);
9527
9528 if (id->idIsDspReloc())
9529 {
9530 dspInByte = false; // relocs can't be placed in a byte
9531 }
9532
9533 // Is there a [scaled] index component?
9534 if (rgx == REG_NA)
9535 {
9536 // The address is of the form "[reg+disp]"
9537 switch (reg)
9538 {
9539 case REG_NA:
9540 {
9541 if (id->idIsDspReloc())
9542 {
9543 INT32 addlDelta = 0;
9544
9545 // The address is of the form "[disp]"
9546 // On x86 - disp is relative to zero
9547 // On Amd64 - disp is relative to RIP
9548 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9549 {
9550 dst += emitOutputByte(dst, code | 0x05);
9551 }
9552 else
9553 {
9554 dst += emitOutputWord(dst, code | 0x0500);
9555 }
9556
9557 if (addc)
9558 {
9559 // It is of the form "ins [disp], immed"
9560 // For emitting relocation, we also need to take into account of the
9561 // additional bytes of code emitted for immed val.
9562
9563 ssize_t cval = addc->cnsVal;
9564
9565#ifdef _TARGET_AMD64_
9566 // all these opcodes only take a sign-extended 4-byte immediate
9567 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9568#else //_TARGET_X86_
9569 noway_assert(opsz <= 4);
9570#endif //_TARGET_X86_
9571
9572 switch (opsz)
9573 {
9574 case 0:
9575 case 4:
9576 case 8:
9577 addlDelta = -4;
9578 break;
9579 case 2:
9580 addlDelta = -2;
9581 break;
9582 case 1:
9583 addlDelta = -1;
9584 break;
9585
9586 default:
9587 assert(!"unexpected operand size");
9588 unreached();
9589 }
9590 }
9591
9592#ifdef _TARGET_AMD64_
9593 // We emit zero on Amd64, to avoid the assert in emitOutputLong()
9594 dst += emitOutputLong(dst, 0);
9595#else
9596 dst += emitOutputLong(dst, dsp);
9597#endif
9598 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
9599 addlDelta);
9600 }
9601 else
9602 {
9603#ifdef _TARGET_X86_
9604 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9605 {
9606 dst += emitOutputByte(dst, code | 0x05);
9607 }
9608 else
9609 {
9610 dst += emitOutputWord(dst, code | 0x0500);
9611 }
9612#else //_TARGET_AMD64_
9613 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
9614 // This addr mode should never be used while generating relocatable ngen code nor if
9615 // the addr can be encoded as pc-relative address.
9616 noway_assert(!emitComp->opts.compReloc);
9617 noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
9618 noway_assert((int)dsp == dsp);
9619
9620 // This requires, specifying a SIB byte after ModRM byte.
9621 if (EncodedBySSE38orSSE3A(ins))
9622 {
9623 dst += emitOutputByte(dst, code | 0x04);
9624 }
9625 else
9626 {
9627 dst += emitOutputWord(dst, code | 0x0400);
9628 }
9629 dst += emitOutputByte(dst, 0x25);
9630#endif //_TARGET_AMD64_
9631 dst += emitOutputLong(dst, dsp);
9632 }
9633 break;
9634 }
9635
9636 case REG_EBP:
9637 {
9638 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9639 {
9640 // Does the offset fit in a byte?
9641 if (dspInByte)
9642 {
9643 dst += emitOutputByte(dst, code | 0x45);
9644 dst += emitOutputByte(dst, dsp);
9645 }
9646 else
9647 {
9648 dst += emitOutputByte(dst, code | 0x85);
9649 dst += emitOutputLong(dst, dsp);
9650
9651 if (id->idIsDspReloc())
9652 {
9653 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9654 }
9655 }
9656 }
9657 else
9658 {
9659 // Does the offset fit in a byte?
9660 if (dspInByte)
9661 {
9662 dst += emitOutputWord(dst, code | 0x4500);
9663 dst += emitOutputByte(dst, dsp);
9664 }
9665 else
9666 {
9667 dst += emitOutputWord(dst, code | 0x8500);
9668 dst += emitOutputLong(dst, dsp);
9669
9670 if (id->idIsDspReloc())
9671 {
9672 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9673 }
9674 }
9675 }
9676 break;
9677 }
9678
9679 case REG_ESP:
9680 {
9681 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9682 {
9683 // Is the offset 0 or does it at least fit in a byte?
9684 if (dspIsZero)
9685 {
9686 dst += emitOutputByte(dst, code | 0x04);
9687 dst += emitOutputByte(dst, 0x24);
9688 }
9689 else if (dspInByte)
9690 {
9691 dst += emitOutputByte(dst, code | 0x44);
9692 dst += emitOutputByte(dst, 0x24);
9693 dst += emitOutputByte(dst, dsp);
9694 }
9695 else
9696 {
9697 dst += emitOutputByte(dst, code | 0x84);
9698 dst += emitOutputByte(dst, 0x24);
9699 dst += emitOutputLong(dst, dsp);
9700 if (id->idIsDspReloc())
9701 {
9702 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9703 }
9704 }
9705 }
9706 else
9707 {
9708 // Is the offset 0 or does it at least fit in a byte?
9709 if (dspIsZero)
9710 {
9711 dst += emitOutputWord(dst, code | 0x0400);
9712 dst += emitOutputByte(dst, 0x24);
9713 }
9714 else if (dspInByte)
9715 {
9716 dst += emitOutputWord(dst, code | 0x4400);
9717 dst += emitOutputByte(dst, 0x24);
9718 dst += emitOutputByte(dst, dsp);
9719 }
9720 else
9721 {
9722 dst += emitOutputWord(dst, code | 0x8400);
9723 dst += emitOutputByte(dst, 0x24);
9724 dst += emitOutputLong(dst, dsp);
9725 if (id->idIsDspReloc())
9726 {
9727 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9728 }
9729 }
9730 }
9731 break;
9732 }
9733
9734 default:
9735 {
9736 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9737 {
9738 // Put the register in the opcode
9739 code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr);
9740
9741 // Is there a displacement?
9742 if (dspIsZero)
9743 {
9744 // This is simply "[reg]"
9745 dst += emitOutputByte(dst, code);
9746 }
9747 else
9748 {
9749 // This is [reg + dsp]" -- does the offset fit in a byte?
9750 if (dspInByte)
9751 {
9752 dst += emitOutputByte(dst, code | 0x40);
9753 dst += emitOutputByte(dst, dsp);
9754 }
9755 else
9756 {
9757 dst += emitOutputByte(dst, code | 0x80);
9758 dst += emitOutputLong(dst, dsp);
9759 if (id->idIsDspReloc())
9760 {
9761 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9762 }
9763 }
9764 }
9765 }
9766 else
9767 {
9768 // Put the register in the opcode
9769 code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;
9770
9771 // Is there a displacement?
9772 if (dspIsZero)
9773 {
9774 // This is simply "[reg]"
9775 dst += emitOutputWord(dst, code);
9776 }
9777 else
9778 {
9779 // This is [reg + dsp]" -- does the offset fit in a byte?
9780 if (dspInByte)
9781 {
9782 dst += emitOutputWord(dst, code | 0x4000);
9783 dst += emitOutputByte(dst, dsp);
9784 }
9785 else
9786 {
9787 dst += emitOutputWord(dst, code | 0x8000);
9788 dst += emitOutputLong(dst, dsp);
9789 if (id->idIsDspReloc())
9790 {
9791 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9792 }
9793 }
9794 }
9795 }
9796
9797 break;
9798 }
9799 }
9800 }
9801 else
9802 {
9803 unsigned regByte;
9804
9805 // We have a scaled index operand
9806 unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
9807
9808 // Is the index operand scaled?
9809 if (mul > 1)
9810 {
9811 // Is there a base register?
9812 if (reg != REG_NA)
9813 {
9814 // The address is "[reg + {2/4/8} * rgx + icon]"
9815 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
9816 insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9817
9818 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9819 {
9820 // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0]
9821 if (dspIsZero && reg != REG_EBP)
9822 {
9823 // The address is "[reg + {2/4/8} * rgx]"
9824 dst += emitOutputByte(dst, code | 0x04);
9825 dst += emitOutputByte(dst, regByte);
9826 }
9827 else
9828 {
9829 // The address is "[reg + {2/4/8} * rgx + disp]"
9830 if (dspInByte)
9831 {
9832 dst += emitOutputByte(dst, code | 0x44);
9833 dst += emitOutputByte(dst, regByte);
9834 dst += emitOutputByte(dst, dsp);
9835 }
9836 else
9837 {
9838 dst += emitOutputByte(dst, code | 0x84);
9839 dst += emitOutputByte(dst, regByte);
9840 dst += emitOutputLong(dst, dsp);
9841 if (id->idIsDspReloc())
9842 {
9843 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9844 }
9845 }
9846 }
9847 }
9848 else
9849 {
9850 // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0]
9851 if (dspIsZero && reg != REG_EBP)
9852 {
9853 // The address is "[reg + {2/4/8} * rgx]"
9854 dst += emitOutputWord(dst, code | 0x0400);
9855 dst += emitOutputByte(dst, regByte);
9856 }
9857 else
9858 {
9859 // The address is "[reg + {2/4/8} * rgx + disp]"
9860 if (dspInByte)
9861 {
9862 dst += emitOutputWord(dst, code | 0x4400);
9863 dst += emitOutputByte(dst, regByte);
9864 dst += emitOutputByte(dst, dsp);
9865 }
9866 else
9867 {
9868 dst += emitOutputWord(dst, code | 0x8400);
9869 dst += emitOutputByte(dst, regByte);
9870 dst += emitOutputLong(dst, dsp);
9871 if (id->idIsDspReloc())
9872 {
9873 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9874 }
9875 }
9876 }
9877 }
9878 }
9879 else
9880 {
9881 // The address is "[{2/4/8} * rgx + icon]"
9882 regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
9883 insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
9884
9885 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9886 {
9887 dst += emitOutputByte(dst, code | 0x04);
9888 }
9889 else
9890 {
9891 dst += emitOutputWord(dst, code | 0x0400);
9892 }
9893
9894 dst += emitOutputByte(dst, regByte);
9895
9896 // Special case: jump through a jump table
9897 if (ins == INS_i_jmp)
9898 {
9899 dsp += (size_t)emitConsBlock;
9900 }
9901
9902 dst += emitOutputLong(dst, dsp);
9903 if (id->idIsDspReloc())
9904 {
9905 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9906 }
9907 }
9908 }
9909 else
9910 {
9911 // The address is "[reg+rgx+dsp]"
9912 regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
9913
9914 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
9915 {
9916 if (dspIsZero && reg != REG_EBP)
9917 {
9918 // This is [reg+rgx]"
9919 dst += emitOutputByte(dst, code | 0x04);
9920 dst += emitOutputByte(dst, regByte);
9921 }
9922 else
9923 {
9924 // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9925 if (dspInByte)
9926 {
9927 dst += emitOutputByte(dst, code | 0x44);
9928 dst += emitOutputByte(dst, regByte);
9929 dst += emitOutputByte(dst, dsp);
9930 }
9931 else
9932 {
9933 dst += emitOutputByte(dst, code | 0x84);
9934 dst += emitOutputByte(dst, regByte);
9935 dst += emitOutputLong(dst, dsp);
9936 if (id->idIsDspReloc())
9937 {
9938 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9939 }
9940 }
9941 }
9942 }
9943 else
9944 {
9945 if (dspIsZero && reg != REG_EBP)
9946 {
9947 // This is [reg+rgx]"
9948 dst += emitOutputWord(dst, code | 0x0400);
9949 dst += emitOutputByte(dst, regByte);
9950 }
9951 else
9952 {
9953 // This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9954 if (dspInByte)
9955 {
9956 dst += emitOutputWord(dst, code | 0x4400);
9957 dst += emitOutputByte(dst, regByte);
9958 dst += emitOutputByte(dst, dsp);
9959 }
9960 else
9961 {
9962 dst += emitOutputWord(dst, code | 0x8400);
9963 dst += emitOutputByte(dst, regByte);
9964 dst += emitOutputLong(dst, dsp);
9965 if (id->idIsDspReloc())
9966 {
9967 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
9968 }
9969 }
9970 }
9971 }
9972 }
9973 }
9974
9975 // Now generate the constant value, if present
9976 if (addc)
9977 {
9978 ssize_t cval = addc->cnsVal;
9979
9980#ifdef _TARGET_AMD64_
9981 // all these opcodes only take a sign-extended 4-byte immediate
9982 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
9983#endif
9984
9985 switch (opsz)
9986 {
9987 case 0:
9988 case 4:
9989 case 8:
9990 dst += emitOutputLong(dst, cval);
9991 break;
9992 case 2:
9993 dst += emitOutputWord(dst, cval);
9994 break;
9995 case 1:
9996 dst += emitOutputByte(dst, cval);
9997 break;
9998
9999 default:
10000 assert(!"unexpected operand size");
10001 }
10002
10003 if (addc->cnsReloc)
10004 {
10005 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10006 assert(opsz == 4);
10007 }
10008 }
10009
10010DONE:
10011
10012 // Does this instruction operate on a GC ref value?
10013 if (id->idGCref())
10014 {
10015 switch (id->idInsFmt())
10016 {
10017 case IF_ARD:
10018 case IF_AWR:
10019 case IF_ARW:
10020 break;
10021
10022 case IF_RRD_ARD:
10023 break;
10024
10025 case IF_RWR_ARD:
10026 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10027 break;
10028
10029 case IF_RRW_ARD:
10030 // Mark the destination register as holding a GCT_BYREF
10031 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
10032 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10033 break;
10034
10035 case IF_ARD_RRD:
10036 case IF_AWR_RRD:
10037 break;
10038
10039 case IF_AWR_RRD_RRD:
10040 break;
10041
10042 case IF_ARD_CNS:
10043 case IF_AWR_CNS:
10044 break;
10045
10046 case IF_ARW_RRD:
10047 case IF_ARW_CNS:
10048 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
10049 break;
10050
10051 default:
10052#ifdef DEBUG
10053 emitDispIns(id, false, false, false);
10054#endif
10055 assert(!"unexpected GC ref instruction format");
10056 }
10057
10058 // mul can never produce a GC ref
10059 assert(!instrIs3opImul(ins));
10060 assert(ins != INS_mulEAX && ins != INS_imulEAX);
10061 }
10062 else
10063 {
10064 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10065 {
10066 switch (id->idInsFmt())
10067 {
10068 case IF_RWR_ARD:
10069 case IF_RRW_ARD:
10070 case IF_RWR_RRD_ARD:
10071 emitGCregDeadUpd(id->idReg1(), dst);
10072 break;
10073 default:
10074 break;
10075 }
10076
10077 if (ins == INS_mulEAX || ins == INS_imulEAX)
10078 {
10079 emitGCregDeadUpd(REG_EAX, dst);
10080 emitGCregDeadUpd(REG_EDX, dst);
10081 }
10082
10083 // For the three operand imul instruction the target register
10084 // is encoded in the opcode
10085
10086 if (instrIs3opImul(ins))
10087 {
10088 regNumber tgtReg = inst3opImulReg(ins);
10089 emitGCregDeadUpd(tgtReg, dst);
10090 }
10091 }
10092 }
10093
10094 return dst;
10095}
10096
10097/*****************************************************************************
10098 *
10099 * Output an instruction involving a stack frame value.
10100 */
10101
10102BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
10103{
10104 int adr;
10105 int dsp;
10106 bool EBPbased;
10107 bool dspInByte;
10108 bool dspIsZero;
10109
10110 instruction ins = id->idIns();
10111 emitAttr size = id->idOpSize();
10112 size_t opsz = EA_SIZE_IN_BYTES(size);
10113
10114 assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);
10115
10116 // Is there a large constant operand?
10117 if (addc && (size > EA_1BYTE))
10118 {
10119 ssize_t cval = addc->cnsVal;
10120
10121 // Does the constant fit in a byte?
10122 if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
10123 {
10124 if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) &&
10125 (id->idInsFmt() != IF_RWR_RRD_SRD_CNS))
10126 {
10127 code |= 2;
10128 }
10129
10130 opsz = 1;
10131 }
10132 }
10133
10134 // Add VEX prefix if required.
10135 // There are some callers who already add VEX prefix and call this routine.
    // Therefore, add VEX prefix if one is not already present.
10137 code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
10138
10139 // Compute the REX prefix
10140 if (TakesRexWPrefix(ins, size))
10141 {
10142 code = AddRexWPrefix(ins, code);
10143 }
10144
10145 // Special case emitting AVX instructions
10146 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10147 {
10148 if ((ins == INS_crc32) && (size > EA_1BYTE))
10149 {
10150 code |= 0x0100;
10151
10152 if (size == EA_2BYTE)
10153 {
10154 dst += emitOutputByte(dst, 0x66);
10155 }
10156 }
10157
10158 regNumber reg345 = REG_NA;
10159 if (IsBMIInstruction(ins))
10160 {
10161 reg345 = getBmiRegNumber(ins);
10162 }
10163 if (reg345 == REG_NA)
10164 {
10165 reg345 = id->idReg1();
10166 }
10167 else
10168 {
10169 code = insEncodeReg3456(ins, id->idReg1(), size, code);
10170 }
10171 unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
10172
10173 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10174
10175 if (UseVEXEncoding() && (ins != INS_crc32))
10176 {
10177 // Emit last opcode byte
10178 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10179 assert((code & 0xFF) == 0);
10180 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
10181 }
10182 else
10183 {
10184 dst += emitOutputWord(dst, code >> 16);
10185 dst += emitOutputWord(dst, code & 0xFFFF);
10186 }
10187
10188 code = regcode;
10189 }
10190 // Is this a 'big' opcode?
10191 else if (code & 0xFF000000)
10192 {
10193 // Output the REX prefix
10194 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10195
10196 // Output the highest word of the opcode
10197 // We need to check again because in case of AVX instructions the leading
10198 // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10199 if (code & 0xFF000000)
10200 {
10201 dst += emitOutputWord(dst, code >> 16);
10202 code &= 0x0000FFFF;
10203 }
10204 }
10205 else if (code & 0x00FF0000)
10206 {
10207 // BT supports 16 bit operands and this code doesn't add the necessary 66 prefix.
10208 assert(ins != INS_bt);
10209
10210 // Output the REX prefix
10211 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10212
10213 // Output the highest byte of the opcode.
10214 // We need to check again because in case of AVX instructions the leading
10215 // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10216 if (code & 0x00FF0000)
10217 {
10218 dst += emitOutputByte(dst, code >> 16);
10219 code &= 0x0000FFFF;
10220 }
10221
10222 // Use the large version if this is not a byte
10223 if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) &&
10224 !IsAVXInstruction(ins))
10225 {
10226 code |= 0x1;
10227 }
10228 }
10229 else if (CodeGen::instIsFP(ins))
10230 {
10231 assert(size == EA_4BYTE || size == EA_8BYTE);
10232
10233 if (size == EA_8BYTE)
10234 {
10235 code += 4;
10236 }
10237 }
10238 else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
10239 {
10240 // Is the operand size larger than a byte?
10241 switch (size)
10242 {
10243 case EA_1BYTE:
10244 break;
10245
10246 case EA_2BYTE:
10247 // Output a size prefix for a 16-bit operand
10248 dst += emitOutputByte(dst, 0x66);
10249 __fallthrough;
10250
10251 case EA_4BYTE:
10252#ifdef _TARGET_AMD64_
10253 case EA_8BYTE:
10254#endif // _TARGET_AMD64_
10255
10256 /* Set the 'w' size bit to indicate 32-bit operation
10257 * Note that incrementing "code" for INS_call (0xFF) would
10258 * overflow, whereas setting the lower bit to 1 just works out
10259 */
10260
10261 code |= 0x01;
10262 break;
10263
10264#ifdef _TARGET_X86_
10265 case EA_8BYTE:
10266
10267 // Double operand - set the appropriate bit.
10268 // I don't know what a legitimate reason to end up in this case would be
10269 // considering that FP is taken care of above...
10270 // what is an instruction that takes a double which is not covered by the
10271 // above instIsFP? Of the list in instrsxarch, only INS_fprem
10272 code |= 0x04;
10273 NO_WAY("bad 8 byte op");
10274 break;
10275#endif // _TARGET_X86_
10276
10277 default:
10278 NO_WAY("unexpected size");
10279 break;
10280 }
10281 }
10282
10283 // Output the REX prefix
10284 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10285
10286 // Figure out the variable's frame position
10287 int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
10288
10289 adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
10290 dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
10291
10292 dspInByte = ((signed char)dsp == (int)dsp);
10293 dspIsZero = (dsp == 0);
10294
    // for stack variables the dsp should never be a reloc
10296 assert(id->idIsDspReloc() == 0);
10297
10298 if (EBPbased)
10299 {
10300 // EBP-based variable: does the offset fit in a byte?
10301 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10302 {
10303 if (dspInByte)
10304 {
10305 dst += emitOutputByte(dst, code | 0x45);
10306 dst += emitOutputByte(dst, dsp);
10307 }
10308 else
10309 {
10310 dst += emitOutputByte(dst, code | 0x85);
10311 dst += emitOutputLong(dst, dsp);
10312 }
10313 }
10314 else
10315 {
10316 if (dspInByte)
10317 {
10318 dst += emitOutputWord(dst, code | 0x4500);
10319 dst += emitOutputByte(dst, dsp);
10320 }
10321 else
10322 {
10323 dst += emitOutputWord(dst, code | 0x8500);
10324 dst += emitOutputLong(dst, dsp);
10325 }
10326 }
10327 }
10328 else
10329 {
10330
10331#if !FEATURE_FIXED_OUT_ARGS
10332 // Adjust the offset by the amount currently pushed on the CPU stack
10333 dsp += emitCurStackLvl;
10334#endif
10335
10336 dspInByte = ((signed char)dsp == (int)dsp);
10337 dspIsZero = (dsp == 0);
10338
10339 // Does the offset fit in a byte?
10340 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
10341 {
10342 if (dspInByte)
10343 {
10344 if (dspIsZero)
10345 {
10346 dst += emitOutputByte(dst, code | 0x04);
10347 dst += emitOutputByte(dst, 0x24);
10348 }
10349 else
10350 {
10351 dst += emitOutputByte(dst, code | 0x44);
10352 dst += emitOutputByte(dst, 0x24);
10353 dst += emitOutputByte(dst, dsp);
10354 }
10355 }
10356 else
10357 {
10358 dst += emitOutputByte(dst, code | 0x84);
10359 dst += emitOutputByte(dst, 0x24);
10360 dst += emitOutputLong(dst, dsp);
10361 }
10362 }
10363 else
10364 {
10365 if (dspInByte)
10366 {
10367 if (dspIsZero)
10368 {
10369 dst += emitOutputWord(dst, code | 0x0400);
10370 dst += emitOutputByte(dst, 0x24);
10371 }
10372 else
10373 {
10374 dst += emitOutputWord(dst, code | 0x4400);
10375 dst += emitOutputByte(dst, 0x24);
10376 dst += emitOutputByte(dst, dsp);
10377 }
10378 }
10379 else
10380 {
10381 dst += emitOutputWord(dst, code | 0x8400);
10382 dst += emitOutputByte(dst, 0x24);
10383 dst += emitOutputLong(dst, dsp);
10384 }
10385 }
10386 }
10387
10388 // Now generate the constant value, if present
10389 if (addc)
10390 {
10391 ssize_t cval = addc->cnsVal;
10392
10393#ifdef _TARGET_AMD64_
10394 // all these opcodes only take a sign-extended 4-byte immediate
10395 noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
10396#endif
10397
10398 switch (opsz)
10399 {
10400 case 0:
10401 case 4:
10402 case 8:
10403 dst += emitOutputLong(dst, cval);
10404 break;
10405 case 2:
10406 dst += emitOutputWord(dst, cval);
10407 break;
10408 case 1:
10409 dst += emitOutputByte(dst, cval);
10410 break;
10411
10412 default:
10413 assert(!"unexpected operand size");
10414 }
10415
10416 if (addc->cnsReloc)
10417 {
10418 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10419 assert(opsz == 4);
10420 }
10421 }
10422
10423 // Does this instruction operate on a GC ref value?
10424 if (id->idGCref())
10425 {
10426 // Factor in the sub-variable offset
10427 adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
10428
10429 switch (id->idInsFmt())
10430 {
10431 case IF_SRD:
10432 // Read stack -- no change
10433 break;
10434
10435 case IF_SWR: // Stack Write (So we need to update GC live for stack var)
10436 // Write stack -- GC var may be born
10437 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10438 break;
10439
10440 case IF_SRD_CNS:
10441 // Read stack -- no change
10442 break;
10443
10444 case IF_SWR_CNS:
10445 // Write stack -- no change
10446 break;
10447
10448 case IF_SRD_RRD:
10449 case IF_RRD_SRD:
10450 // Read stack , read register -- no change
10451 break;
10452
10453 case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
10454
10455 // Read stack , write register -- GC reg may be born
10456 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10457 break;
10458
10459 case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
10460 // Read register, write stack -- GC var may be born
10461 emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10462 break;
10463
10464 case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
10465
10466 // reg could have been a GCREF as GCREF + int=BYREF
10467 // or BYREF+/-int=BYREF
10468 assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
10469 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10470 break;
10471
10472 case IF_SRW_CNS:
10473 case IF_SRW_RRD:
10474 // += -= of a byref, no change
10475
10476 case IF_SRW:
10477 break;
10478
10479 default:
10480#ifdef DEBUG
10481 emitDispIns(id, false, false, false);
10482#endif
10483 assert(!"unexpected GC ref instruction format");
10484 }
10485 }
10486 else
10487 {
10488 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10489 {
10490 switch (id->idInsFmt())
10491 {
10492 case IF_RWR_SRD: // Register Write, Stack Read
10493 case IF_RRW_SRD: // Register Read/Write, Stack Read
10494 case IF_RWR_RRD_SRD:
10495 emitGCregDeadUpd(id->idReg1(), dst);
10496 break;
10497 default:
10498 break;
10499 }
10500
10501 if (ins == INS_mulEAX || ins == INS_imulEAX)
10502 {
10503 emitGCregDeadUpd(REG_EAX, dst);
10504 emitGCregDeadUpd(REG_EDX, dst);
10505 }
10506
10507 // For the three operand imul instruction the target register
10508 // is encoded in the opcode
10509
10510 if (instrIs3opImul(ins))
10511 {
10512 regNumber tgtReg = inst3opImulReg(ins);
10513 emitGCregDeadUpd(tgtReg, dst);
10514 }
10515 }
10516 }
10517
10518 return dst;
10519}
10520
10521/*****************************************************************************
10522 *
10523 * Output an instruction with a static data member (class variable).
10524 */
10525
//------------------------------------------------------------------------
// emitOutputCV: Emit the encoding of an instruction whose memory operand is a
//    static data member (class variable) or a JIT data-section constant that
//    is addressed through a displacement.
//
// Arguments:
//    dst  - address at which to start writing the instruction bytes
//    id   - descriptor of the instruction; supplies the opcode, operand size,
//           instruction format, field handle, displacement and GC-ness
//    code - opcode bits supplied by the caller; prefixes and size bits are
//           merged into it here before it is written out
//    addc - optional immediate operand (value plus reloc flag); null when the
//           instruction has no immediate
//
// Return Value:
//    'dst' advanced past every byte that was written.
//
// Notes:
//    Bytes are produced in this order: optional FS segment override, prefixes
//    and opcode, the 4-byte displacement (or the pointer-sized moffset on
//    x86), the optional immediate. GC liveness for any register written by
//    the instruction is updated last.
//
BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
{
    BYTE*                addr;
    CORINFO_FIELD_HANDLE fldh;
    ssize_t              offs;
    int                  doff;

    emitAttr    size      = id->idOpSize();
    size_t      opsz      = EA_SIZE_IN_BYTES(size); // size of the immediate to emit; may shrink to 1 below
    instruction ins       = id->idIns();
    bool        isMoffset = false; // set only on x86 when the short "mov eax <-> [addr]" form is chosen

    // Get hold of the field handle and offset
    fldh = id->idAddr()->iiaFieldHnd;
    offs = emitGetInsDsp(id);

    // Special case: mov reg, fs:[ddd]
    if (fldh == FLD_GLOBAL_FS)
    {
        // 0x64 is the FS segment-override prefix
        dst += emitOutputByte(dst, 0x64);
    }

    // Compute VEX prefix
    // Some of its callers already add VEX prefix and then call this routine.
    // Therefore add the VEX prefix only if it is not already present.
    code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);

    // Compute the REX prefix
    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    // Is there a large constant operand?
    if (addc && (size > EA_1BYTE))
    {
        ssize_t cval = addc->cnsVal;
        // Does the constant fit in a byte?
        // (relocatable constants, mov and test never use the short immediate form)
        if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
        {
            if (id->idInsFmt() != IF_MRW_SHF)
            {
                // NOTE(review): presumably selects the sign-extended imm8 variant of the
                // opcode; the shift format is excluded from this adjustment - confirm
                // against the instruction tables.
                code |= 2;
            }

            // Only a single immediate byte is emitted further down
            opsz = 1;
        }
    }
#ifdef _TARGET_X86_
    else
    {
        // Special case: "mov eax, [addr]" and "mov [addr], eax"
        // Amd64: this is one case where addr can be 64-bit in size.  This is
        // currently unused or not enabled on amd64 as it always uses RIP
        // relative addressing which results in smaller instruction size.
        if (ins == INS_mov && id->idReg1() == REG_EAX)
        {
            switch (id->idInsFmt())
            {
                case IF_RWR_MRD:

                    assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));

                    // Replace the low 32 bits of the opcode with the one-byte "load accumulator
                    // from moffset" opcode; the 'w' bit is OR'ed in later by the size switch.
                    code &= ~((code_t)0xFFFFFFFF);
                    code |= 0xA0;
                    isMoffset = true;
                    break;

                case IF_MWR_RRD:

                    assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));

                    // Same as above, for the "store accumulator to moffset" opcode
                    code &= ~((code_t)0xFFFFFFFF);
                    code |= 0xA2;
                    isMoffset = true;
                    break;

                default:
                    break;
            }
        }
    }
#endif //_TARGET_X86_

    // Special case emitting AVX instructions
    if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
    {
        if ((ins == INS_crc32) && (size > EA_1BYTE))
        {
            // Source operand is wider than a byte: set the low bit of the last opcode
            // byte, which lives in bits 8-15 of 'code' (see the emission below).
            code |= 0x0100;

            if (size == EA_2BYTE)
            {
                // Operand-size prefix for the 16-bit form
                dst += emitOutputByte(dst, 0x66);
            }
        }

        // For blsi/blsmsk/blsr, getBmiRegNumber supplies a fixed value for bits 3-5 of
        // the ModR/M byte and the instruction's own register is encoded through
        // insEncodeReg3456 instead. For everything else reg1 goes in bits 3-5.
        regNumber reg345 = REG_NA;
        if (IsBMIInstruction(ins))
        {
            reg345 = getBmiRegNumber(ins);
        }
        if (reg345 == REG_NA)
        {
            reg345 = id->idReg1();
        }
        else
        {
            code = insEncodeReg3456(ins, id->idReg1(), size, code);
        }
        unsigned regcode = insEncodeReg345(ins, reg345, size, &code);

        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        if (UseVEXEncoding() && (ins != INS_crc32))
        {
            // Emit last opcode byte
            // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
            assert((code & 0xFF) == 0);
            dst += emitOutputByte(dst, (code >> 8) & 0xFF);
        }
        else
        {
            // No VEX prefix absorbed the leading opcode bytes, so write all four
            dst += emitOutputWord(dst, code >> 16);
            dst += emitOutputWord(dst, code & 0xFFFF);
        }

        // Emit Mod,R/M byte
        // (0x05 = mod 00, r/m 101: the operand is a bare 32-bit displacement)
        dst += emitOutputByte(dst, regcode | 0x05);

        // Everything has been written; suppress the generic opcode output below
        code = 0;
    }
    // Is this a 'big' opcode?
    else if (code & 0xFF000000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest word of the opcode.
        // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
        if (code & 0xFF000000)
        {
            dst += emitOutputWord(dst, code >> 16);
        }
        code &= 0x0000FFFF;
    }
    else if (code & 0x00FF0000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Check again as VEX prefix would have encoded leading opcode byte
        if (code & 0x00FF0000)
        {
            dst += emitOutputByte(dst, code >> 16);
            code &= 0x0000FFFF;
        }

        if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd ||
             insIsCMOV(ins)) &&
            size != EA_1BYTE)
        {
            // movsx and movzx are 'big' opcodes but also have the 'w' bit
            code++;
        }
    }
    else if (CodeGen::instIsFP(ins))
    {
        assert(size == EA_4BYTE || size == EA_8BYTE);

        if (size == EA_8BYTE)
        {
            // Switch to the 8-byte operand variant of the floating-point opcode
            code += 4;
        }
    }
    else
    {
        // Is the operand size larger than a byte?
        switch (size)
        {
            case EA_1BYTE:
                break;

            case EA_2BYTE:
                // Output a size prefix for a 16-bit operand
                dst += emitOutputByte(dst, 0x66);
                __fallthrough;

            case EA_4BYTE:
#ifdef _TARGET_AMD64_
            case EA_8BYTE:
#endif
                // Set the 'w' bit to get the large version
                code |= 0x1;
                break;

#ifdef _TARGET_X86_
            case EA_8BYTE:
                // Double operand - set the appropriate bit
                code |= 0x04;
                break;
#endif // _TARGET_X86_

            default:
                assert(!"unexpected size");
        }
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    if (code)
    {
        // The *_OFF formats and the moffset form emit a single opcode byte; every other
        // form emits two bytes (opcode followed by the Mod,R/M byte held in bits 8-15).
        if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset)
        {
            dst += emitOutputByte(dst, code);
        }
        else
        {
            dst += emitOutputWord(dst, code);
        }
    }

    // Do we have a constant or a static data member?
    doff = Compiler::eeGetJitDataOffs(fldh);
    if (doff >= 0)
    {
        // The handle denotes an offset into the method's constant data block
        addr = emitConsBlock + doff;

        int byteSize = EA_SIZE_IN_BYTES(size);

        // this instruction has a fixed size (4) src.
        if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss)
        {
            byteSize = 4;
        }
        // This has a fixed size (8) source.
        if (ins == INS_vbroadcastsd)
        {
            byteSize = 8;
        }

        // Check that the offset is properly aligned (i.e. the ddd in [ddd])
        assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0));
    }
    else
    {
        // Special case: mov reg, fs:[ddd] or mov reg, [ddd]
        if (jitStaticFldIsGlobAddr(fldh))
        {
            // The displacement itself is the whole address
            addr = nullptr;
        }
        else
        {
            addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr);
            if (addr == nullptr)
            {
                NO_WAY("could not obtain address of static field");
            }
        }
    }

    // Final address referenced by the instruction
    BYTE* target = (addr + offs);

    if (!isMoffset)
    {
        // Negated number of immediate bytes that follow the displacement; handed to
        // emitRecordRelocation together with the displacement's location.
        INT32 addlDelta = 0;

        if (addc)
        {
            // It is of the form "ins [disp], immed"
            // For emitting relocation, we also need to take into account of the
            // additional bytes of code emitted for immed val.

            ssize_t cval = addc->cnsVal;

#ifdef _TARGET_AMD64_
            // all these opcodes only take a sign-extended 4-byte immediate
            noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
#else  //_TARGET_X86_
            noway_assert(opsz <= 4);
#endif //_TARGET_X86_

            switch (opsz)
            {
                case 0:
                case 4:
                case 8:
                    addlDelta = -4;
                    break;
                case 2:
                    addlDelta = -2;
                    break;
                case 1:
                    addlDelta = -1;
                    break;

                default:
                    assert(!"unexpected operand size");
                    unreached();
            }
        }

#ifdef _TARGET_AMD64_
        // All static field and data section constant accesses should be marked as relocatable
        noway_assert(id->idIsDspReloc());
        // Placeholder displacement; the relocation recorded below supplies the real value
        dst += emitOutputLong(dst, 0);
#else  //_TARGET_X86_
        dst += emitOutputLong(dst, (int)target);
#endif //_TARGET_X86_

        if (id->idIsDspReloc())
        {
            emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
        }
    }
    else
    {
#ifdef _TARGET_AMD64_
        // This code path should never be hit on amd64 since it always uses RIP relative addressing.
        // In future if ever there is a need to enable this special case, also enable the logic
        // that sets isMoffset to true on amd64.
        unreached();
#else //_TARGET_X86_

        // The moffset form carries a full pointer-sized absolute address
        dst += emitOutputSizeT(dst, (ssize_t)target);

        if (id->idIsDspReloc())
        {
            emitRecordRelocation((void*)(dst - TARGET_POINTER_SIZE), target, IMAGE_REL_BASED_MOFFSET);
        }

#endif //_TARGET_X86_
    }

    // Now generate the constant value, if present
    if (addc)
    {
        ssize_t cval = addc->cnsVal;

#ifdef _TARGET_AMD64_
        // all these opcodes only take a sign-extended 4-byte immediate
        noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
#endif

        switch (opsz)
        {
            case 0:
            case 4:
            case 8:
                dst += emitOutputLong(dst, cval);
                break;
            case 2:
                dst += emitOutputWord(dst, cval);
                break;
            case 1:
                dst += emitOutputByte(dst, cval);
                break;

            default:
                assert(!"unexpected operand size");
        }
        if (addc->cnsReloc)
        {
            // The relocation covers the 4 immediate bytes that were just written
            emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
            assert(opsz == 4);
        }
    }

    // Does this instruction operate on a GC ref value?
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
            // Formats that do not write a register: no GC register state changes
            case IF_MRD:
            case IF_MRW:
            case IF_MWR:
                break;

            case IF_RRD_MRD:
                break;

            case IF_RWR_MRD:
                // The destination register now holds the GC value that was loaded
                emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
                break;

            case IF_MRD_RRD:
            case IF_MWR_RRD:
            case IF_MRW_RRD:
                break;

            case IF_MRD_CNS:
            case IF_MWR_CNS:
            case IF_MRW_CNS:
                break;

            case IF_RRW_MRD:

                assert(id->idGCref() == GCT_BYREF);
                assert(ins == INS_add || ins == INS_sub);

                // Mark it as holding a GCT_BYREF
                emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
                break;

            default:
#ifdef DEBUG
                emitDispIns(id, false, false, false);
#endif
                assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        // Not a GC value: any general-purpose register that gets written stops being a GC ref
        if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
            switch (id->idInsFmt())
            {
                case IF_RWR_MRD:
                case IF_RRW_MRD:
                case IF_RWR_RRD_MRD:
                    emitGCregDeadUpd(id->idReg1(), dst);
                    break;
                default:
                    break;
            }

            if (ins == INS_mulEAX || ins == INS_imulEAX)
            {
                emitGCregDeadUpd(REG_EAX, dst);
                emitGCregDeadUpd(REG_EDX, dst);
            }

            // For the three operand imul instruction the target register
            // is encoded in the opcode

            if (instrIs3opImul(ins))
            {
                regNumber tgtReg = inst3opImulReg(ins);
                emitGCregDeadUpd(tgtReg, dst);
            }
        }
    }

    return dst;
}
10971
10972/*****************************************************************************
10973 *
10974 * Output an instruction with one register operand.
10975 */
10976
//------------------------------------------------------------------------
// emitOutputR: Emit the encoding of an instruction that has a single
//    register operand.
//
// Arguments:
//    dst - address at which to start writing the instruction bytes
//    id  - descriptor of the instruction; supplies the opcode, the register
//          (reg1), the operand size, the instruction format and GC-ness
//
// Return Value:
//    'dst' advanced past every byte that was written.
//
// Notes:
//    inc/dec, push/pop, bswap and the setcc family each have their own
//    encoding path; every other instruction goes through the generic path in
//    the 'default' case. After the bytes are written, the GC register state
//    is updated according to whether the format reads (IF_RRD), writes
//    (IF_RWR) or read-modify-writes (IF_RRW) the register.
//
BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
{
    code_t code;

    instruction ins  = id->idIns();
    regNumber   reg  = id->idReg1();
    emitAttr    size = id->idOpSize();

    // SSE/AVX instructions are not expected here: the GC update logic at the end
    // of this method only knows how to handle general-purpose registers.
    assert(!IsSSEInstruction(ins));
    assert(!IsAVXInstruction(ins));

    // Get the 'base' opcode
    switch (ins)
    {
        case INS_inc:
        case INS_dec:

// On AMD64 the long form is always used; on x86 it is used only for byte-sized
// operands and the compact one-byte form is used otherwise.
#ifdef _TARGET_AMD64_
            if (true)
#else
            if (size == EA_1BYTE)
#endif
            {
                // The long-form instruction must immediately follow the compact one in the enum
                assert(INS_inc_l == INS_inc + 1);
                assert(INS_dec_l == INS_dec + 1);

                // Can't use the compact form, use the long form
                ins = (instruction)(ins + 1);
                if (size == EA_2BYTE)
                {
                    // Output a size prefix for a 16-bit operand
                    dst += emitOutputByte(dst, 0x66);
                }

                code = insCodeRR(ins);
                if (size != EA_1BYTE)
                {
                    // Set the 'w' bit to get the large version
                    code |= 0x1;
                }

                if (TakesRexWPrefix(ins, size))
                {
                    code = AddRexWPrefix(ins, code);
                }

                // Register...
                unsigned regcode = insEncodeReg012(ins, reg, size, &code);

                // Output the REX prefix
                dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

                // Opcode byte followed by the second byte with the register merged in
                dst += emitOutputWord(dst, code | (regcode << 8));
            }
            else
            {
                if (size == EA_2BYTE)
                {
                    // Output a size prefix for a 16-bit operand
                    dst += emitOutputByte(dst, 0x66);
                }
                // Compact form: the register number is folded into the single opcode byte
                dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr));
            }
            break;

        case INS_pop:
        case INS_pop_hide:
        case INS_push:
        case INS_push_hide:

            assert(size == EA_PTRSIZE);
            code = insEncodeOpreg(ins, reg, size);

            assert(!TakesVexPrefix(ins));
            assert(!TakesRexWPrefix(ins, size));

            // Output the REX prefix
            dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

            // Single opcode byte with the register encoded in it
            dst += emitOutputByte(dst, code);
            break;

        case INS_bswap:
        {
            assert(size >= EA_4BYTE && size <= EA_PTRSIZE); // 16-bit BSWAP is undefined

            // The Intel instruction set reference for BSWAP states that extended registers
            // should be enabled via REX.R, but per Vol. 2A, Sec. 2.2.1.2 (see also Figure 2-7),
            // REX.B should instead be used if the register is encoded in the opcode byte itself.
            // Therefore the default logic of insEncodeReg012 is correct for this case.

            code = insCodeRR(ins);

            if (TakesRexWPrefix(ins, size))
            {
                code = AddRexWPrefix(ins, code);
            }

            // Register...
            unsigned regcode = insEncodeReg012(ins, reg, size, &code);

            // Output the REX prefix
            dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

            dst += emitOutputWord(dst, code | (regcode << 8));
            break;
        }

        case INS_seto:
        case INS_setno:
        case INS_setb:
        case INS_setae:
        case INS_sete:
        case INS_setne:
        case INS_setbe:
        case INS_seta:
        case INS_sets:
        case INS_setns:
        case INS_setpe:
        case INS_setpo:
        case INS_setl:
        case INS_setge:
        case INS_setle:
        case INS_setg:

            // setcc always produces a byte and never a GC reference
            assert(id->idGCref() == GCT_NONE);
            assert(size == EA_1BYTE);

            code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));

            // Output the REX prefix
            dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

            // We expect this to always be a 'big' opcode
            assert(code & 0x00FF0000);

            // Leading opcode byte first, then the remaining two bytes
            dst += emitOutputByte(dst, code >> 16);
            dst += emitOutputWord(dst, code & 0x0000FFFF);

            break;

        case INS_mulEAX:
        case INS_imulEAX:

            // Kill off any GC refs in EAX or EDX
            emitGCregDeadUpd(REG_EAX, dst);
            emitGCregDeadUpd(REG_EDX, dst);

            // The encoding itself is handled by the generic path
            __fallthrough;

        default:

            assert(id->idGCref() == GCT_NONE);

            code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));

            if (size != EA_1BYTE)
            {
                // Set the 'w' bit to get the large version
                code |= 0x1;

                if (size == EA_2BYTE)
                {
                    // Output a size prefix for a 16-bit operand
                    dst += emitOutputByte(dst, 0x66);
                }
            }

            code = AddVexPrefixIfNeeded(ins, code, size);

            if (TakesRexWPrefix(ins, size))
            {
                code = AddRexWPrefix(ins, code);
            }

            // Output the REX prefix
            dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

            dst += emitOutputWord(dst, code);
            break;
    }

    // Are we writing the register? if so then update the GC information
    switch (id->idInsFmt())
    {
        case IF_RRD:
            // Register is only read: nothing changes
            break;
        case IF_RWR:
            // Register is overwritten: it becomes live with the instruction's GC type,
            // or dead if the instruction does not produce a GC value
            if (id->idGCref())
            {
                emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            }
            else
            {
                emitGCregDeadUpd(id->idReg1(), dst);
            }
            break;
        case IF_RRW:
        {
#ifdef DEBUG
            // Only needed by the asserts below
            regMaskTP regMask = genRegMask(reg);
#endif
            if (id->idGCref())
            {
                // The reg must currently be holding either a gcref or a byref
                // and the instruction must be inc or dec
                assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) &&
                       (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l));
                assert(id->idGCref() == GCT_BYREF);
                // Mark it as holding a GCT_BYREF
                emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
            }
            else
            {
                // Can't use RRW to trash a GC ref.  It's OK for unverifiable code
                // to trash Byrefs.
                assert((emitThisGCrefRegs & regMask) == 0);
            }
        }
        break;
        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected instruction format");
            break;
    }

    return dst;
}
11208
11209/*****************************************************************************
11210 *
11211 * Output an instruction with two register operands.
11212 */
11213
//------------------------------------------------------------------------
// emitOutputRR: Emit the encoding of an instruction that has two register
//    operands.
//
// Arguments:
//    dst - address at which to start writing the instruction bytes
//    id  - descriptor of the instruction; supplies the opcode, both registers
//          (reg1, reg2), the operand size, the instruction format and GC-ness
//
// Return Value:
//    'dst' advanced past every byte that was written.
//
// Notes:
//    The method first picks the base opcode and size/prefix bits for the
//    instruction family, then merges the two registers into the Mod,R/M bits
//    (and into the VEX 'vvvv' field where applicable), writes prefix, opcode
//    and Mod,R/M, and finally updates the GC register state.
//
BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
{
    code_t code;

    instruction ins  = id->idIns();
    regNumber   reg1 = id->idReg1();
    regNumber   reg2 = id->idReg2();
    emitAttr    size = id->idOpSize();

    // Get the 'base' opcode
    code = insCodeRM(ins);
    code = AddVexPrefixIfNeeded(ins, code, size);
    if (IsSSEOrAVXInstruction(ins))
    {
        code = insEncodeRMreg(ins, code);

        if (TakesRexWPrefix(ins, size))
        {
            code = AddRexWPrefix(ins, code);
        }
    }
    else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
    {
        // The low opcode bit is set for a 16-bit operand
        code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);

// Note the shape of this conditional region: on AMD64 it finishes the branch above
// and opens an extra 'movsxd' branch; on other targets the closing brace after the
// #endif ends the movsx/movzx/cmov branch directly.
#ifdef _TARGET_AMD64_

        assert((size < EA_4BYTE) || (insIsCMOV(ins)));
        if ((size == EA_8BYTE) || (ins == INS_movsx))
        {
            code = AddRexWPrefix(ins, code);
        }
    }
    else if (ins == INS_movsxd)
    {
        code = insEncodeRMreg(ins, code);

#endif // _TARGET_AMD64_
    }
#ifdef FEATURE_HW_INTRINSICS
    else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt))
    {
        code = insEncodeRMreg(ins, code);
        if ((ins == INS_crc32) && (size > EA_1BYTE))
        {
            // Source operand is wider than a byte: adjust the opcode accordingly
            code |= 0x0100;
        }

        if (size == EA_2BYTE)
        {
            // Only crc32 is expected with a 16-bit operand here
            assert(ins == INS_crc32);
            dst += emitOutputByte(dst, 0x66);
        }
        else if (size == EA_8BYTE)
        {
            code = AddRexWPrefix(ins, code);
        }
    }
#endif // FEATURE_HW_INTRINSICS
    else
    {
        // Everything else is encoded from the MR form of the opcode, replacing the
        // RM-based value computed at the top of the method
        code = insEncodeMRreg(ins, insCodeMR(ins));

        if (ins != INS_test)
        {
            // NOTE(review): presumably the direction bit, turning the MR opcode into its
            // "reg <- r/m" counterpart; test has no such variant - confirm.
            code |= 2;
        }

        switch (size)
        {
            case EA_1BYTE:
                // Both operands must be byte-addressable registers
                noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
                noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
                break;

            case EA_2BYTE:
                // Output a size prefix for a 16-bit operand
                dst += emitOutputByte(dst, 0x66);
                __fallthrough;

            case EA_4BYTE:
                // Set the 'w' bit to get the large version
                code |= 0x1;
                break;

#ifdef _TARGET_AMD64_
            case EA_8BYTE:
                // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
                // Don't need to zero out the high bits explicitly
                // ("xor reg, reg" is emitted without REX.W)
                if ((ins != INS_xor) || (reg1 != reg2))
                {
                    code = AddRexWPrefix(ins, code);
                }

                // Set the 'w' bit to get the large version
                code |= 0x1;
                break;

#endif // _TARGET_AMD64_

            default:
                assert(!"unexpected size");
        }
    }

    // For blsi/blsmsk/blsr, getBmiRegNumber supplies a fixed value for bits 3-5 of
    // the Mod,R/M byte; otherwise reg1 goes there.
    regNumber reg345 = REG_NA;
    if (IsBMIInstruction(ins))
    {
        reg345 = getBmiRegNumber(ins);
    }
    if (reg345 == REG_NA)
    {
        reg345 = id->idReg1();
    }
    unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
    regCode |= insEncodeReg012(ins, reg2, size, &code);

    if (TakesVexPrefix(ins))
    {
        // In case of AVX instructions that take 3 operands, we generally want to encode reg1
        // as first source.  In this case, reg1 is both a source and a destination.
        // The exception is the "merge" 3-operand case, where we have a move instruction, such
        // as movss, and we want to merge the source with itself.
        //
        // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
        // now we use the single source as source1 and source2.
        if (IsDstDstSrcAVXInstruction(ins))
        {
            // encode source/dest operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, reg1, size, code);
        }
        else if (IsDstSrcSrcAVXInstruction(ins))
        {
            // encode source operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, reg2, size, code);
        }
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    if (code & 0xFF000000)
    {
        // Output the highest word of the opcode
        dst += emitOutputWord(dst, code >> 16);
        code &= 0x0000FFFF;

        if (Is4ByteSSEInstruction(ins))
        {
            // Output 3rd byte of the opcode
            dst += emitOutputByte(dst, code);
            code &= 0xFF00;
        }
    }
    else if (code & 0x00FF0000)
    {
        dst += emitOutputByte(dst, code >> 16);
        code &= 0x0000FFFF;
    }

    // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
    if ((code & 0xFF00) == 0xC000)
    {
        // Bits 8-15 of 'code' already hold the 0xC0 Mod,R/M skeleton: merge the
        // register bits in and write opcode + Mod,R/M together
        dst += emitOutputWord(dst, code | (regCode << 8));
    }
    else if ((code & 0xFF) == 0x00)
    {
        // This case happens for some SSE/AVX instructions only
        assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins));

        // One remaining opcode byte, then a register-direct Mod,R/M byte
        dst += emitOutputByte(dst, (code >> 8) & 0xFF);
        dst += emitOutputByte(dst, (0xC0 | regCode));
    }
    else
    {
        // Two remaining opcode bytes, then a register-direct Mod,R/M byte
        dst += emitOutputWord(dst, code);
        dst += emitOutputByte(dst, (0xC0 | regCode));
    }

    // Does this instruction operate on a GC ref value?
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
            case IF_RRD_RRD:
                // Both registers are only read: nothing changes
                break;

            case IF_RWR_RRD:

                if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
                {
                    // We're relocating "this" in the prolog
                    assert(emitComp->lvaIsOriginalThisArg(0));
                    assert(emitComp->lvaTable[0].lvRegister);
                    assert(emitComp->lvaTable[0].lvRegNum == reg1);

                    if (emitFullGCinfo)
                    {
                        emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
                        break;
                    }
                    else
                    {
                        /* If emitFullGCinfo==false, then we don't use any
                           regPtrDsc's and so explicitly note the location
                           of "this" in GCEncode.cpp
                         */
                    }
                }

                emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
                break;

            case IF_RRW_RRD:

                switch (id->idIns())
                {
                    /*
                        This must be one of the following cases:

                        xor reg, reg        to assign NULL

                        and r1 , r2         if (ptr1 && ptr2) ...
                        or  r1 , r2         if (ptr1 || ptr2) ...

                        add r1 , r2         to compute a normal byref
                        sub r1 , r2         to compute a strange byref (VC only)

                     */
                    case INS_xor:
                        assert(id->idReg1() == id->idReg2());
                        emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
                        break;

                    case INS_or:
                    case INS_and:
                        // The result is only used as a truth value, not as a pointer
                        emitGCregDeadUpd(id->idReg1(), dst);
                        break;

                    case INS_add:
                    case INS_sub:
                        assert(id->idGCref() == GCT_BYREF);

#ifdef DEBUG
                        regMaskTP regMask;
                        regMask = genRegMask(reg1) | genRegMask(reg2);

                        // r1/r2 could have been a GCREF as GCREF + int=BYREF
                        //                               or BYREF+/-int=BYREF
                        assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
                               ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
                        // Mark r1 as holding a byref
                        emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
                        break;

                    default:
#ifdef DEBUG
                        emitDispIns(id, false, false, false);
#endif
                        assert(!"unexpected GC reg update instruction");
                }

                break;

            case IF_RRW_RRW:
                // This must be "xchg reg1, reg2"
                assert(id->idIns() == INS_xchg);

                // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
                // register pointer mask.

                GCtype gc1, gc2;

                // Capture both types before any state is changed
                gc1 = emitRegGCtype(reg1);
                gc2 = emitRegGCtype(reg2);

                if (gc1 != gc2)
                {
                    // Kill the GC-info about the GC registers

                    if (needsGC(gc1))
                    {
                        emitGCregDeadUpd(reg1, dst);
                    }

                    if (needsGC(gc2))
                    {
                        emitGCregDeadUpd(reg2, dst);
                    }

                    // Now, swap the info

                    if (needsGC(gc1))
                    {
                        emitGCregLiveUpd(gc1, reg2, dst);
                    }

                    if (needsGC(gc2))
                    {
                        emitGCregLiveUpd(gc2, reg1, dst);
                    }
                }
                break;

            default:
#ifdef DEBUG
                emitDispIns(id, false, false, false);
#endif
                assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        // Not a GC value: any general-purpose register that gets written stops being a GC ref
        if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
            switch (id->idInsFmt())
            {
                case IF_RRD_CNS:
                    // INS_mulEAX can not be used with any of these formats
                    assert(ins != INS_mulEAX && ins != INS_imulEAX);

                    // For the three operand imul instruction the target
                    // register is encoded in the opcode

                    if (instrIs3opImul(ins))
                    {
                        regNumber tgtReg = inst3opImulReg(ins);
                        emitGCregDeadUpd(tgtReg, dst);
                    }
                    break;

                case IF_RWR_RRD:
                case IF_RRW_RRD:
                case IF_RWR_RRD_RRD:
                    // INS_mov_xmm2i writes to reg2.
                    if (ins == INS_mov_xmm2i)
                    {
                        emitGCregDeadUpd(id->idReg2(), dst);
                    }
                    else
                    {
                        emitGCregDeadUpd(id->idReg1(), dst);
                    }
                    break;

                default:
                    break;
            }
        }
    }

    return dst;
}
11567
11568BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
11569{
11570 code_t code;
11571
11572 instruction ins = id->idIns();
11573 assert(IsAVXInstruction(ins));
11574 assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins));
11575 regNumber targetReg = id->idReg1();
11576 regNumber src1 = id->idReg2();
11577 regNumber src2 = id->idReg3();
11578 emitAttr size = id->idOpSize();
11579
11580 code = insCodeRM(ins);
11581 code = AddVexPrefixIfNeeded(ins, code, size);
11582 code = insEncodeRMreg(ins, code);
11583
11584 if (TakesRexWPrefix(ins, size))
11585 {
11586 code = AddRexWPrefix(ins, code);
11587 }
11588
11589 unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
11590 regCode |= insEncodeReg012(ins, src2, size, &code);
11591 // encode source operand reg in 'vvvv' bits in 1's complement form
11592 code = insEncodeReg3456(ins, src1, size, code);
11593
11594 // Output the REX prefix
11595 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11596
11597 // Is this a 'big' opcode?
11598 if (code & 0xFF000000)
11599 {
11600 // Output the highest word of the opcode
11601 dst += emitOutputWord(dst, code >> 16);
11602 code &= 0x0000FFFF;
11603 }
11604 else if (code & 0x00FF0000)
11605 {
11606 dst += emitOutputByte(dst, code >> 16);
11607 code &= 0x0000FFFF;
11608 }
11609
11610 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
11611 if ((code & 0xFF00) == 0xC000)
11612 {
11613 dst += emitOutputWord(dst, code | (regCode << 8));
11614 }
11615 else if ((code & 0xFF) == 0x00)
11616 {
11617 // This case happens for AVX instructions only
11618 assert(IsAVXInstruction(ins));
11619
11620 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
11621 dst += emitOutputByte(dst, (0xC0 | regCode));
11622 }
11623 else
11624 {
11625 dst += emitOutputWord(dst, code);
11626 dst += emitOutputByte(dst, (0xC0 | regCode));
11627 }
11628
11629 noway_assert(!id->idGCref());
11630
11631 return dst;
11632}
11633
11634/*****************************************************************************
11635 *
11636 * Output an instruction with a register and constant operands.
11637 */
11638
11639BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
11640{
11641 code_t code;
11642 emitAttr size = id->idOpSize();
11643 instruction ins = id->idIns();
11644 regNumber reg = id->idReg1();
11645 ssize_t val = emitGetInsSC(id);
11646 bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
11647
11648 // BT reg,imm might be useful but it requires special handling of the immediate value
11649 // (it is always encoded in a byte). Let's not complicate things until this is needed.
11650 assert(ins != INS_bt);
11651
11652 if (id->idIsCnsReloc())
11653 {
11654 valInByte = false; // relocs can't be placed in a byte
11655 }
11656
11657 noway_assert(emitVerifyEncodable(ins, size, reg));
11658
11659 if (IsSSEOrAVXInstruction(ins))
11660 {
11661 // Handle SSE2 instructions of the form "opcode reg, immed8"
11662
11663 assert(id->idGCref() == GCT_NONE);
11664 assert(valInByte);
11665
11666 // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
11667 regNumber regOpcode = getSseShiftRegNumber(ins);
11668
11669 // Get the 'base' opcode.
11670 code = insCodeMI(ins);
11671 code = AddVexPrefixIfNeeded(ins, code, size);
11672 code = insEncodeMIreg(ins, reg, size, code);
11673 assert(code & 0x00FF0000);
11674 if (TakesVexPrefix(ins))
11675 {
11676 // The 'vvvv' bits encode the destination register, which for this case (RI)
11677 // is the same as the source.
11678 code = insEncodeReg3456(ins, reg, size, code);
11679 }
11680
11681 unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8;
11682
11683 // Output the REX prefix
11684 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11685
11686 if (code & 0xFF000000)
11687 {
11688 dst += emitOutputWord(dst, code >> 16);
11689 }
11690 else if (code & 0xFF0000)
11691 {
11692 dst += emitOutputByte(dst, code >> 16);
11693 }
11694
11695 dst += emitOutputWord(dst, code | regcode);
11696
11697 dst += emitOutputByte(dst, val);
11698
11699 return dst;
11700 }
11701
11702 // The 'mov' opcode is special
11703 if (ins == INS_mov)
11704 {
11705 code = insCodeACC(ins);
11706 assert(code < 0x100);
11707
11708 code |= 0x08; // Set the 'w' bit
11709 unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11710 code |= regcode;
11711
11712 // This is INS_mov and will not take VEX prefix
11713 assert(!TakesVexPrefix(ins));
11714
11715 if (TakesRexWPrefix(ins, size))
11716 {
11717 code = AddRexWPrefix(ins, code);
11718 }
11719
11720 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11721
11722 dst += emitOutputByte(dst, code);
11723 if (size == EA_4BYTE)
11724 {
11725 dst += emitOutputLong(dst, val);
11726 }
11727#ifdef _TARGET_AMD64_
11728 else
11729 {
11730 assert(size == EA_PTRSIZE);
11731 dst += emitOutputSizeT(dst, val);
11732 }
11733#endif
11734
11735 if (id->idIsCnsReloc())
11736 {
11737 emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
11738 }
11739
11740 goto DONE;
11741 }
11742
11743 // Decide which encoding is the shortest
11744 bool useSigned, useACC;
11745
11746 if (reg == REG_EAX && !instrIs3opImul(ins))
11747 {
11748 if (size == EA_1BYTE || (ins == INS_test))
11749 {
11750 // For al, ACC encoding is always the smallest
11751 useSigned = false;
11752 useACC = true;
11753 }
11754 else
11755 {
11756 /* For ax/eax, we avoid ACC encoding for small constants as we
11757 * can emit the small constant and have it sign-extended.
11758 * For big constants, the ACC encoding is better as we can use
11759 * the 1 byte opcode
11760 */
11761
11762 if (valInByte)
11763 {
11764 // avoid using ACC encoding
11765 useSigned = true;
11766 useACC = false;
11767 }
11768 else
11769 {
11770 useSigned = false;
11771 useACC = true;
11772 }
11773 }
11774 }
11775 else
11776 {
11777 useACC = false;
11778
11779 if (valInByte)
11780 {
11781 useSigned = true;
11782 }
11783 else
11784 {
11785 useSigned = false;
11786 }
11787 }
11788
11789 // "test" has no 's' bit
11790 if (ins == INS_test)
11791 {
11792 useSigned = false;
11793 }
11794
11795 // Get the 'base' opcode
11796 if (useACC)
11797 {
11798 assert(!useSigned);
11799 code = insCodeACC(ins);
11800 }
11801 else
11802 {
11803 assert(!useSigned || valInByte);
11804
11805 // Some instructions (at least 'imul') do not have a
11806 // r/m, immed form, but do have a dstReg,srcReg,imm8 form.
11807 if (valInByte && useSigned && insNeedsRRIb(ins))
11808 {
11809 code = insEncodeRRIb(ins, reg, size);
11810 }
11811 else
11812 {
11813 code = insCodeMI(ins);
11814 code = AddVexPrefixIfNeeded(ins, code, size);
11815 code = insEncodeMIreg(ins, reg, size, code);
11816 }
11817 }
11818
11819 switch (size)
11820 {
11821 case EA_1BYTE:
11822 break;
11823
11824 case EA_2BYTE:
11825 // Output a size prefix for a 16-bit operand
11826 dst += emitOutputByte(dst, 0x66);
11827 __fallthrough;
11828
11829 case EA_4BYTE:
11830 // Set the 'w' bit to get the large version
11831 code |= 0x1;
11832 break;
11833
11834#ifdef _TARGET_AMD64_
11835 case EA_8BYTE:
11836 /* Set the 'w' bit to get the large version */
11837 /* and the REX.W bit to get the really large version */
11838
11839 code = AddRexWPrefix(ins, code);
11840 code |= 0x1;
11841 break;
11842#endif
11843
11844 default:
11845 assert(!"unexpected size");
11846 }
11847
11848 // Output the REX prefix
11849 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11850
11851 // Does the value fit in a sign-extended byte?
11852 // Important! Only set the 's' bit when we have a size larger than EA_1BYTE.
11853 // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
11854
11855 if (useSigned && (size > EA_1BYTE))
11856 {
11857 // We can just set the 's' bit, and issue an immediate byte
11858
11859 code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte.
11860 dst += emitOutputWord(dst, code);
11861 dst += emitOutputByte(dst, val);
11862 }
11863 else
11864 {
11865 // Can we use an accumulator (EAX) encoding?
11866 if (useACC)
11867 {
11868 dst += emitOutputByte(dst, code);
11869 }
11870 else
11871 {
11872 dst += emitOutputWord(dst, code);
11873 }
11874
11875 switch (size)
11876 {
11877 case EA_1BYTE:
11878 dst += emitOutputByte(dst, val);
11879 break;
11880 case EA_2BYTE:
11881 dst += emitOutputWord(dst, val);
11882 break;
11883 case EA_4BYTE:
11884 dst += emitOutputLong(dst, val);
11885 break;
11886#ifdef _TARGET_AMD64_
11887 case EA_8BYTE:
11888 dst += emitOutputLong(dst, val);
11889 break;
11890#endif // _TARGET_AMD64_
11891 default:
11892 break;
11893 }
11894
11895 if (id->idIsCnsReloc())
11896 {
11897 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11898 assert(size == EA_4BYTE);
11899 }
11900 }
11901
11902DONE:
11903
11904 // Does this instruction operate on a GC ref value?
11905 if (id->idGCref())
11906 {
11907 switch (id->idInsFmt())
11908 {
11909 case IF_RRD_CNS:
11910 break;
11911
11912 case IF_RWR_CNS:
11913 emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11914 break;
11915
11916 case IF_RRW_CNS:
11917 assert(id->idGCref() == GCT_BYREF);
11918
11919#ifdef DEBUG
11920 regMaskTP regMask;
11921 regMask = genRegMask(reg);
11922 // FIXNOW review the other places and relax the assert there too
11923
11924 // The reg must currently be holding either a gcref or a byref
11925 // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
11926 if (emitThisGCrefRegs & regMask)
11927 {
11928 assert(ins == INS_add);
11929 }
11930 if (emitThisByrefRegs & regMask)
11931 {
11932 assert(ins == INS_add || ins == INS_sub);
11933 }
11934#endif
11935 // Mark it as holding a GCT_BYREF
11936 emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11937 break;
11938
11939 default:
11940#ifdef DEBUG
11941 emitDispIns(id, false, false, false);
11942#endif
11943 assert(!"unexpected GC ref instruction format");
11944 }
11945
11946 // mul can never produce a GC ref
11947 assert(!instrIs3opImul(ins));
11948 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11949 }
11950 else
11951 {
11952 switch (id->idInsFmt())
11953 {
11954 case IF_RRD_CNS:
11955 // INS_mulEAX can not be used with any of these formats
11956 assert(ins != INS_mulEAX && ins != INS_imulEAX);
11957
11958 // For the three operand imul instruction the target
11959 // register is encoded in the opcode
11960
11961 if (instrIs3opImul(ins))
11962 {
11963 regNumber tgtReg = inst3opImulReg(ins);
11964 emitGCregDeadUpd(tgtReg, dst);
11965 }
11966 break;
11967
11968 case IF_RRW_CNS:
11969 case IF_RWR_CNS:
11970 assert(!instrIs3opImul(ins));
11971
11972 emitGCregDeadUpd(id->idReg1(), dst);
11973 break;
11974
11975 default:
11976#ifdef DEBUG
11977 emitDispIns(id, false, false, false);
11978#endif
11979 assert(!"unexpected GC ref instruction format");
11980 }
11981 }
11982
11983 return dst;
11984}
11985
11986/*****************************************************************************
11987 *
11988 * Output an instruction with a constant operand.
11989 */
11990
11991BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
11992{
11993 code_t code;
11994 instruction ins = id->idIns();
11995 emitAttr size = id->idOpSize();
11996 ssize_t val = emitGetInsSC(id);
11997 bool valInByte = ((signed char)val == val);
11998
11999 // We would to update GC info correctly
12000 assert(!IsSSEInstruction(ins));
12001 assert(!IsAVXInstruction(ins));
12002
12003#ifdef _TARGET_AMD64_
12004 // all these opcodes take a sign-extended 4-byte immediate, max
12005 noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
12006#endif
12007
12008 if (id->idIsCnsReloc())
12009 {
12010 valInByte = false; // relocs can't be placed in a byte
12011
12012 // Of these instructions only the push instruction can have reloc
12013 assert(ins == INS_push || ins == INS_push_hide);
12014 }
12015
12016 switch (ins)
12017 {
12018 case INS_jge:
12019 assert((val >= -128) && (val <= 127));
12020 dst += emitOutputByte(dst, insCode(ins));
12021 dst += emitOutputByte(dst, val);
12022 break;
12023
12024 case INS_loop:
12025 assert((val >= -128) && (val <= 127));
12026 dst += emitOutputByte(dst, insCodeMI(ins));
12027 dst += emitOutputByte(dst, val);
12028 break;
12029
12030 case INS_ret:
12031 assert(val);
12032 dst += emitOutputByte(dst, insCodeMI(ins));
12033 dst += emitOutputWord(dst, val);
12034 break;
12035
12036 case INS_push_hide:
12037 case INS_push:
12038 code = insCodeMI(ins);
12039
12040 // Does the operand fit in a byte?
12041 if (valInByte)
12042 {
12043 dst += emitOutputByte(dst, code | 2);
12044 dst += emitOutputByte(dst, val);
12045 }
12046 else
12047 {
12048 if (TakesRexWPrefix(ins, size))
12049 {
12050 code = AddRexWPrefix(ins, code);
12051 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12052 }
12053
12054 dst += emitOutputByte(dst, code);
12055 dst += emitOutputLong(dst, val);
12056 if (id->idIsCnsReloc())
12057 {
12058 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
12059 }
12060 }
12061
12062 // Did we push a GC ref value?
12063 if (id->idGCref())
12064 {
12065#ifdef DEBUG
12066 printf("UNDONE: record GCref push [cns]\n");
12067#endif
12068 }
12069
12070 break;
12071
12072 default:
12073 assert(!"unexpected instruction");
12074 }
12075
12076 return dst;
12077}
12078
12079/*****************************************************************************
12080 *
12081 * Output a local jump instruction.
12082 * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
12083 * needs to get bound to an actual address and processed by branch shortening.
12084 */
12085
12086BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
12087{
12088 unsigned srcOffs;
12089 unsigned dstOffs;
12090 ssize_t distVal;
12091
12092 instrDescJmp* id = (instrDescJmp*)i;
12093 instruction ins = id->idIns();
12094 bool jmp;
12095 bool relAddr = true; // does the instruction use relative-addressing?
12096
12097 // SSE/AVX doesnt make any sense here
12098 assert(!IsSSEInstruction(ins));
12099 assert(!IsAVXInstruction(ins));
12100
12101 size_t ssz;
12102 size_t lsz;
12103
12104 switch (ins)
12105 {
12106 default:
12107 ssz = JCC_SIZE_SMALL;
12108 lsz = JCC_SIZE_LARGE;
12109 jmp = true;
12110 break;
12111
12112 case INS_jmp:
12113 ssz = JMP_SIZE_SMALL;
12114 lsz = JMP_SIZE_LARGE;
12115 jmp = true;
12116 break;
12117
12118 case INS_call:
12119 ssz = lsz = CALL_INST_SIZE;
12120 jmp = false;
12121 break;
12122
12123 case INS_push_hide:
12124 case INS_push:
12125 ssz = lsz = 5;
12126 jmp = false;
12127 relAddr = false;
12128 break;
12129
12130 case INS_mov:
12131 case INS_lea:
12132 ssz = lsz = id->idCodeSize();
12133 jmp = false;
12134 relAddr = false;
12135 break;
12136 }
12137
12138 // Figure out the distance to the target
12139 srcOffs = emitCurCodeOffs(dst);
12140 dstOffs = id->idAddr()->iiaIGlabel->igOffs;
12141
12142 if (relAddr)
12143 {
12144 distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
12145 }
12146 else
12147 {
12148 distVal = (ssize_t)emitOffsetToPtr(dstOffs);
12149 }
12150
12151 if (dstOffs <= srcOffs)
12152 {
12153 // This is a backward jump - distance is known at this point
12154 CLANG_FORMAT_COMMENT_ANCHOR;
12155
12156#if DEBUG_EMIT
12157 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
12158 {
12159 size_t blkOffs = id->idjIG->igOffs;
12160
12161 if (INTERESTING_JUMP_NUM == 0)
12162 {
12163 printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12164 }
12165 printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
12166 printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
12167 printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
12168 }
12169#endif
12170
12171 // Can we use a short jump?
12172 if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
12173 {
12174 emitSetShortJump(id);
12175 }
12176 }
12177 else
12178 {
12179 // This is a forward jump - distance will be an upper limit
12180 emitFwdJumps = true;
12181
12182 // The target offset will be closer by at least 'emitOffsAdj', but only if this
12183 // jump doesn't cross the hot-cold boundary.
12184 if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12185 {
12186 dstOffs -= emitOffsAdj;
12187 distVal -= emitOffsAdj;
12188 }
12189
12190 // Record the location of the jump for later patching
12191 id->idjOffs = dstOffs;
12192
12193 // Are we overflowing the id->idjOffs bitfield?
12194 if (id->idjOffs != dstOffs)
12195 {
12196 IMPL_LIMITATION("Method is too large");
12197 }
12198
12199#if DEBUG_EMIT
12200 if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
12201 {
12202 size_t blkOffs = id->idjIG->igOffs;
12203
12204 if (INTERESTING_JUMP_NUM == 0)
12205 {
12206 printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12207 }
12208 printf("[4] Jump block is at %08X\n", blkOffs);
12209 printf("[4] Jump is at %08X\n", srcOffs);
12210 printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
12211 }
12212#endif
12213
12214 // Can we use a short jump?
12215 if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
12216 {
12217 emitSetShortJump(id);
12218 }
12219 }
12220
12221 // Adjust the offset to emit relative to the end of the instruction
12222 if (relAddr)
12223 {
12224 distVal -= id->idjShort ? ssz : lsz;
12225 }
12226
12227#ifdef DEBUG
12228 if (0 && emitComp->verbose)
12229 {
12230 size_t sz = id->idjShort ? ssz : lsz;
12231 int distValSize = id->idjShort ? 4 : 8;
12232 printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
12233 emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
12234 distVal);
12235 }
12236#endif
12237
12238 // What size jump should we use?
12239 if (id->idjShort)
12240 {
12241 // Short jump
12242 assert(!id->idjKeepLong);
12243 assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
12244
12245 assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
12246 assert(JMP_SIZE_SMALL == 2);
12247
12248 assert(jmp);
12249
12250 if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
12251 {
12252 emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
12253
12254#ifdef DEBUG
12255 if (emitComp->verbose)
12256 {
12257 printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
12258 }
12259#endif
12260 }
12261
12262 dst += emitOutputByte(dst, insCode(ins));
12263
12264 // For forward jumps, record the address of the distance value
12265 id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr;
12266
12267 dst += emitOutputByte(dst, distVal);
12268 }
12269 else
12270 {
12271 code_t code;
12272
12273 // Long jump
12274 if (jmp)
12275 {
12276 // clang-format off
12277 assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
12278 assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
12279 assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
12280 assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
12281 assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
12282 assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
12283 assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
12284 assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
12285 assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
12286 assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
12287 assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe);
12288 assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo);
12289 assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
12290 assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
12291 assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
12292 assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
12293 // clang-format on
12294
12295 code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
12296 }
12297 else if (ins == INS_push || ins == INS_push_hide)
12298 {
12299 assert(insCodeMI(INS_push) == 0x68);
12300 code = 0x68;
12301 }
12302 else if (ins == INS_mov)
12303 {
12304 // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
12305 insFormat tmpInsFmt = id->idInsFmt();
12306 insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel;
12307 bool tmpDspReloc = id->idIsDspReloc();
12308
12309 id->idInsFmt(IF_SWR_CNS);
12310 id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
12311 id->idSetIsDspReloc(false);
12312
12313 dst = emitOutputSV(dst, id, insCodeMI(ins));
12314
12315 // Restore id fields with original values
12316 id->idInsFmt(tmpInsFmt);
12317 id->idAddr()->iiaIGlabel = tmpIGlabel;
12318 id->idSetIsDspReloc(tmpDspReloc);
12319 code = 0xCC;
12320 }
12321 else if (ins == INS_lea)
12322 {
12323 // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
12324 // We basically are doing what emitIns_R_AI does.
12325 // TODO-XArch-Cleanup: revisit this.
12326 instrDescAmd idAmdStackLocal;
12327 instrDescAmd* idAmd = &idAmdStackLocal;
12328 *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields
12329 memset((BYTE*)idAmd + sizeof(instrDesc), 0,
12330 sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
12331
12332 idAmd->idInsFmt(IF_RWR_ARD);
12333 idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
12334 idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
12335 emitSetAmdDisp(idAmd, distVal); // set the displacement
12336 idAmd->idSetIsDspReloc(id->idIsDspReloc());
12337 assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
12338
12339 UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
12340 idAmd->idCodeSize(sz);
12341
12342 code = insCodeRM(ins);
12343 code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8);
12344
12345 dst = emitOutputAM(dst, idAmd, code, nullptr);
12346
12347 code = 0xCC;
12348
12349 // For forward jumps, record the address of the distance value
12350 // Hard-coded 4 here because we already output the displacement, as the last thing.
12351 id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr;
12352
12353 // We're done
12354 return dst;
12355 }
12356 else
12357 {
12358 code = 0xE8;
12359 }
12360
12361 if (ins != INS_mov)
12362 {
12363 dst += emitOutputByte(dst, code);
12364
12365 if (code & 0xFF00)
12366 {
12367 dst += emitOutputByte(dst, code >> 8);
12368 }
12369 }
12370
12371 // For forward jumps, record the address of the distance value
12372 id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
12373
12374 dst += emitOutputLong(dst, distVal);
12375
12376#ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
12377 if (emitComp->opts.compReloc)
12378#endif
12379 {
12380 if (!relAddr)
12381 {
12382 emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW);
12383 }
12384 else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12385 {
12386 assert(id->idjKeepLong);
12387 emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
12388 }
12389 }
12390 }
12391
12392 // Local calls kill all registers
12393 if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs))
12394 {
12395 emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst);
12396 }
12397
12398 return dst;
12399}
12400
12401/*****************************************************************************
12402 *
12403 * Append the machine code corresponding to the given instruction descriptor
12404 * to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
12405 * is the instruction group that contains the instruction. Updates '*dp' to
12406 * point past the generated code, and returns the size of the instruction
12407 * descriptor in bytes.
12408 */
12409
12410#ifdef _PREFAST_
12411#pragma warning(push)
12412#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12413#endif
12414size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
12415{
12416 assert(emitIssuing);
12417
12418 BYTE* dst = *dp;
12419 size_t sz = sizeof(instrDesc);
12420 instruction ins = id->idIns();
12421 unsigned char callInstrSize = 0;
12422
12423#ifdef DEBUG
12424 bool dspOffs = emitComp->opts.dspGCtbls;
12425#endif // DEBUG
12426
12427 emitAttr size = id->idOpSize();
12428
12429 assert(REG_NA == (int)REG_NA);
12430
12431 assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit
12432 assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit
12433
12434 VARSET_TP GCvars(VarSetOps::UninitVal());
12435
12436 // What instruction format have we got?
12437 switch (id->idInsFmt())
12438 {
12439 code_t code;
12440 unsigned regcode;
12441 int args;
12442 CnsVal cnsVal;
12443
12444 BYTE* addr;
12445 bool recCall;
12446
12447 regMaskTP gcrefRegs;
12448 regMaskTP byrefRegs;
12449
12450 /********************************************************************/
12451 /* No operands */
12452 /********************************************************************/
12453 case IF_NONE:
12454 // the loop alignment pseudo instruction
12455 if (ins == INS_align)
12456 {
12457 sz = SMALL_IDSC_SIZE;
12458 dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f);
12459 assert(((size_t)dst & 0x0f) == 0);
12460 break;
12461 }
12462
12463 if (ins == INS_nop)
12464 {
12465 dst = emitOutputNOP(dst, id->idCodeSize());
12466 break;
12467 }
12468
12469 // the cdq instruction kills the EDX register implicitly
12470 if (ins == INS_cdq)
12471 {
12472 emitGCregDeadUpd(REG_EDX, dst);
12473 }
12474
12475 assert(id->idGCref() == GCT_NONE);
12476
12477 code = insCodeMR(ins);
12478
12479#ifdef _TARGET_AMD64_
12480 // Support only scalar AVX instructions and hence size is hard coded to 4-byte.
12481 code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
12482
12483 if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
12484 {
12485 code = AddRexWPrefix(ins, code);
12486 }
12487 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12488#endif
12489 // Is this a 'big' opcode?
12490 if (code & 0xFF000000)
12491 {
12492 // The high word and then the low word
12493 dst += emitOutputWord(dst, code >> 16);
12494 code &= 0x0000FFFF;
12495 dst += emitOutputWord(dst, code);
12496 }
12497 else if (code & 0x00FF0000)
12498 {
12499 // The high byte and then the low word
12500 dst += emitOutputByte(dst, code >> 16);
12501 code &= 0x0000FFFF;
12502 dst += emitOutputWord(dst, code);
12503 }
12504 else if (code & 0xFF00)
12505 {
12506 // The 2 byte opcode
12507 dst += emitOutputWord(dst, code);
12508 }
12509 else
12510 {
12511 // The 1 byte opcode
12512 dst += emitOutputByte(dst, code);
12513 }
12514
12515 break;
12516
12517 /********************************************************************/
12518 /* Simple constant, local label, method */
12519 /********************************************************************/
12520
12521 case IF_CNS:
12522 dst = emitOutputIV(dst, id);
12523 sz = emitSizeOfInsDsc(id);
12524 break;
12525
12526 case IF_LABEL:
12527 case IF_RWR_LABEL:
12528 case IF_SWR_LABEL:
12529 assert(id->idGCref() == GCT_NONE);
12530 assert(id->idIsBound());
12531
12532 // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
12533 dst = emitOutputLJ(dst, id);
12534 sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
12535 break;
12536
12537 case IF_METHOD:
12538 case IF_METHPTR:
12539 // Assume we'll be recording this call
12540 recCall = true;
12541
12542 // Get hold of the argument count and field Handle
12543 args = emitGetInsCDinfo(id);
12544
12545 // Is this a "fat" call descriptor?
12546 if (id->idIsLargeCall())
12547 {
12548 instrDescCGCA* idCall = (instrDescCGCA*)id;
12549 gcrefRegs = idCall->idcGcrefRegs;
12550 byrefRegs = idCall->idcByrefRegs;
12551 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12552 sz = sizeof(instrDescCGCA);
12553 }
12554 else
12555 {
12556 assert(!id->idIsLargeDsp());
12557 assert(!id->idIsLargeCns());
12558
12559 gcrefRegs = emitDecodeCallGCregs(id);
12560 byrefRegs = 0;
12561 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12562 sz = sizeof(instrDesc);
12563 }
12564
12565 addr = (BYTE*)id->idAddr()->iiaAddr;
12566 assert(addr != nullptr);
12567
12568 // Some helpers don't get recorded in GC tables
12569 if (id->idIsNoGC())
12570 {
12571 recCall = false;
12572 }
12573
12574 // What kind of a call do we have here?
12575 if (id->idInsFmt() == IF_METHPTR)
12576 {
12577 // This is call indirect via a method pointer
12578
12579 code = insCodeMR(ins);
12580 if (ins == INS_i_jmp)
12581 {
12582 code |= 1;
12583 }
12584
12585 if (id->idIsDspReloc())
12586 {
12587 dst += emitOutputWord(dst, code | 0x0500);
12588#ifdef _TARGET_AMD64_
12589 dst += emitOutputLong(dst, 0);
12590#else
12591 dst += emitOutputLong(dst, (int)addr);
12592#endif
12593 emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
12594 }
12595 else
12596 {
12597#ifdef _TARGET_X86_
12598 dst += emitOutputWord(dst, code | 0x0500);
12599#else //_TARGET_AMD64_
12600 // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
12601 // This addr mode should never be used while generating relocatable ngen code nor if
12602 // the addr can be encoded as pc-relative address.
12603 noway_assert(!emitComp->opts.compReloc);
12604 noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
12605 noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
12606
12607 // This requires, specifying a SIB byte after ModRM byte.
12608 dst += emitOutputWord(dst, code | 0x0400);
12609 dst += emitOutputByte(dst, 0x25);
12610#endif //_TARGET_AMD64_
12611 dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
12612 }
12613 goto DONE_CALL;
12614 }
12615
12616 // Else
12617 // This is call direct where we know the target, thus we can
12618 // use a direct call; the target to jump to is in iiaAddr.
12619 assert(id->idInsFmt() == IF_METHOD);
12620
12621 // Output the call opcode followed by the target distance
12622 dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
12623
12624 ssize_t offset;
12625#ifdef _TARGET_AMD64_
12626 // All REL32 on Amd64 go through recordRelocation. Here we will output zero to advance dst.
12627 offset = 0;
12628 assert(id->idIsDspReloc());
12629#else
12630 // Calculate PC relative displacement.
12631 // Although you think we should be using sizeof(void*), the x86 and x64 instruction set
12632 // only allow a 32-bit offset, so we correctly use sizeof(INT32)
12633 offset = addr - (dst + sizeof(INT32));
12634#endif
12635
12636 dst += emitOutputLong(dst, offset);
12637
12638 if (id->idIsDspReloc())
12639 {
12640 emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
12641 }
12642
12643 DONE_CALL:
12644
12645 /* We update the GC info before the call as the variables cannot be
12646 used by the call. Killing variables before the call helps with
12647 boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
12648 If we ever track aliased variables (which could be used by the
12649 call), we would have to keep them alive past the call.
12650 */
12651 assert(FitsIn<unsigned char>(dst - *dp));
12652 callInstrSize = static_cast<unsigned char>(dst - *dp);
12653 emitUpdateLiveGCvars(GCvars, *dp);
12654
12655 // If the method returns a GC ref, mark EAX appropriately
12656 if (id->idGCref() == GCT_GCREF)
12657 {
12658 gcrefRegs |= RBM_EAX;
12659 }
12660 else if (id->idGCref() == GCT_BYREF)
12661 {
12662 byrefRegs |= RBM_EAX;
12663 }
12664
12665#ifdef UNIX_AMD64_ABI
12666 // If is a multi-register return method is called, mark RDX appropriately (for System V AMD64).
12667 if (id->idIsLargeCall())
12668 {
12669 instrDescCGCA* idCall = (instrDescCGCA*)id;
12670 if (idCall->idSecondGCref() == GCT_GCREF)
12671 {
12672 gcrefRegs |= RBM_RDX;
12673 }
12674 else if (idCall->idSecondGCref() == GCT_BYREF)
12675 {
12676 byrefRegs |= RBM_RDX;
12677 }
12678 }
12679#endif // UNIX_AMD64_ABI
12680
12681 // If the GC register set has changed, report the new set
12682 if (gcrefRegs != emitThisGCrefRegs)
12683 {
12684 emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
12685 }
12686
12687 if (byrefRegs != emitThisByrefRegs)
12688 {
12689 emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
12690 }
12691
12692 if (recCall || args)
12693 {
12694 // For callee-pop, all arguments will be popped after the call.
12695 // For caller-pop, any GC arguments will go dead after the call.
12696
12697 assert(callInstrSize != 0);
12698
12699 if (args >= 0)
12700 {
12701 emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
12702 }
12703 else
12704 {
12705 emitStackKillArgs(dst, -args, callInstrSize);
12706 }
12707 }
12708
12709 // Do we need to record a call location for GC purposes?
12710 if (!emitFullGCinfo && recCall)
12711 {
12712 assert(callInstrSize != 0);
12713 emitRecordGCcall(dst, callInstrSize);
12714 }
12715
12716#ifdef DEBUG
12717 if (ins == INS_call)
12718 {
12719 emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
12720 (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
12721 }
12722#endif // DEBUG
12723
12724 break;
12725
12726 /********************************************************************/
12727 /* One register operand */
12728 /********************************************************************/
12729
12730 case IF_RRD:
12731 case IF_RWR:
12732 case IF_RRW:
12733 dst = emitOutputR(dst, id);
12734 sz = SMALL_IDSC_SIZE;
12735 break;
12736
12737 /********************************************************************/
12738 /* Register and register/constant */
12739 /********************************************************************/
12740
12741 case IF_RRW_SHF:
12742 code = insCodeMR(ins);
12743 // Emit the VEX prefix if it exists
12744 code = AddVexPrefixIfNeeded(ins, code, size);
12745 code = insEncodeMRreg(ins, id->idReg1(), size, code);
12746
12747 // set the W bit
12748 if (size != EA_1BYTE)
12749 {
12750 code |= 1;
12751 }
12752
12753 // Emit the REX prefix if it exists
12754 if (TakesRexWPrefix(ins, size))
12755 {
12756 code = AddRexWPrefix(ins, code);
12757 }
12758
12759 // Output a size prefix for a 16-bit operand
12760 if (size == EA_2BYTE)
12761 {
12762 dst += emitOutputByte(dst, 0x66);
12763 }
12764
12765 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12766 dst += emitOutputWord(dst, code);
12767 dst += emitOutputByte(dst, emitGetInsSC(id));
12768 sz = emitSizeOfInsDsc(id);
12769
12770 // Update GC info.
12771 assert(!id->idGCref());
12772 emitGCregDeadUpd(id->idReg1(), dst);
12773 break;
12774
12775 case IF_RRD_RRD:
12776 case IF_RWR_RRD:
12777 case IF_RRW_RRD:
12778 case IF_RRW_RRW:
12779 dst = emitOutputRR(dst, id);
12780 sz = SMALL_IDSC_SIZE;
12781 break;
12782
12783 case IF_RRD_CNS:
12784 case IF_RWR_CNS:
12785 case IF_RRW_CNS:
12786 dst = emitOutputRI(dst, id);
12787 sz = emitSizeOfInsDsc(id);
12788 break;
12789
12790 case IF_RWR_RRD_RRD:
12791 dst = emitOutputRRR(dst, id);
12792 sz = emitSizeOfInsDsc(id);
12793 break;
12794 case IF_RWR_RRD_RRD_CNS:
12795 case IF_RWR_RRD_RRD_RRD:
12796 dst = emitOutputRRR(dst, id);
12797 sz = emitSizeOfInsDsc(id);
12798 dst += emitOutputByte(dst, emitGetInsSC(id));
12799 break;
12800
12801 case IF_RRW_RRW_CNS:
12802 assert(id->idGCref() == GCT_NONE);
12803
12804 // Get the 'base' opcode (it's a big one)
12805 // Also, determine which operand goes where in the ModRM byte.
12806 regNumber mReg;
12807 regNumber rReg;
12808 if (hasCodeMR(ins))
12809 {
12810 code = insCodeMR(ins);
12811 // Emit the VEX prefix if it exists
12812 code = AddVexPrefixIfNeeded(ins, code, size);
12813 code = insEncodeMRreg(ins, code);
12814 mReg = id->idReg1();
12815 rReg = id->idReg2();
12816 }
12817 else if (hasCodeMI(ins))
12818 {
12819 code = insCodeMI(ins);
12820
12821 // Emit the VEX prefix if it exists
12822 code = AddVexPrefixIfNeeded(ins, code, size);
12823
12824 assert((code & 0xC000) == 0);
12825 code |= 0xC000;
12826
12827 mReg = id->idReg2();
12828
12829 // The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
12830 rReg = getSseShiftRegNumber(ins);
12831 }
12832 else
12833 {
12834 code = insCodeRM(ins);
12835 // Emit the VEX prefix if it exists
12836 code = AddVexPrefixIfNeeded(ins, code, size);
12837 code = insEncodeRMreg(ins, code);
12838 mReg = id->idReg2();
12839 rReg = id->idReg1();
12840 }
12841 assert(code & 0x00FF0000);
12842
12843 if (TakesRexWPrefix(ins, size))
12844 {
12845 code = AddRexWPrefix(ins, code);
12846 }
12847
12848 if (TakesVexPrefix(ins))
12849 {
12850 if (IsDstDstSrcAVXInstruction(ins))
12851 {
12852 // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
12853 // This code will have to change when we support 3 operands.
12854 // For now, we always overload this source with the destination (always reg1).
12855 // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
12856 // e.g. pslldq, when/if we support those instructions with 2 registers.)
12857 // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
12858 code = insEncodeReg3456(ins, id->idReg1(), size, code);
12859 }
12860 else if (IsDstSrcSrcAVXInstruction(ins))
12861 {
12862 // This is a "merge" move instruction.
12863 // Encode source operand reg in 'vvvv' bits in 1's complement form
12864 code = insEncodeReg3456(ins, id->idReg2(), size, code);
12865 }
12866 }
12867
12868 regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code));
12869
12870 // Output the REX prefix
12871 dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12872
12873 if (code & 0xFF000000)
12874 {
12875 // Output the highest word of the opcode
12876 dst += emitOutputWord(dst, code >> 16);
12877 code &= 0x0000FFFF;
12878
12879 if (Is4ByteSSEInstruction(ins))
12880 {
12881 // Output 3rd byte of the opcode
12882 dst += emitOutputByte(dst, code);
12883 code &= 0xFF00;
12884 }
12885 }
12886 else if (code & 0x00FF0000)
12887 {
12888 dst += emitOutputByte(dst, code >> 16);
12889 code &= 0x0000FFFF;
12890 }
12891
12892 // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
12893 if ((code & 0xFF00) == 0xC000)
12894 {
12895 dst += emitOutputWord(dst, code | (regcode << 8));
12896 }
12897 else if ((code & 0xFF) == 0x00)
12898 {
12899 // This case happens for some SSE/AVX instructions only
12900 assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins));
12901
12902 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
12903 dst += emitOutputByte(dst, (0xC0 | regcode));
12904 }
12905 else
12906 {
12907 dst += emitOutputWord(dst, code);
12908 dst += emitOutputByte(dst, (0xC0 | regcode));
12909 }
12910
12911 dst += emitOutputByte(dst, emitGetInsSC(id));
12912 sz = emitSizeOfInsDsc(id);
12913
12914 // Kill any GC ref in the destination register if necessary.
12915 if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
12916 {
12917 emitGCregDeadUpd(id->idReg1(), dst);
12918 }
12919 break;
12920
12921 /********************************************************************/
12922 /* Address mode operand */
12923 /********************************************************************/
12924
12925 case IF_ARD:
12926 case IF_AWR:
12927 case IF_ARW:
12928
12929 dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
12930
12931 switch (ins)
12932 {
12933 case INS_call:
12934
12935 IND_CALL:
12936 // Get hold of the argument count and method handle
12937 args = emitGetInsCIargs(id);
12938
12939 // Is this a "fat" call descriptor?
12940 if (id->idIsLargeCall())
12941 {
12942 instrDescCGCA* idCall = (instrDescCGCA*)id;
12943
12944 gcrefRegs = idCall->idcGcrefRegs;
12945 byrefRegs = idCall->idcByrefRegs;
12946 VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12947 sz = sizeof(instrDescCGCA);
12948 }
12949 else
12950 {
12951 assert(!id->idIsLargeDsp());
12952 assert(!id->idIsLargeCns());
12953
12954 gcrefRegs = emitDecodeCallGCregs(id);
12955 byrefRegs = 0;
12956 VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12957 sz = sizeof(instrDesc);
12958 }
12959
12960 recCall = true;
12961
12962 goto DONE_CALL;
12963
12964 default:
12965 sz = emitSizeOfInsDsc(id);
12966 break;
12967 }
12968 break;
12969
12970 case IF_RRW_ARD_CNS:
12971 case IF_RWR_ARD_CNS:
12972 emitGetInsAmdCns(id, &cnsVal);
12973 code = insCodeRM(ins);
12974
12975 // Special case 4-byte AVX instructions
12976 if (EncodedBySSE38orSSE3A(ins))
12977 {
12978 dst = emitOutputAM(dst, id, code, &cnsVal);
12979 }
12980 else
12981 {
12982 code = AddVexPrefixIfNeeded(ins, code, size);
12983 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
12984 dst = emitOutputAM(dst, id, code | regcode, &cnsVal);
12985 }
12986
12987 sz = emitSizeOfInsDsc(id);
12988 break;
12989
12990 case IF_AWR_RRD_CNS:
12991 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
12992 assert(UseVEXEncoding());
12993 emitGetInsAmdCns(id, &cnsVal);
12994 code = insCodeMR(ins);
12995 dst = emitOutputAM(dst, id, code, &cnsVal);
12996 sz = emitSizeOfInsDsc(id);
12997 break;
12998
12999 case IF_RRD_ARD:
13000 case IF_RWR_ARD:
13001 case IF_RRW_ARD:
13002 case IF_RWR_RRD_ARD:
13003 {
13004 code = insCodeRM(ins);
13005 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
13006 {
13007 dst = emitOutputAM(dst, id, code);
13008 }
13009 else
13010 {
13011 code = AddVexPrefixIfNeeded(ins, code, size);
13012 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13013 dst = emitOutputAM(dst, id, code | regcode);
13014 }
13015 sz = emitSizeOfInsDsc(id);
13016 break;
13017 }
13018
13019 case IF_RWR_ARD_RRD:
13020 {
13021 assert(IsAVX2GatherInstruction(ins));
13022 code = insCodeRM(ins);
13023 dst = emitOutputAM(dst, id, code);
13024 sz = emitSizeOfInsDsc(id);
13025 break;
13026 }
13027
13028 case IF_RWR_RRD_ARD_CNS:
13029 case IF_RWR_RRD_ARD_RRD:
13030 {
13031 emitGetInsAmdCns(id, &cnsVal);
13032 code = insCodeRM(ins);
13033 if (EncodedBySSE38orSSE3A(ins))
13034 {
13035 dst = emitOutputAM(dst, id, code, &cnsVal);
13036 }
13037 else
13038 {
13039 code = AddVexPrefixIfNeeded(ins, code, size);
13040 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13041 dst = emitOutputAM(dst, id, code | regcode, &cnsVal);
13042 }
13043 sz = emitSizeOfInsDsc(id);
13044 break;
13045 }
13046
13047 case IF_ARD_RRD:
13048 case IF_AWR_RRD:
13049 case IF_ARW_RRD:
13050 code = insCodeMR(ins);
13051 code = AddVexPrefixIfNeeded(ins, code, size);
13052 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13053 dst = emitOutputAM(dst, id, code | regcode);
13054 sz = emitSizeOfInsDsc(id);
13055 break;
13056
13057 case IF_AWR_RRD_RRD:
13058 {
13059 code = insCodeMR(ins);
13060 code = AddVexPrefixIfNeeded(ins, code, size);
13061 dst = emitOutputAM(dst, id, code);
13062 sz = emitSizeOfInsDsc(id);
13063 break;
13064 }
13065
13066 case IF_ARD_CNS:
13067 case IF_AWR_CNS:
13068 case IF_ARW_CNS:
13069 emitGetInsAmdCns(id, &cnsVal);
13070 dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
13071 sz = emitSizeOfInsDsc(id);
13072 break;
13073
13074 case IF_ARW_SHF:
13075 emitGetInsAmdCns(id, &cnsVal);
13076 dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
13077 sz = emitSizeOfInsDsc(id);
13078 break;
13079
13080 /********************************************************************/
13081 /* Stack-based operand */
13082 /********************************************************************/
13083
13084 case IF_SRD:
13085 case IF_SWR:
13086 case IF_SRW:
13087
13088 assert(ins != INS_pop_hide);
13089 if (ins == INS_pop)
13090 {
13091 // The offset in "pop [ESP+xxx]" is relative to the new ESP value
13092 CLANG_FORMAT_COMMENT_ANCHOR;
13093
13094#if !FEATURE_FIXED_OUT_ARGS
13095 emitCurStackLvl -= sizeof(int);
13096#endif
13097 dst = emitOutputSV(dst, id, insCodeMR(ins));
13098
13099#if !FEATURE_FIXED_OUT_ARGS
13100 emitCurStackLvl += sizeof(int);
13101#endif
13102 break;
13103 }
13104
13105 dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
13106
13107 if (ins == INS_call)
13108 {
13109 goto IND_CALL;
13110 }
13111
13112 break;
13113
13114 case IF_SRD_CNS:
13115 case IF_SWR_CNS:
13116 case IF_SRW_CNS:
13117 emitGetInsCns(id, &cnsVal);
13118 dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
13119 sz = emitSizeOfInsDsc(id);
13120 break;
13121
13122 case IF_SRW_SHF:
13123 emitGetInsCns(id, &cnsVal);
13124 dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
13125 sz = emitSizeOfInsDsc(id);
13126 break;
13127
13128 case IF_RRW_SRD_CNS:
13129 case IF_RWR_SRD_CNS:
13130 emitGetInsCns(id, &cnsVal);
13131 code = insCodeRM(ins);
13132
13133 // Special case 4-byte AVX instructions
13134 if (EncodedBySSE38orSSE3A(ins))
13135 {
13136 dst = emitOutputSV(dst, id, code, &cnsVal);
13137 }
13138 else
13139 {
13140 code = AddVexPrefixIfNeeded(ins, code, size);
13141
13142 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13143 // Note that reg1 is both a source and a destination.
13144 //
13145 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13146 // now we use the single source as source1 and source2.
13147 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13148 if (IsDstDstSrcAVXInstruction(ins))
13149 {
13150 // encode source operand reg in 'vvvv' bits in 1's complement form
13151 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13152 }
13153
13154 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13155 dst = emitOutputSV(dst, id, code | regcode, &cnsVal);
13156 }
13157
13158 sz = emitSizeOfInsDsc(id);
13159 break;
13160
13161 case IF_RRD_SRD:
13162 case IF_RWR_SRD:
13163 case IF_RRW_SRD:
13164 {
13165 code = insCodeRM(ins);
13166
13167 // 4-byte AVX instructions are special cased inside emitOutputSV
13168 // since they do not have space to encode ModRM byte.
13169 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
13170 {
13171 dst = emitOutputSV(dst, id, code);
13172 }
13173 else
13174 {
13175 code = AddVexPrefixIfNeeded(ins, code, size);
13176
13177 if (IsDstDstSrcAVXInstruction(ins))
13178 {
13179 // encode source operand reg in 'vvvv' bits in 1's complement form
13180 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13181 }
13182
13183 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13184 dst = emitOutputSV(dst, id, code | regcode);
13185 }
13186
13187 sz = emitSizeOfInsDsc(id);
13188 break;
13189 }
13190
13191 case IF_RWR_RRD_SRD:
13192 {
13193 // This should only be called on AVX instructions
13194 assert(IsAVXInstruction(ins));
13195
13196 code = insCodeRM(ins);
13197 code = AddVexPrefixIfNeeded(ins, code, size);
13198 code = insEncodeReg3456(ins, id->idReg2(), size,
13199 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13200
13201 // 4-byte AVX instructions are special cased inside emitOutputSV
13202 // since they do not have space to encode ModRM byte.
13203 if (EncodedBySSE38orSSE3A(ins))
13204 {
13205 dst = emitOutputSV(dst, id, code);
13206 }
13207 else
13208 {
13209 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13210 dst = emitOutputSV(dst, id, code | regcode);
13211 }
13212 break;
13213 }
13214
13215 case IF_RWR_RRD_SRD_CNS:
13216 case IF_RWR_RRD_SRD_RRD:
13217 {
13218 // This should only be called on AVX instructions
13219 assert(IsAVXInstruction(ins));
13220 emitGetInsCns(id, &cnsVal);
13221
13222 code = insCodeRM(ins);
13223 code = AddVexPrefixIfNeeded(ins, code, size);
13224 code = insEncodeReg3456(ins, id->idReg2(), size,
13225 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13226
13227 // 4-byte AVX instructions are special cased inside emitOutputSV
13228 // since they do not have space to encode ModRM byte.
13229 if (EncodedBySSE38orSSE3A(ins))
13230 {
13231 dst = emitOutputSV(dst, id, code, &cnsVal);
13232 }
13233 else
13234 {
13235 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13236 dst = emitOutputSV(dst, id, code | regcode, &cnsVal);
13237 }
13238
13239 sz = emitSizeOfInsDsc(id);
13240 break;
13241 }
13242
13243 case IF_SRD_RRD:
13244 case IF_SWR_RRD:
13245 case IF_SRW_RRD:
13246 code = insCodeMR(ins);
13247 code = AddVexPrefixIfNeeded(ins, code, size);
13248
13249 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13250 // Note that reg1 is both a source and a destination.
13251 //
13252 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13253 // now we use the single source as source1 and source2.
13254 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13255 if (IsDstDstSrcAVXInstruction(ins))
13256 {
13257 // encode source operand reg in 'vvvv' bits in 1's complement form
13258 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13259 }
13260
13261 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13262 dst = emitOutputSV(dst, id, code | regcode);
13263 break;
13264
13265 /********************************************************************/
13266 /* Direct memory address */
13267 /********************************************************************/
13268
13269 case IF_MRD:
13270 case IF_MRW:
13271 case IF_MWR:
13272
13273 noway_assert(ins != INS_call);
13274 dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
13275 sz = emitSizeOfInsDsc(id);
13276 break;
13277
13278 case IF_MRD_OFF:
13279 dst = emitOutputCV(dst, id, insCodeMI(ins));
13280 break;
13281
13282 case IF_RRW_MRD_CNS:
13283 case IF_RWR_MRD_CNS:
13284 emitGetInsDcmCns(id, &cnsVal);
13285 code = insCodeRM(ins);
13286
13287 // Special case 4-byte AVX instructions
13288 if (EncodedBySSE38orSSE3A(ins))
13289 {
13290 dst = emitOutputCV(dst, id, code, &cnsVal);
13291 }
13292 else
13293 {
13294 code = AddVexPrefixIfNeeded(ins, code, size);
13295
13296 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13297 // Note that reg1 is both a source and a destination.
13298 //
13299 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13300 // now we use the single source as source1 and source2.
13301 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13302 if (IsDstDstSrcAVXInstruction(ins))
13303 {
13304 // encode source operand reg in 'vvvv' bits in 1's complement form
13305 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13306 }
13307
13308 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13309 dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
13310 }
13311
13312 sz = emitSizeOfInsDsc(id);
13313 break;
13314
13315 case IF_MWR_RRD_CNS:
13316 assert(ins == INS_vextracti128 || ins == INS_vextractf128);
13317 assert(UseVEXEncoding());
13318 emitGetInsDcmCns(id, &cnsVal);
13319 code = insCodeMR(ins);
13320 // only AVX2 vextracti128 and AVX vextractf128 can reach this path,
13321 // they do not need VEX.vvvv to encode the register operand
13322 dst = emitOutputCV(dst, id, code, &cnsVal);
13323 sz = emitSizeOfInsDsc(id);
13324 break;
13325
13326 case IF_RRD_MRD:
13327 case IF_RWR_MRD:
13328 case IF_RRW_MRD:
13329 {
13330 code = insCodeRM(ins);
13331
13332 // Special case 4-byte AVX instructions
13333 if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
13334 {
13335 dst = emitOutputCV(dst, id, code);
13336 }
13337 else
13338 {
13339 code = AddVexPrefixIfNeeded(ins, code, size);
13340
13341 if (IsDstDstSrcAVXInstruction(ins))
13342 {
13343 // encode source operand reg in 'vvvv' bits in 1's complement form
13344 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13345 }
13346
13347 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13348 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
13349 }
13350
13351 sz = emitSizeOfInsDsc(id);
13352 break;
13353 }
13354
13355 case IF_RWR_RRD_MRD:
13356 {
13357 // This should only be called on AVX instructions
13358 assert(IsAVXInstruction(ins));
13359
13360 code = insCodeRM(ins);
13361 code = AddVexPrefixIfNeeded(ins, code, size);
13362 code = insEncodeReg3456(ins, id->idReg2(), size,
13363 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13364
13365 // Special case 4-byte AVX instructions
13366 if (EncodedBySSE38orSSE3A(ins))
13367 {
13368 dst = emitOutputCV(dst, id, code);
13369 }
13370 else
13371 {
13372 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13373 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
13374 }
13375 sz = emitSizeOfInsDsc(id);
13376 break;
13377 }
13378
13379 case IF_RWR_RRD_MRD_CNS:
13380 case IF_RWR_RRD_MRD_RRD:
13381 {
13382 // This should only be called on AVX instructions
13383 assert(IsAVXInstruction(ins));
13384 emitGetInsCns(id, &cnsVal);
13385
13386 code = insCodeRM(ins);
13387 code = AddVexPrefixIfNeeded(ins, code, size);
13388 code = insEncodeReg3456(ins, id->idReg2(), size,
13389 code); // encode source operand reg in 'vvvv' bits in 1's complement form
13390
13391 // Special case 4-byte AVX instructions
13392 if (EncodedBySSE38orSSE3A(ins))
13393 {
13394 dst = emitOutputCV(dst, id, code, &cnsVal);
13395 }
13396 else
13397 {
13398 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13399 dst = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
13400 }
13401 sz = emitSizeOfInsDsc(id);
13402 break;
13403 }
13404
13405 case IF_RWR_MRD_OFF:
13406 code = insCode(ins);
13407 code = AddVexPrefixIfNeeded(ins, code, size);
13408
13409 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13410 // Note that reg1 is both a source and a destination.
13411 //
13412 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13413 // now we use the single source as source1 and source2.
13414 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13415 if (IsDstDstSrcAVXInstruction(ins))
13416 {
13417 // encode source operand reg in 'vvvv' bits in 1's complement form
13418 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13419 }
13420
13421 regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
13422 dst = emitOutputCV(dst, id, code | 0x30 | regcode);
13423 sz = emitSizeOfInsDsc(id);
13424 break;
13425
13426 case IF_MRD_RRD:
13427 case IF_MWR_RRD:
13428 case IF_MRW_RRD:
13429 code = insCodeMR(ins);
13430 code = AddVexPrefixIfNeeded(ins, code, size);
13431
13432 // In case of AVX instructions that take 3 operands, encode reg1 as first source.
13433 // Note that reg1 is both a source and a destination.
13434 //
13435 // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13436 // now we use the single source as source1 and source2.
13437 // For this format, moves do not support a third operand, so we only need to handle the binary ops.
13438 if (IsDstDstSrcAVXInstruction(ins))
13439 {
13440 // encode source operand reg in 'vvvv' bits in 1's complement form
13441 code = insEncodeReg3456(ins, id->idReg1(), size, code);
13442 }
13443
13444 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
13445 dst = emitOutputCV(dst, id, code | regcode | 0x0500);
13446 sz = emitSizeOfInsDsc(id);
13447 break;
13448
13449 case IF_MRD_CNS:
13450 case IF_MWR_CNS:
13451 case IF_MRW_CNS:
13452 emitGetInsDcmCns(id, &cnsVal);
13453 dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
13454 sz = emitSizeOfInsDsc(id);
13455 break;
13456
13457 case IF_MRW_SHF:
13458 emitGetInsDcmCns(id, &cnsVal);
13459 dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
13460 sz = emitSizeOfInsDsc(id);
13461 break;
13462
13463 /********************************************************************/
13464 /* oops */
13465 /********************************************************************/
13466
13467 default:
13468
13469#ifdef DEBUG
13470 printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
13471 assert(!"don't know how to encode this instruction");
13472#endif
13473 break;
13474 }
13475
13476 // Make sure we set the instruction descriptor size correctly
13477 assert(sz == emitSizeOfInsDsc(id));
13478
13479#if !FEATURE_FIXED_OUT_ARGS
13480 bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
13481
13482#if FEATURE_EH_FUNCLETS
13483 updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
13484#endif // FEATURE_EH_FUNCLETS
13485
13486 // Make sure we keep the current stack level up to date
13487 if (updateStackLevel)
13488 {
13489 switch (ins)
13490 {
13491 case INS_push:
13492 // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
13493 // finally block for calling it locally for an op_leave.
13494 emitStackPush(dst, id->idGCref());
13495 break;
13496
13497 case INS_pop:
13498 emitStackPop(dst, false, /*callInstrSize*/ 0, 1);
13499 break;
13500
13501 case INS_sub:
13502 // Check for "sub ESP, icon"
13503 if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13504 {
13505 assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
13506 emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13507 }
13508 break;
13509
13510 case INS_add:
13511 // Check for "add ESP, icon"
13512 if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13513 {
13514 assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
13515 emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
13516 (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13517 }
13518 break;
13519
13520 default:
13521 break;
13522 }
13523 }
13524
13525#endif // !FEATURE_FIXED_OUT_ARGS
13526
13527 assert((int)emitCurStackLvl >= 0);
13528
13529 // Only epilog "instructions" and some pseudo-instrs
13530 // are allowed not to generate any code
13531
13532 assert(*dp != dst || emitInstHasNoCode(ins));
13533
13534#ifdef DEBUG
13535 if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
13536 {
13537 emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
13538 }
13539
13540 if (emitComp->compDebugBreak)
13541 {
13542 // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
13543 // at the beginning of this method.
13544 if (JitConfig.JitEmitPrintRefRegs() != 0)
13545 {
13546 printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
13547 printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
13548 printRegMaskInt(emitThisGCrefRegs);
13549 emitDispRegSet(emitThisGCrefRegs);
13550 printf("\n");
13551 printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
13552 printRegMaskInt(emitThisByrefRegs);
13553 emitDispRegSet(emitThisByrefRegs);
13554 printf("\n");
13555 }
13556
13557 // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called
13558 // to emit instruction a6 (i.e. IN00a6 in jitdump).
13559 if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
13560 {
13561 assert(!"JitBreakEmitOutputInstr reached");
13562 }
13563 }
13564#endif
13565
13566#ifdef TRANSLATE_PDB
13567 if (*dp != dst)
13568 {
13569 // only map instruction groups to instruction groups
13570 MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
13571 }
13572#endif
13573
13574 *dp = dst;
13575
13576#ifdef DEBUG
13577 if (ins == INS_mulEAX || ins == INS_imulEAX)
13578 {
13579 // INS_mulEAX and INS_imulEAX have an implicit target of Edx:Eax. Make sure
13580 // that we detected this and cleared its GC-status.
13581
13582 assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
13583 }
13584
13585 if (instrIs3opImul(ins))
13586 {
13587 // The target of the 3-operand imul is implicitly encoded. Make sure
13588 // that we detected the implicit register and cleared its GC-status.
13589
13590 regMaskTP regMask = genRegMask(inst3opImulReg(ins));
13591 assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
13592 }
13593#endif
13594
13595 return sz;
13596}
13597#ifdef _PREFAST_
13598#pragma warning(pop)
13599#endif
13600
13601/*****************************************************************************/
13602/*****************************************************************************/
13603
13604#endif // defined(_TARGET_XARCH_)
13605