emitxarch.cpp source code [CoreCLR/jit/emitxarch.cpp]

1	// Licensed to the .NET Foundation under one or more agreements.
2	// The .NET Foundation licenses this file to you under the MIT license.
3	// See the LICENSE file in the project root for more information.
4
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                             emitX86.cpp                                   XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
13
14	#include "jitpch.h"
15	#ifdef _MSC_VER
16	#pragma hdrstop
17	#endif
18
19	#if defined(_TARGET_XARCH_)
20
21	/***************************************************************************/
22	/***************************************************************************/
23
24	#include "instr.h"
25	#include "emit.h"
26	#include "codegen.h"
27
28	bool IsSSEInstruction(instruction ins)
29	{
30	return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_SSE_INSTRUCTION);
31	}
32
33	bool IsSSEOrAVXInstruction(instruction ins)
34	{
35	return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
36	}
37
38	bool IsAVXOnlyInstruction(instruction ins)
39	{
40	return (ins >= INS_FIRST_AVX_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
41	}
42
43	bool IsFMAInstruction(instruction ins)
44	{
45	return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
46	}
47
48	bool IsBMIInstruction(instruction ins)
49	{
50	return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
51	}
52
53	regNumber getBmiRegNumber(instruction ins)
54	{
55	switch (ins)
56	{
57	case INS_blsi:
58	{
59	return (regNumber)`3`;
60	}
61
62	case INS_blsmsk:
63	{
64	return (regNumber)`2`;
65	}
66
67	case INS_blsr:
68	{
69	return (regNumber)`1`;
70	}
71
72	default:
73	{
74	assert(IsBMIInstruction(ins));
75	return REG_NA;
76	}
77	}
78	}
79
80	regNumber getSseShiftRegNumber(instruction ins)
81	{
82	switch (ins)
83	{
84	case INS_psrldq:
85	{
86	return (regNumber)`3`;
87	}
88
89	case INS_pslldq:
90	{
91	return (regNumber)`7`;
92	}
93
94	case INS_psrld:
95	case INS_psrlw:
96	case INS_psrlq:
97	{
98	return (regNumber)`2`;
99	}
100
101	case INS_pslld:
102	case INS_psllw:
103	case INS_psllq:
104	{
105	return (regNumber)`6`;
106	}
107
108	case INS_psrad:
109	case INS_psraw:
110	{
111	return (regNumber)`4`;
112	}
113
114	default:
115	{
116	assert(!"Invalid instruction for SSE2 instruction of the form: opcode reg, immed8");
117	return REG_NA;
118	}
119	}
120	}
121
122	bool emitter::IsAVXInstruction(instruction ins)
123	{
124	return UseVEXEncoding() && IsSSEOrAVXInstruction(ins);
125	}
126
127	// Returns true if the AVX instruction is a binary operator that requires 3 operands.
128	// When we emit an instruction with only two operands, we will duplicate the destination
129	// as a source.
130	// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
131	// be formalized by adding an additional field to instruction table to
132	// to indicate whether a 3-operand instruction.
133	bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
134	{
135	return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstDstSrcAVXInstruction) != `0`) && IsAVXInstruction(ins);
136	}
137
138	// Returns true if the AVX instruction requires 3 operands that duplicate the source
139	// register in the vvvv field.
140	// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
141	// be formalized by adding an additional field to instruction table to
142	// to indicate whether a 3-operand instruction.
143	bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
144	{
145	return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != `0`) && IsAVXInstruction(ins);
146	}
147
148	#ifdef FEATURE_HW_INTRINSICS
149	//------------------------------------------------------------------------
150	// IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or
151	// "reg/mem, reg, imm" form for the legacy, VEX, and EVEX
152	// encodings.
153	//
154	// Arguments:
155	// instruction -- processor instruction to check
156	//
157	// Return Value:
158	// true if instruction has a "reg, reg/mem, imm" or "reg/mem, reg, imm" encoding
159	// form for the legacy, VEX, and EVEX encodings.
160	//
161	// That is, the instruction takes two operands, one of which is immediate, and it
162	// does not need to encode any data in the VEX.vvvv field.
163	//
164	static bool IsDstSrcImmAvxInstruction(instruction ins)
165	{
166	switch (ins)
167	{
168	case INS_aeskeygenassist:
169	case INS_extractps:
170	case INS_pextrb:
171	case INS_pextrw:
172	case INS_pextrd:
173	case INS_pextrq:
174	case INS_pshufd:
175	case INS_pshufhw:
176	case INS_pshuflw:
177	case INS_roundpd:
178	case INS_roundps:
179	return true;
180	default:
181	return false;
182	}
183	}
184	#endif // FEATURE_HW_INTRINSICS
185
186	// -------------------------------------------------------------------
187	// Is4ByteSSEInstruction: Returns true if the SSE instruction is a 4-byte opcode.
188	//
189	// Arguments:
190	// ins - instruction
191	//
192	// Note that this should be true for any of the instructions in instrsXArch.h
193	// that use the SSE38 or SSE3A macro but returns false if the VEX encoding is
194	// in use, since that encoding does not require an additional byte.
195	bool emitter::Is4ByteSSEInstruction(instruction ins)
196	{
197	return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins);
198	}
199
200	// Returns true if this instruction requires a VEX prefix
201	// All AVX instructions require a VEX prefix
202	bool emitter::TakesVexPrefix(instruction ins)
203	{
204	// special case vzeroupper as it requires 2-byte VEX prefix
205	// special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
206	switch (ins)
207	{
208	case INS_lfence:
209	case INS_mfence:
210	case INS_movnti:
211	case INS_prefetchnta:
212	case INS_prefetcht0:
213	case INS_prefetcht1:
214	case INS_prefetcht2:
215	case INS_sfence:
216	case INS_vzeroupper:
217	return false;
218	default:
219	break;
220	}
221
222	return IsAVXInstruction(ins);
223	}
224
225	// Add base VEX prefix without setting W, R, X, or B bits
226	// L bit will be set based on emitter attr.
227	//
228	// 2-byte VEX prefix = C5 <R,vvvv,L,pp>
229	// 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
230	// - R, X, B, W - bits to express corresponding REX prefixes
231	// - m-mmmmm (5-bit)
232	// 0-00001 - implied leading 0F opcode byte
233	// 0-00010 - implied leading 0F 38 opcode bytes
234	// 0-00011 - implied leading 0F 3A opcode bytes
235	// Rest - reserved for future use and usage of them will uresult in Undefined instruction exception
236	//
237	// - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
238	// - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1)
239	// - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
240	// these prefixes are treated mandatory when used with escape opcode 0Fh for
241	// some SIMD instructions
242	// 00 - None (0F - packed float)
243	// 01 - 66 (66 0F - packed double)
244	// 10 - F3 (F3 0F - scalar float
245	// 11 - F2 (F2 0F - scalar double)
246	#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
247	#define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
248	#define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
249	emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
250	{
251	// The 2-byte VEX encoding is preferred when possible, but actually emitting
252	// it depends on a number of factors that we may not know until much later.
253	//
254	// In order to handle this "easily", we just carry the 3-byte encoding all
255	// the way through and "fix-up" the encoding when the VEX prefix is actually
256	// emitted, by simply checking that all the requirements were met.
257
258	// Only AVX instructions require VEX prefix
259	assert(IsAVXInstruction(ins));
260
261	// Shouldn't have already added VEX prefix
262	assert(!hasVexPrefix(code));
263
264	assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == `0`);
265
266	code \|= DEFAULT_3BYTE_VEX_PREFIX;
267
268	if (attr == EA_32BYTE)
269	{
270	// Set L bit to 1 in case of instructions that operate on 256-bits.
271	code \|= LBIT_IN_3BYTE_VEX_PREFIX;
272	}
273
274	return code;
275	}
276
277	// Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
278	bool TakesRexWPrefix(instruction ins, emitAttr attr)
279	{
280	// Because the current implementation of AVX does not have a way to distinguish between the register
281	// size specification (128 vs. 256 bits) and the operand size specification (32 vs. 64 bits), where both are
282	// required, the instruction must be created with the register size attribute (EA_16BYTE or EA_32BYTE),
283	// and here we must special case these by the opcode.
284	switch (ins)
285	{
286	case INS_vpermpd:
287	case INS_vpermq:
288	case INS_vpsrlvq:
289	case INS_vpsllvq:
290	case INS_pinsrq:
291	case INS_pextrq:
292	case INS_vfmadd132pd:
293	case INS_vfmadd213pd:
294	case INS_vfmadd231pd:
295	case INS_vfmadd132sd:
296	case INS_vfmadd213sd:
297	case INS_vfmadd231sd:
298	case INS_vfmaddsub132pd:
299	case INS_vfmaddsub213pd:
300	case INS_vfmaddsub231pd:
301	case INS_vfmsubadd132pd:
302	case INS_vfmsubadd213pd:
303	case INS_vfmsubadd231pd:
304	case INS_vfmsub132pd:
305	case INS_vfmsub213pd:
306	case INS_vfmsub231pd:
307	case INS_vfmsub132sd:
308	case INS_vfmsub213sd:
309	case INS_vfmsub231sd:
310	case INS_vfnmadd132pd:
311	case INS_vfnmadd213pd:
312	case INS_vfnmadd231pd:
313	case INS_vfnmadd132sd:
314	case INS_vfnmadd213sd:
315	case INS_vfnmadd231sd:
316	case INS_vfnmsub132pd:
317	case INS_vfnmsub213pd:
318	case INS_vfnmsub231pd:
319	case INS_vfnmsub132sd:
320	case INS_vfnmsub213sd:
321	case INS_vfnmsub231sd:
322	case INS_vpmaskmovq:
323	case INS_vpgatherdq:
324	case INS_vpgatherqq:
325	case INS_vgatherdpd:
326	case INS_vgatherqpd:
327	return true;
328	default:
329	break;
330	}
331
332	#ifdef _TARGET_AMD64_
333	// movsx should always sign extend out to 8 bytes just because we don't track
334	// whether the dest should be 4 bytes or 8 bytes (attr indicates the size
335	// of the source, not the dest).
336	// A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special
337	// cased here.
338	//
339	// Rex_jmp = jmp with rex prefix always requires rex.w prefix.
340	if (ins == INS_movsx \|\| ins == INS_rex_jmp)
341	{
342	return true;
343	}
344
345	if (EA_SIZE(attr) != EA_8BYTE)
346	{
347	return false;
348	}
349
350	if (IsSSEOrAVXInstruction(ins))
351	{
352	switch (ins)
353	{
354	case INS_andn:
355	case INS_bextr:
356	case INS_blsi:
357	case INS_blsmsk:
358	case INS_blsr:
359	case INS_bzhi:
360	case INS_cvttsd2si:
361	case INS_cvttss2si:
362	case INS_cvtsd2si:
363	case INS_cvtss2si:
364	case INS_cvtsi2sd:
365	case INS_cvtsi2ss:
366	case INS_mov_xmm2i:
367	case INS_mov_i2xmm:
368	case INS_movnti:
369	case INS_mulx:
370	case INS_pdep:
371	case INS_pext:
372	return true;
373	default:
374	return false;
375	}
376	}
377
378	// TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
379	// opcodes...
380	// These are all the instructions that default to 8-byte operand without the REX.W bit
381	// With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
382	// so we never need it
383	if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
384	(ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
385	{
386	return true;
387	}
388	else
389	{
390	return false;
391	}
392	#else //!_TARGET_AMD64 = _TARGET_X86_
393	return false;
394	#endif //!_TARGET_AMD64_
395	}
396
397	// Returns true if using this register will require a REX. prefix.*
398	// Since XMM registers overlap with YMM registers, this routine
399	// can also be used to know whether a YMM register if the
400	// instruction in question is AVX.
401	bool IsExtendedReg(regNumber reg)
402	{
403	#ifdef _TARGET_AMD64_
404	return ((reg >= REG_R8) && (reg <= REG_R15)) \|\| ((reg >= REG_XMM8) && (reg <= REG_XMM15));
405	#else
406	// X86 JIT operates in 32-bit mode and hence extended reg are not available.
407	return false;
408	#endif
409	}
410
411	// Returns true if using this register, for the given EA_SIZE(attr), will require a REX. prefix*
412	bool IsExtendedReg(regNumber reg, emitAttr attr)
413	{
414	#ifdef _TARGET_AMD64_
415	// Not a register, so doesn't need a prefix
416	if (reg > REG_XMM15)
417	{
418	return false;
419	}
420
421	// Opcode field only has 3 bits for the register, these high registers
422	// need a 4th bit, that comes from the REX prefix (eiter REX.X, REX.R, or REX.B)
423	if (IsExtendedReg(reg))
424	{
425	return true;
426	}
427
428	if (EA_SIZE(attr) != EA_1BYTE)
429	{
430	return false;
431	}
432
433	// There are 12 one byte registers addressible 'below' r8b:
434	// al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
435	// The first 4 are always addressible, the last 8 are divided into 2 sets:
436	// ah, ch, dh, bh
437	// -- or --
438	// spl, bpl, sil, dil
439	// Both sets are encoded exactly the same, the difference is the presence
440	// of a REX prefix, even a REX prefix with no other bits set (0x40).
441	// So in order to get to the second set we need a REX prefix (but no bits).
442	//
443	// TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
444	// encoding/tracking/encoding registers.
445	return (reg >= REG_RSP);
446	#else
447	// X86 JIT operates in 32-bit mode and hence extended reg are not available.
448	return false;
449	#endif
450	}
451
452	// Since XMM registers overlap with YMM registers, this routine
453	// can also used to know whether a YMM register in case of AVX instructions.
454	bool IsXMMReg(regNumber reg)
455	{
456	#ifdef _TARGET_AMD64_
457	return (reg >= REG_XMM0) && (reg <= REG_XMM15);
458	#else // !_TARGET_AMD64_
459	return (reg >= REG_XMM0) && (reg <= REG_XMM7);
460	#endif // !_TARGET_AMD64_
461	}
462
463	// Returns bits to be encoded in instruction for the given register.
464	unsigned RegEncoding(regNumber reg)
465	{
466	static_assert((REG_XMM0 & `0x7`) == `0`, "bad XMMBASE");
467	return (unsigned)(reg & `0x7`);
468	}
469
470	// Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
471	// SSE2: separate 1-byte prefix gets added before opcode.
472	// AVX: specific bits within VEX prefix need to be set in bit-inverted form.
473	emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
474	{
475	if (UseVEXEncoding() && IsAVXInstruction(ins))
476	{
477	if (TakesVexPrefix(ins))
478	{
479	// W-bit is available only in 3-byte VEX prefix that starts with byte C4.
480	assert(hasVexPrefix(code));
481
482	// W-bit is the only bit that is added in non bit-inverted form.
483	return emitter::code_t(code \| `0x00008000000000ULL`);
484	}
485	}
486	#ifdef _TARGET_AMD64_
487	return emitter::code_t(code \| `0x4800000000ULL`);
488	#else
489	assert(!"UNREACHED");
490	return code;
491	#endif
492	}
493
494	#ifdef _TARGET_AMD64_
495
496	emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
497	{
498	if (UseVEXEncoding() && IsAVXInstruction(ins))
499	{
500	if (TakesVexPrefix(ins))
501	{
502	// R-bit is supported by both 2-byte and 3-byte VEX prefix
503	assert(hasVexPrefix(code));
504
505	// R-bit is added in bit-inverted form.
506	return code & `0xFF7FFFFFFFFFFFULL`;
507	}
508	}
509
510	return code \| `0x4400000000ULL`;
511	}
512
513	emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
514	{
515	if (UseVEXEncoding() && IsAVXInstruction(ins))
516	{
517	if (TakesVexPrefix(ins))
518	{
519	// X-bit is available only in 3-byte VEX prefix that starts with byte C4.
520	assert(hasVexPrefix(code));
521
522	// X-bit is added in bit-inverted form.
523	return code & `0xFFBFFFFFFFFFFFULL`;
524	}
525	}
526
527	return code \| `0x4200000000ULL`;
528	}
529
530	emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
531	{
532	if (UseVEXEncoding() && IsAVXInstruction(ins))
533	{
534	if (TakesVexPrefix(ins))
535	{
536	// B-bit is available only in 3-byte VEX prefix that starts with byte C4.
537	assert(hasVexPrefix(code));
538
539	// B-bit is added in bit-inverted form.
540	return code & `0xFFDFFFFFFFFFFFULL`;
541	}
542	}
543
544	return code \| `0x4100000000ULL`;
545	}
546
547	// Adds REX prefix (0x40) without W, R, X or B bits set
548	emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
549	{
550	assert(!UseVEXEncoding() \|\| !IsAVXInstruction(ins));
551	return code \| `0x4000000000ULL`;
552	}
553
554	#endif //_TARGET_AMD64_
555
556	bool isPrefix(BYTE b)
557	{
558	assert(b != `0`); // Caller should check this
559	assert(b != `0x67`); // We don't use the address size prefix
560	assert(b != `0x65`); // The GS segment override prefix is emitted separately
561	assert(b != `0x64`); // The FS segment override prefix is emitted separately
562	assert(b != `0xF0`); // The lock prefix is emitted separately
563	assert(b != `0x2E`); // We don't use the CS segment override prefix
564	assert(b != `0x3E`); // Or the DS segment override prefix
565	assert(b != `0x26`); // Or the ES segment override prefix
566	assert(b != `0x36`); // Or the SS segment override prefix
567
568	// That just leaves the size prefixes used in SSE opcodes:
569	// Scalar Double Scalar Single Packed Double
570	return ((b == `0xF2`) \|\| (b == `0xF3`) \|\| (b == `0x66`));
571	}
572
573	// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
574	unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code)
575	{
576	if (hasVexPrefix(code))
577	{
578	// Only AVX instructions should have a VEX prefix
579	assert(UseVEXEncoding() && IsAVXInstruction(ins));
580	code_t vexPrefix = (code >> `32`) & `0x00FFFFFF`;
581	code &= `0x00000000FFFFFFFFLL`;
582
583	WORD leadingBytes = `0`;
584	BYTE check = (code >> `24`) & `0xFF`;
585	if (check != `0`)
586	{
587	// 3-byte opcode: with the bytes ordered as 0x2211RM33 or
588	// 4-byte opcode: with the bytes ordered as 0x22114433
589	// check for a prefix in the 11 position
590	BYTE sizePrefix = (code >> `16`) & `0xFF`;
591	if ((sizePrefix != `0`) && isPrefix(sizePrefix))
592	{
593	// 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
594	//
595	// 00 - None (0F - packed float)
596	// 01 - 66 (66 0F - packed double)
597	// 10 - F3 (F3 0F - scalar float
598	// 11 - F2 (F2 0F - scalar double)
599	switch (sizePrefix)
600	{
601	case `0x66`:
602	if (IsBMIInstruction(ins))
603	{
604	switch (ins)
605	{
606	case INS_pdep:
607	case INS_mulx:
608	{
609	vexPrefix \|= `0x03`;
610	break;
611	}
612
613	case INS_pext:
614	{
615	vexPrefix \|= `0x02`;
616	break;
617	}
618
619	default:
620	{
621	vexPrefix \|= `0x00`;
622	break;
623	}
624	}
625	}
626	else
627	{
628	vexPrefix \|= `0x01`;
629	}
630	break;
631	case `0xF3`:
632	vexPrefix \|= `0x02`;
633	break;
634	case `0xF2`:
635	vexPrefix \|= `0x03`;
636	break;
637	default:
638	assert(!"unrecognized SIMD size prefix");
639	unreached();
640	}
641
642	// Now the byte in the 22 position must be an escape byte 0F
643	leadingBytes = check;
644	assert(leadingBytes == `0x0F`);
645
646	// Get rid of both sizePrefix and escape byte
647	code &= `0x0000FFFFLL`;
648
649	// Check the byte in the 33 position to see if it is 3A or 38.
650	// In such a case escape bytes must be 0x0F3A or 0x0F38
651	check = code & `0xFF`;
652	if (check == `0x3A` \|\| check == `0x38`)
653	{
654	leadingBytes = (leadingBytes << `8`) \| check;
655	code &= `0x0000FF00LL`;
656	}
657	}
658	}
659	else
660	{
661	// 2-byte opcode with the bytes ordered as 0x0011RM22
662	// the byte in position 11 must be an escape byte.
663	leadingBytes = (code >> `16`) & `0xFF`;
664	assert(leadingBytes == `0x0F` \|\| leadingBytes == `0x00`);
665	code &= `0xFFFF`;
666	}
667
668	// If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
669	// m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
670	// implied leading bytes. 0x0F is supported by both the 2-byte and
671	// 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by
672	// the 3-byte version.
673
674	switch (leadingBytes)
675	{
676	case `0x00`:
677	// there is no leading byte
678	break;
679	case `0x0F`:
680	vexPrefix \|= `0x0100`;
681	break;
682	case `0x0F38`:
683	vexPrefix \|= `0x0200`;
684	break;
685	case `0x0F3A`:
686	vexPrefix \|= `0x0300`;
687	break;
688	default:
689	assert(!"encountered unknown leading bytes");
690	unreached();
691	}
692
693	// At this point
694	// VEX.2211RM33 got transformed as VEX.0000RM33
695	// VEX.0011RM22 got transformed as VEX.0000RM22
696	//
697	// Now output VEX prefix leaving the 4-byte opcode
698
699	// The 2-byte VEX encoding, requires that the X and B-bits are set (these
700	// bits are inverted from the REX values so set means off), the W-bit is
701	// not set (this bit is not inverted), and that the m-mmmm bits are 0-0001
702	// (the 2-byte VEX encoding only supports the 0x0F leading byte). When these
703	// conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then
704	// byte-1 is the logical-or of bit 7 from byte-1 and bits 0-6 from byte 2
705	// from the 3-byte VEX encoding.
706	//
707	// Given the above, the check can be reduced to a simple mask and comparison.
708	// 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about:*
709	// R can be set or unset (0x7F ignores bit 7)*
710	// vvvv can be any value (0x80 ignores bits 3-6)*
711	// L can be set or unset (0x80 ignores bit 2)*
712	// pp can be any value (0x80 ignores bits 0-1)*
713	// 0x00C46100 is a value that signifies the requirements listed above were met:*
714	// We must be a three-byte VEX opcode (0x00C4)*
715	// X and B must be set (0x61 validates bits 5-6)*
716	// m-mmmm must be 0-00001 (0x61 validates bits 0-4)*
717	// W must be unset (0x00 validates bit 7)*
718	if ((vexPrefix & `0xFFFF7F80`) == `0x00C46100`)
719	{
720	emitOutputByte(dst, `0xC5`);
721	emitOutputByte(dst + `1`, ((vexPrefix >> `8`) & `0x80`) \| (vexPrefix & `0x7F`));
722	return `2`;
723	}
724
725	emitOutputByte(dst, ((vexPrefix >> `16`) & `0xFF`));
726	emitOutputByte(dst + `1`, ((vexPrefix >> `8`) & `0xFF`));
727	emitOutputByte(dst + `2`, vexPrefix & `0xFF`);
728	return `3`;
729	}
730
731	#ifdef _TARGET_AMD64_
732	if (code > `0x00FFFFFFFFLL`)
733	{
734	BYTE prefix = (code >> `32`) & `0xFF`;
735	noway_assert(prefix >= `0x40` && prefix <= `0x4F`);
736	code &= `0x00000000FFFFFFFFLL`;
737
738	// TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
739	// we can remove this code as well
740
741	// The REX prefix is required to come after all other prefixes.
742	// Some of our 'opcodes' actually include some prefixes, if that
743	// is the case, shift them over and place the REX prefix after
744	// the other prefixes, and emit any prefix that got moved out.
745	BYTE check = (code >> `24`) & `0xFF`;
746	if (check == `0`)
747	{
748	// 3-byte opcode: with the bytes ordered as 0x00113322
749	// check for a prefix in the 11 position
750	check = (code >> `16`) & `0xFF`;
751	if (check != `0` && isPrefix(check))
752	{
753	// Swap the rex prefix and whatever this prefix is
754	code = (((DWORD)prefix << `16`) \| (code & `0x0000FFFFLL`));
755	// and then emit the other prefix
756	return emitOutputByte(dst, check);
757	}
758	}
759	else
760	{
761	// 4-byte opcode with the bytes ordered as 0x22114433
762	// first check for a prefix in the 11 position
763	BYTE check2 = (code >> `16`) & `0xFF`;
764	if (isPrefix(check2))
765	{
766	assert(!isPrefix(check)); // We currently don't use this, so it is untested
767	if (isPrefix(check))
768	{
769	// 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
770	// Change to c2rrc1XXXX, and emit check2 now
771	code = (((code_t)prefix << `24`) \| ((code_t)check << `16`) \| (code & `0x0000FFFFLL`));
772	}
773	else
774	{
775	// 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
776	// Change to c2XXrrXXXX, and emit check2 now
777	code = (((code_t)check << `24`) \| ((code_t)prefix << `16`) \| (code & `0x0000FFFFLL`));
778	}
779	return emitOutputByte(dst, check2);
780	}
781	}
782
783	return emitOutputByte(dst, prefix);
784	}
785	#endif // _TARGET_AMD64_
786
787	return `0`;
788	}
789
790	#ifdef _TARGET_AMD64_
791	/*****************************************************************************
792	* Is the last instruction emitted a call instruction?
793	*/
794	bool emitter::emitIsLastInsCall()
795	{
796	if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
797	{
798	return true;
799	}
800
801	return false;
802	}
803
804	/*****************************************************************************
805	* We're about to create an epilog. If the last instruction we output was a 'call',
806	* then we need to insert a NOP, to allow for proper exception-handling behavior.
807	*/
808	void emitter::emitOutputPreEpilogNOP()
809	{
810	if (emitIsLastInsCall())
811	{
812	emitIns(INS_nop);
813	}
814	}
815
816	#endif //_TARGET_AMD64_
817
818	// Size of rex prefix in bytes
819	unsigned emitter::emitGetRexPrefixSize(instruction ins)
820	{
821	// In case of AVX instructions, REX prefixes are part of VEX prefix.
822	// And hence requires no additional byte to encode REX prefixes.
823	if (IsAVXInstruction(ins))
824	{
825	return `0`;
826	}
827
828	// If not AVX, then we would need 1-byte to encode REX prefix.
829	return `1`;
830	}
831
832	// Size of vex prefix in bytes
833	unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
834	{
835	if (IsAVXInstruction(ins))
836	{
837	return `3`;
838	}
839
840	// If not AVX, then we don't need to encode vex prefix.
841	return `0`;
842	}
843
844	// VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
845	// Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always overstimate.
846	// Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so that
847	// instruction size estimate will be accurate.
848	// Basically this function will decrease the vexPrefixSize,
849	// so that opcodeSize + vexPrefixAdjustedSize will be the right size.
850	// rightOpcodeSize + vexPrefixSize
851	//=(opcodeSize - ExtrabytesSize) + vexPrefixSize
852	//=opcodeSize + (vexPrefixSize - ExtrabytesSize)
853	//=opcodeSize + vexPrefixAdjustedSize
854	unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code)
855	{
856	if (IsAVXInstruction(ins))
857	{
858	unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
859	assert(vexPrefixAdjustedSize == `3`);
860
861	// In this case, opcode will contains escape prefix at least one byte,
862	// vexPrefixAdjustedSize should be minus one.
863	vexPrefixAdjustedSize -= `1`;
864
865	// Get the fourth byte in Opcode.
866	// If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
867	BYTE check = (code >> `24`) & `0xFF`;
868	if (check != `0`)
869	{
870	// 3-byte opcode: with the bytes ordered as 0x2211RM33 or
871	// 4-byte opcode: with the bytes ordered as 0x22114433
872	// Simd prefix is at the first byte.
873	BYTE sizePrefix = (code >> `16`) & `0xFF`;
874	if (sizePrefix != `0` && isPrefix(sizePrefix))
875	{
876	vexPrefixAdjustedSize -= `1`;
877	}
878
879	// If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
880	// But in this case the opcode has not counted R\M part.
881	// opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
882	//=opcodeSize + VexPrefixAdjustedSize -1 + 1
883	//=opcodeSize + VexPrefixAdjustedSize
884	// So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize.
885	}
886
887	return vexPrefixAdjustedSize;
888	}
889	return `0`;
890	}
891
892	// Get size of rex or vex prefix emitted in code
893	unsigned emitter::emitGetPrefixSize(code_t code)
894	{
895	if (hasVexPrefix(code))
896	{
897	return `3`;
898	}
899
900	if (hasRexPrefix(code))
901	{
902	return `1`;
903	}
904
905	return `0`;
906	}
907
908	#ifdef _TARGET_X86_
909	/*****************************************************************************
910	*
911	* Record a non-empty stack
912	*/
913
914	void emitter::emitMarkStackLvl(unsigned stackLevel)
915	{
916	assert(int(stackLevel) >= `0`);
917	assert(emitCurStackLvl == `0`);
918	assert(emitCurIG->igStkLvl == `0`);
919	assert(emitCurIGfreeNext == emitCurIGfreeBase);
920
921	assert(stackLevel && stackLevel % sizeof(int) == `0`);
922
923	emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
924
925	if (emitMaxStackDepth < emitCurStackLvl)
926	{
927	JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
928	emitMaxStackDepth = emitCurStackLvl;
929	}
930	}
931	#endif
932
933	/*****************************************************************************
934	*
935	* Get hold of the address mode displacement value for an indirect call.
936	*/
937
938	inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
939	{
940	if (id->idIsLargeCall())
941	{
942	return ((instrDescCGCA*)id)->idcDisp;
943	}
944	else
945	{
946	assert(!id->idIsLargeDsp());
947	assert(!id->idIsLargeCns());
948
949	return id->idAddr()->iiaAddrMode.amDisp;
950	}
951	}
952
953	/ *************************************************************************
954	*
955	* The following table is used by the instIsFP()/instUse/DefFlags() helpers.
956	*/
957
958	// clang-format off
959	const insFlags CodeGenInterface::instInfo[] =
960	{
961	#define INST0(id, nm, um, mr, flags) static_cast<insFlags>(flags),
962	#define INST1(id, nm, um, mr, flags) static_cast<insFlags>(flags),
963	#define INST2(id, nm, um, mr, mi, flags) static_cast<insFlags>(flags),
964	#define INST3(id, nm, um, mr, mi, rm, flags) static_cast<insFlags>(flags),
965	#define INST4(id, nm, um, mr, mi, rm, a4, flags) static_cast<insFlags>(flags),
966	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) static_cast<insFlags>(flags),
967	#include "instrs.h"
968	#undef INST0
969	#undef INST1
970	#undef INST2
971	#undef INST3
972	#undef INST4
973	#undef INST5
974	};
975	// clang-format on
976
977	/*****************************************************************************
978	*
979	* Initialize the table used by emitInsModeFormat().
980	*/
981
982	// clang-format off
983	const BYTE emitter::emitInsModeFmtTab[] =
984	{
985	#define INST0(id, nm, um, mr, flags) um,
986	#define INST1(id, nm, um, mr, flags) um,
987	#define INST2(id, nm, um, mr, mi, flags) um,
988	#define INST3(id, nm, um, mr, mi, rm, flags) um,
989	#define INST4(id, nm, um, mr, mi, rm, a4, flags) um,
990	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) um,
991	#include "instrs.h"
992	#undef INST0
993	#undef INST1
994	#undef INST2
995	#undef INST3
996	#undef INST4
997	#undef INST5
998	};
999	// clang-format on
1000
1001	#ifdef DEBUG
1002	unsigned const emitter::emitInsModeFmtCnt = _countof(emitInsModeFmtTab);
1003	#endif
1004
1005	/*****************************************************************************
1006	*
1007	* Combine the given base format with the update mode of the instuction.
1008	*/
1009
1010	inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
1011	{
1012	assert(IF_RRD + IUM_RD == IF_RRD);
1013	assert(IF_RRD + IUM_WR == IF_RWR);
1014	assert(IF_RRD + IUM_RW == IF_RRW);
1015
1016	return (insFormat)(base + emitInsUpdateMode(ins));
1017	}
1018
1019	// This is a helper we need due to Vs Whidbey #254016 in order to distinguish
1020	// if we can not possibly be updating an integer register. This is not the best
1021	// solution, but the other ones (see bug) are going to be much more complicated.
1022	bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
1023	{
1024	instruction ins = id->idIns();
1025
1026	if (!IsSSEOrAVXInstruction(ins))
1027	{
1028	return false;
1029	}
1030
1031	switch (ins)
1032	{
1033	case INS_andn:
1034	case INS_bextr:
1035	case INS_blsi:
1036	case INS_blsmsk:
1037	case INS_blsr:
1038	case INS_bzhi:
1039	case INS_cvttsd2si:
1040	case INS_cvttss2si:
1041	case INS_cvtsd2si:
1042	case INS_cvtss2si:
1043	case INS_extractps:
1044	case INS_mov_xmm2i:
1045	case INS_movmskpd:
1046	case INS_movmskps:
1047	case INS_mulx:
1048	case INS_pdep:
1049	case INS_pext:
1050	case INS_pmovmskb:
1051	case INS_pextrb:
1052	case INS_pextrd:
1053	case INS_pextrq:
1054	case INS_pextrw:
1055	case INS_pextrw_sse41:
1056	{
1057	// These SSE instructions write to a general purpose integer register.
1058	return false;
1059	}
1060
1061	default:
1062	{
1063	return true;
1064	}
1065	}
1066	}
1067
1068	/*****************************************************************************
1069	*
1070	* Returns the base encoding of the given CPU instruction.
1071	*/
1072
1073	inline size_t insCode(instruction ins)
1074	{
1075	// clang-format off
1076	const static
1077	size_t insCodes[] =
1078	{
1079	#define INST0(id, nm, um, mr, flags) mr,
1080	#define INST1(id, nm, um, mr, flags) mr,
1081	#define INST2(id, nm, um, mr, mi, flags) mr,
1082	#define INST3(id, nm, um, mr, mi, rm, flags) mr,
1083	#define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
1084	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1085	#include "instrs.h"
1086	#undef INST0
1087	#undef INST1
1088	#undef INST2
1089	#undef INST3
1090	#undef INST4
1091	#undef INST5
1092	};
1093	// clang-format on
1094
1095	assert((unsigned)ins < _countof(insCodes));
1096	assert((insCodes[ins] != BAD_CODE));
1097
1098	return insCodes[ins];
1099	}
1100
1101	/*****************************************************************************
1102	*
1103	* Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
1104	*/
1105
1106	inline size_t insCodeACC(instruction ins)
1107	{
1108	// clang-format off
1109	const static
1110	size_t insCodesACC[] =
1111	{
1112	#define INST0(id, nm, um, mr, flags)
1113	#define INST1(id, nm, um, mr, flags)
1114	#define INST2(id, nm, um, mr, mi, flags)
1115	#define INST3(id, nm, um, mr, mi, rm, flags)
1116	#define INST4(id, nm, um, mr, mi, rm, a4, flags) a4,
1117	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) a4,
1118	#include "instrs.h"
1119	#undef INST0
1120	#undef INST1
1121	#undef INST2
1122	#undef INST3
1123	#undef INST4
1124	#undef INST5
1125	};
1126	// clang-format on
1127
1128	assert((unsigned)ins < _countof(insCodesACC));
1129	assert((insCodesACC[ins] != BAD_CODE));
1130
1131	return insCodesACC[ins];
1132	}
1133
1134	/*****************************************************************************
1135	*
1136	* Returns the "register" encoding of the given CPU instruction.
1137	*/
1138
1139	inline size_t insCodeRR(instruction ins)
1140	{
1141	// clang-format off
1142	const static
1143	size_t insCodesRR[] =
1144	{
1145	#define INST0(id, nm, um, mr, flags)
1146	#define INST1(id, nm, um, mr, flags)
1147	#define INST2(id, nm, um, mr, mi, flags)
1148	#define INST3(id, nm, um, mr, mi, rm, flags)
1149	#define INST4(id, nm, um, mr, mi, rm, a4, flags)
1150	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rr,
1151	#include "instrs.h"
1152	#undef INST0
1153	#undef INST1
1154	#undef INST2
1155	#undef INST3
1156	#undef INST4
1157	#undef INST5
1158	};
1159	// clang-format on
1160
1161	assert((unsigned)ins < _countof(insCodesRR));
1162	assert((insCodesRR[ins] != BAD_CODE));
1163
1164	return insCodesRR[ins];
1165	}
1166
1167	// clang-format off
1168	const static
1169	size_t insCodesRM[] =
1170	{
1171	#define INST0(id, nm, um, mr, flags)
1172	#define INST1(id, nm, um, mr, flags)
1173	#define INST2(id, nm, um, mr, mi, flags)
1174	#define INST3(id, nm, um, mr, mi, rm, flags) rm,
1175	#define INST4(id, nm, um, mr, mi, rm, a4, flags) rm,
1176	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) rm,
1177	#include "instrs.h"
1178	#undef INST0
1179	#undef INST1
1180	#undef INST2
1181	#undef INST3
1182	#undef INST4
1183	#undef INST5
1184	};
1185	// clang-format on
1186
1187	// Returns true iff the given CPU instruction has an RM encoding.
1188	inline bool hasCodeRM(instruction ins)
1189	{
1190	assert((unsigned)ins < _countof(insCodesRM));
1191	return ((insCodesRM[ins] != BAD_CODE));
1192	}
1193
1194	/*****************************************************************************
1195	*
1196	* Returns the "reg, [r/m]" encoding of the given CPU instruction.
1197	*/
1198
1199	inline size_t insCodeRM(instruction ins)
1200	{
1201	assert((unsigned)ins < _countof(insCodesRM));
1202	assert((insCodesRM[ins] != BAD_CODE));
1203
1204	return insCodesRM[ins];
1205	}
1206
1207	// clang-format off
1208	const static
1209	size_t insCodesMI[] =
1210	{
1211	#define INST0(id, nm, um, mr, flags)
1212	#define INST1(id, nm, um, mr, flags)
1213	#define INST2(id, nm, um, mr, mi, flags) mi,
1214	#define INST3(id, nm, um, mr, mi, rm, flags) mi,
1215	#define INST4(id, nm, um, mr, mi, rm, a4, flags) mi,
1216	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mi,
1217	#include "instrs.h"
1218	#undef INST0
1219	#undef INST1
1220	#undef INST2
1221	#undef INST3
1222	#undef INST4
1223	#undef INST5
1224	};
1225	// clang-format on
1226
1227	// Returns true iff the given CPU instruction has an MI encoding.
1228	inline bool hasCodeMI(instruction ins)
1229	{
1230	assert((unsigned)ins < _countof(insCodesMI));
1231	return ((insCodesMI[ins] != BAD_CODE));
1232	}
1233
1234	/*****************************************************************************
1235	*
1236	* Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
1237	*/
1238
1239	inline size_t insCodeMI(instruction ins)
1240	{
1241	assert((unsigned)ins < _countof(insCodesMI));
1242	assert((insCodesMI[ins] != BAD_CODE));
1243
1244	return insCodesMI[ins];
1245	}
1246
1247	// clang-format off
1248	const static
1249	size_t insCodesMR[] =
1250	{
1251	#define INST0(id, nm, um, mr, flags)
1252	#define INST1(id, nm, um, mr, flags) mr,
1253	#define INST2(id, nm, um, mr, mi, flags) mr,
1254	#define INST3(id, nm, um, mr, mi, rm, flags) mr,
1255	#define INST4(id, nm, um, mr, mi, rm, a4, flags) mr,
1256	#define INST5(id, nm, um, mr, mi, rm, a4, rr, flags) mr,
1257	#include "instrs.h"
1258	#undef INST0
1259	#undef INST1
1260	#undef INST2
1261	#undef INST3
1262	#undef INST4
1263	#undef INST5
1264	};
1265	// clang-format on
1266
1267	// Returns true iff the given CPU instruction has an MR encoding.
1268	inline bool hasCodeMR(instruction ins)
1269	{
1270	assert((unsigned)ins < _countof(insCodesMR));
1271	return ((insCodesMR[ins] != BAD_CODE));
1272	}
1273
1274	/*****************************************************************************
1275	*
1276	* Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
1277	*/
1278
1279	inline size_t insCodeMR(instruction ins)
1280	{
1281	assert((unsigned)ins < _countof(insCodesMR));
1282	assert((insCodesMR[ins] != BAD_CODE));
1283
1284	return insCodesMR[ins];
1285	}
1286
1287	// Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
1288	bool emitter::EncodedBySSE38orSSE3A(instruction ins)
1289	{
1290	const size_t SSE38 = `0x0F660038`;
1291	const size_t SSE3A = `0x0F66003A`;
1292	const size_t MASK = `0xFFFF00FF`;
1293
1294	size_t insCode = `0`;
1295
1296	if (!IsSSEOrAVXInstruction(ins))
1297	{
1298	return false;
1299	}
1300
1301	if (hasCodeRM(ins))
1302	{
1303	insCode = insCodeRM(ins);
1304	}
1305	else if (hasCodeMI(ins))
1306	{
1307	insCode = insCodeMI(ins);
1308	}
1309	else if (hasCodeMR(ins))
1310	{
1311	insCode = insCodeMR(ins);
1312	}
1313
1314	insCode &= MASK;
1315	return insCode == SSE38 \|\| insCode == SSE3A;
1316	}
1317
1318	/*****************************************************************************
1319	*
1320	* Returns an encoding for the specified register to be used in the bit0-2
1321	* part of an opcode.
1322	*/
1323
1324	inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code)
1325	{
1326	assert(reg < REG_STK);
1327
1328	#ifdef _TARGET_AMD64_
1329	// Either code is not NULL or reg is not an extended reg.
1330	// If reg is an extended reg, instruction needs to be prefixed with 'REX'
1331	// which would require code != NULL.
1332	assert(code != nullptr \|\| !IsExtendedReg(reg));
1333
1334	if (IsExtendedReg(reg))
1335	{
1336	code = AddRexBPrefix(ins, code); // REX.B
1337	}
1338	else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1339	{
1340	// We are assuming that we only use/encode SPL, BPL, SIL and DIL
1341	// not the corresponding AH, CH, DH, or BH
1342	code = AddRexPrefix(ins, code); // REX
1343	}
1344	#endif // _TARGET_AMD64_
1345
1346	unsigned regBits = RegEncoding(reg);
1347
1348	assert(regBits < `8`);
1349	return regBits;
1350	}
1351
1352	/*****************************************************************************
1353	*
1354	* Returns an encoding for the specified register to be used in the bit3-5
1355	* part of an opcode.
1356	*/
1357
1358	inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code)
1359	{
1360	assert(reg < REG_STK);
1361
1362	#ifdef _TARGET_AMD64_
1363	// Either code is not NULL or reg is not an extended reg.
1364	// If reg is an extended reg, instruction needs to be prefixed with 'REX'
1365	// which would require code != NULL.
1366	assert(code != nullptr \|\| !IsExtendedReg(reg));
1367
1368	if (IsExtendedReg(reg))
1369	{
1370	code = AddRexRPrefix(ins, code); // REX.R
1371	}
1372	else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
1373	{
1374	// We are assuming that we only use/encode SPL, BPL, SIL and DIL
1375	// not the corresponding AH, CH, DH, or BH
1376	code = AddRexPrefix(ins, code); // REX
1377	}
1378	#endif // _TARGET_AMD64_
1379
1380	unsigned regBits = RegEncoding(reg);
1381
1382	assert(regBits < `8`);
1383	return (regBits << `3`);
1384	}
1385
1386	/***********************************************************************************
1387	*
1388	* Returns modified AVX opcode with the specified register encoded in bits 3-6 of
1389	* byte 2 of VEX prefix.
1390	*/
1391	inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code)
1392	{
1393	assert(reg < REG_STK);
1394	assert(IsAVXInstruction(ins));
1395	assert(hasVexPrefix(code));
1396
1397	// Get 4-bit register encoding
1398	// RegEncoding() gives lower 3 bits
1399	// IsExtendedReg() gives MSB.
1400	code_t regBits = RegEncoding(reg);
1401	if (IsExtendedReg(reg))
1402	{
1403	regBits \|= `0x08`;
1404	}
1405
1406	// VEX prefix encodes register operand in 1's complement form
1407	// Shift count = 4-bytes of opcode + 0-2 bits
1408	assert(regBits <= `0xF`);
1409	regBits <<= `35`;
1410	return code ^ regBits;
1411	}
1412
1413	/*****************************************************************************
1414	*
1415	* Returns an encoding for the specified register to be used in the bit3-5
1416	* part of an SIB byte (unshifted).
1417	* Used exclusively to generate the REX.X bit and truncate the register.
1418	*/
1419
1420	inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code)
1421	{
1422	assert(reg < REG_STK);
1423
1424	#ifdef _TARGET_AMD64_
1425	// Either code is not NULL or reg is not an extended reg.
1426	// If reg is an extended reg, instruction needs to be prefixed with 'REX'
1427	// which would require code != NULL.
1428	assert(code != nullptr \|\| reg < REG_R8 \|\| (reg >= REG_XMM0 && reg < REG_XMM8));
1429
1430	if (IsExtendedReg(reg))
1431	{
1432	code = AddRexXPrefix(ins, code); // REX.X
1433	}
1434	unsigned regBits = RegEncoding(reg);
1435	#else // !_TARGET_AMD64_
1436	unsigned regBits = reg;
1437	#endif // !_TARGET_AMD64_
1438
1439	assert(regBits < `8`);
1440	return regBits;
1441	}
1442
1443	/*****************************************************************************
1444	*
1445	* Returns the "[r/m]" opcode with the mod/RM field set to register.
1446	*/
1447
1448	inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code)
1449	{
1450	// If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1451	// Otherwise, it will be placed after the 4 byte encoding.
1452	if ((code & `0xFF00`) == `0`)
1453	{
1454	assert((code & `0xC000`) == `0`);
1455	code \|= `0xC000`;
1456	}
1457
1458	return code;
1459	}
1460
1461	/*****************************************************************************
1462	*
1463	* Returns the given "[r/m]" opcode with the mod/RM field set to register.
1464	*/
1465
1466	inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code)
1467	{
1468	// If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
1469	// Otherwise, it will be placed after the 4 byte encoding.
1470	if ((code & `0xFF00`) == `0`)
1471	{
1472	assert((code & `0xC000`) == `0`);
1473	code \|= `0xC000`;
1474	}
1475	return code;
1476	}
1477
1478	/*****************************************************************************
1479	*
1480	* Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
1481	* the given register.
1482	*/
1483
1484	inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1485	{
1486	assert((code & `0xC000`) == `0`);
1487	code \|= `0xC000`;
1488	unsigned regcode = insEncodeReg012(ins, reg, size, &code) << `8`;
1489	code \|= regcode;
1490	return code;
1491	}
1492
1493	/*****************************************************************************
1494	*
1495	* Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
1496	* the given register.
1497	*/
1498
1499	inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code)
1500	{
1501	assert((code & `0xC000`) == `0`);
1502	code \|= `0xC000`;
1503	unsigned regcode = insEncodeReg012(ins, reg, size, &code) << `8`;
1504	code \|= regcode;
1505	return code;
1506	}
1507
1508	/*****************************************************************************
1509	*
1510	* Returns true iff the given instruction does not have a "[r/m], icon" form, but does have a
1511	* "reg,reg,imm8" form.
1512	*/
1513	inline bool insNeedsRRIb(instruction ins)
1514	{
1515	// If this list gets longer, use a switch or a table.
1516	return ins == INS_imul;
1517	}
1518
1519	/*****************************************************************************
1520	*
1521	* Returns the "reg,reg,imm8" opcode with both the reg's set to
1522	* the given register.
1523	*/
1524	inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
1525	{
1526	assert(size == EA_4BYTE); // All we handle for now.
1527	assert(insNeedsRRIb(ins));
1528	// If this list gets longer, use a switch, or a table lookup.
1529	code_t code = `0x69c0`;
1530	unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1531	// We use the same register as source and destination. (Could have another version that does both regs...)
1532	code \|= regcode;
1533	code \|= (regcode << `3`);
1534	return code;
1535	}
1536
1537	/*****************************************************************************
1538	*
1539	* Returns the "+reg" opcode with the given register set into the low
1540	* nibble of the opcode
1541	*/
1542
1543	inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
1544	{
1545	code_t code = insCodeRR(ins);
1546	unsigned regcode = insEncodeReg012(ins, reg, size, &code);
1547	code \|= regcode;
1548	return code;
1549	}
1550
1551	/*****************************************************************************
1552	*
1553	* Return the 'SS' field value for the given index scale factor.
1554	*/
1555
1556	inline unsigned emitter::insSSval(unsigned scale)
1557	{
1558	assert(scale == `1` \|\| scale == `2` \|\| scale == `4` \|\| scale == `8`);
1559
1560	const static BYTE scales[] = {
1561	`0x00`, // 1
1562	`0x40`, // 2
1563	`0xFF`, // 3
1564	`0x80`, // 4
1565	`0xFF`, // 5
1566	`0xFF`, // 6
1567	`0xFF`, // 7
1568	`0xC0`, // 8
1569	};
1570
1571	return scales[scale - `1`];
1572	}
1573
1574	const instruction emitJumpKindInstructions[] = {INS_nop,
1575
1576	#define JMP_SMALL(en, rev, ins) INS_##ins,
1577	#include "emitjmps.h"
1578
1579	INS_call};
1580
1581	const emitJumpKind emitReverseJumpKinds[] = {
1582	EJ_NONE,
1583
1584	#define JMP_SMALL(en, rev, ins) EJ_##rev,
1585	#include "emitjmps.h"
1586	};
1587
1588	/*****************************************************************************
1589	* Look up the instruction for a jump kind
1590	*/
1591
1592	/static/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
1593	{
1594	assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
1595	return emitJumpKindInstructions[jumpKind];
1596	}
1597
1598	/*****************************************************************************
1599	* Reverse the conditional jump
1600	*/
1601
1602	/ static / emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
1603	{
1604	assert(jumpKind < EJ_COUNT);
1605	return emitReverseJumpKinds[jumpKind];
1606	}
1607
1608	/*****************************************************************************
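1609	* Returns true for an instruction that has no code of its own; currently
1610	* that is only INS_align. (The text previously here, "The size for these
1610	* instructions is less than EA_4BYTE, but the target register need not be
1610	* byte-addressable", does not describe the function below.)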
1611	*/
1612
1613	inline bool emitInstHasNoCode(instruction ins)
1614	{
1615	if (ins == INS_align)
1616	{
1617	return true;
1618	}
1619
1620	return false;
1621	}
1622
1623	/*****************************************************************************
1624	* When encoding instructions that operate on byte registers
1625	* we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
1626	* otherwise we will incorrectly encode the instruction
1627	*/
1628
1629	bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 / = REG_NA /)
1630	{
1631	#if CPU_HAS_BYTE_REGS
1632	if (size != EA_1BYTE) // Not operating on a byte register is fine
1633	{
1634	return true;
1635	}
1636
1637	if ((ins != INS_movsx) && // These three instructions support high register
1638	(ins != INS_movzx) // encodings for reg1
1639	#ifdef FEATURE_HW_INTRINSICS
1640	&& (ins != INS_crc32)
1641	#endif
1642	)
1643	{
1644	// reg1 must be a byte-able register
1645	if ((genRegMask(reg1) & RBM_BYTE_REGS) == `0`)
1646	{
1647	return false;
1648	}
1649	}
1650	// if reg2 is not REG_NA then reg2 must be a byte-able register
1651	if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == `0`))
1652	{
1653	return false;
1654	}
1655	#endif
1656	// The instruction can be encoded
1657	return true;
1658	}
1659
1660	/*****************************************************************************
1661	*
1662	* Estimate the size (in bytes of generated code) of the given instruction.
1663	*/
1664
1665	inline UNATIVE_OFFSET emitter::emitInsSize(code_t code)
1666	{
1667	UNATIVE_OFFSET size = (code & `0xFF000000`) ? `4` : (code & `0x00FF0000`) ? `3` : `2`;
1668	#ifdef _TARGET_AMD64_
1669	size += emitGetPrefixSize(code);
1670	#endif
1671	return size;
1672	}
1673
1674	inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
1675	{
1676	return emitInsSize(insCodeRM(ins));
1677	}
1678
1679	inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
1680	{
1681	emitAttr size = EA_SIZE(attr);
1682
1683	UNATIVE_OFFSET sz;
1684
1685	// If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes.
1686	// Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
1687	// This would probably be better expressed as a different format or something?
1688	code_t code = insCodeRM(ins);
1689
1690	if ((code & `0xFF00`) != `0`)
1691	{
1692	sz = `5`;
1693	}
1694	else
1695	{
1696	sz = emitInsSize(insEncodeRMreg(ins, code));
1697	}
1698
1699	// Most 16-bit operand instructions will need a prefix
1700	if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
1701	{
1702	sz += `1`;
1703	}
1704
1705	// VEX prefix
1706	sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
1707
1708	// REX prefix
1709	if (!hasRexPrefix(code))
1710	{
1711	if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) \|\| (reg1 != reg2))) \|\| IsExtendedReg(reg1, attr) \|\|
1712	IsExtendedReg(reg2, attr))
1713	{
1714	sz += emitGetRexPrefixSize(ins);
1715	}
1716	}
1717
1718	return sz;
1719	}
1720
1721	/***************************************************************************/
1722
1723	inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
1724	{
1725	UNATIVE_OFFSET size = emitInsSize(code);
1726	UNATIVE_OFFSET offs;
1727	bool offsIsUpperBound = true;
1728	bool EBPbased = true;
1729
1730	/ Is this a temporary? /
1731
1732	if (var < `0`)
1733	{
1734	/ An address off of ESP takes an extra byte /
1735
1736	if (!emitHasFramePtr)
1737	{
1738	size++;
1739	}
1740
1741	// The offset is already assigned. Find the temp.
1742	TempDsc* tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_USED);
1743	if (tmp == nullptr)
1744	{
1745	// It might be in the free lists, if we're working on zero initializing the temps.
1746	tmp = codeGen->regSet.tmpFindNum(var, RegSet::TEMP_USAGE_FREE);
1747	}
1748	assert(tmp != nullptr);
1749	offs = tmp->tdTempOffs();
1750
1751	// We only care about the magnitude of the offset here, to determine instruction size.
1752	if (emitComp->isFramePointerUsed())
1753	{
1754	if ((int)offs < `0`)
1755	{
1756	offs = -(int)offs;
1757	}
1758	}
1759	else
1760	{
1761	// SP-based offsets must already be positive.
1762	assert((int)offs >= `0`);
1763	}
1764	}
1765	else
1766	{
1767
1768	/ Get the frame offset of the (non-temp) variable /
1769
1770	offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
1771
1772	/ An address off of ESP takes an extra byte /
1773
1774	if (!EBPbased)
1775	{
1776	++size;
1777	}
1778
1779	/ Is this a stack parameter reference? /
1780
1781	if (emitComp->lvaIsParameter(var)
1782	#if !defined(_TARGET_AMD64_) \|\| defined(UNIX_AMD64_ABI)
1783	&& !emitComp->lvaIsRegArgument(var)
1784	#endif // !_TARGET_AMD64_ \|\| UNIX_AMD64_ABI
1785	)
1786	{
1787	/ If no EBP frame, arguments are off of ESP, above temps /
1788
1789	if (!EBPbased)
1790	{
1791	assert((int)offs >= `0`);
1792
1793	offsIsUpperBound = false; // since #temps can increase
1794	offs += emitMaxTmpSize;
1795	}
1796	}
1797	else
1798	{
1799	/ Locals off of EBP are at negative offsets /
1800
1801	if (EBPbased)
1802	{
1803	#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
1804	// If localloc is not used, then ebp chaining is done and hence
1805	// offset of locals will be at negative offsets, Otherwise offsets
1806	// will be positive. In future, when RBP gets positioned in the
1807	// middle of the frame so as to optimize instruction encoding size,
1808	// the below asserts needs to be modified appropriately.
1809	// However, for Unix platforms, we always do frame pointer chaining,
1810	// so offsets from the frame pointer will always be negative.
1811	if (emitComp->compLocallocUsed \|\| emitComp->opts.compDbgEnC)
1812	{
1813	noway_assert((int)offs >= `0`);
1814	}
1815	else
1816	#endif
1817	{
1818	// Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
1819	CLANG_FORMAT_COMMENT_ANCHOR;
1820
1821	#ifdef UNIX_AMD64_ABI
1822	LclVarDsc* varDsc = emitComp->lvaTable + var;
1823	bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
1824	// Register passed args could have a stack offset of 0.
1825	noway_assert((int)offs < `0` \|\| isRegPassedArg);
1826	#else // !UNIX_AMD64_ABI
1827	noway_assert((int)offs < `0`);
1828	#endif // !UNIX_AMD64_ABI
1829	}
1830
1831	assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
1832
1833	// lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
1834	if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar \|\|
1835	unsigned(var) == emitComp->lvaStubArgumentVar)
1836	{
1837	offs -= emitMaxTmpSize;
1838	}
1839
1840	if ((int)offs < `0`)
1841	{
1842	// offset is negative
1843	return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
1844	}
1845	#ifdef _TARGET_AMD64_
1846	// This case arises for localloc frames
1847	else
1848	{
1849	return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
1850	}
1851	#endif
1852	}
1853
1854	if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
1855	{
1856	offs += emitMaxTmpSize;
1857	}
1858	}
1859	}
1860
1861	assert((int)offs >= `0`);
1862
1863	#if !FEATURE_FIXED_OUT_ARGS
1864
1865	/ Are we addressing off of ESP? /
1866
1867	if (!emitHasFramePtr)
1868	{
1869	/ Adjust the effective offset if necessary /
1870
1871	if (emitCntStackDepth)
1872	offs += emitCurStackLvl;
1873
1874	// we could (and used to) check for the special case [sp] here but the stack offset
1875	// estimator was off, and there is very little harm in overestimating for such a
1876	// rare case.
1877	}
1878
1879	#endif // !FEATURE_FIXED_OUT_ARGS
1880
1881	// printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
1882	// emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
1883
1884	#ifdef _TARGET_AMD64_
1885	bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
1886	#else
1887	bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
1888	#endif
1889
1890	// If it is ESP based, and the offset is zero, we will not encode the disp part.
1891	if (!EBPbased && offs == `0`)
1892	{
1893	return size;
1894	}
1895	else
1896	{
1897	return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
1898	}
1899	}
1900
1901	inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp)
1902	{
1903	instruction ins = id->idIns();
1904	emitAttr attrSize = id->idOpSize();
1905	UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
1906	return prefix + emitInsSizeSV(code, var, dsp);
1907	}
1908
1909	inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val)
1910	{
1911	instruction ins = id->idIns();
1912	emitAttr attrSize = id->idOpSize();
1913	UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize);
1914	UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
1915	bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
1916
1917	#ifdef _TARGET_AMD64_
1918	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
1919	// all other opcodes take a sign-extended 4-byte immediate
1920	noway_assert(valSize <= sizeof(int) \|\| !id->idIsCnsReloc());
1921	#endif // _TARGET_AMD64_
1922
1923	if (valSize > sizeof(int))
1924	{
1925	valSize = sizeof(int);
1926	}
1927
1928	if (id->idIsCnsReloc())
1929	{
1930	valInByte = false; // relocs can't be placed in a byte
1931	assert(valSize == sizeof(int));
1932	}
1933
1934	if (valInByte)
1935	{
1936	valSize = sizeof(char);
1937	}
1938
1939	// 16-bit operand instructions need a prefix.
1940	// This referes to 66h size prefix override
1941	if (id->idOpSize() == EA_2BYTE)
1942	{
1943	prefix += `1`;
1944	}
1945
1946	return prefix + valSize + emitInsSizeSV(code, var, dsp);
1947	}
1948
1949	/***************************************************************************/
1950
1951	static bool baseRegisterRequiresSibByte(regNumber base)
1952	{
1953	#ifdef _TARGET_AMD64_
1954	return base == REG_ESP \|\| base == REG_R12;
1955	#else
1956	return base == REG_ESP;
1957	#endif
1958	}
1959
1960	static bool baseRegisterRequiresDisplacement(regNumber base)
1961	{
1962	#ifdef _TARGET_AMD64_
1963	return base == REG_EBP \|\| base == REG_R13;
1964	#else
1965	return base == REG_EBP;
1966	#endif
1967	}
1968
1969	UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
1970	{
1971	emitAttr attrSize = id->idOpSize();
1972	instruction ins = id->idIns();
1973	/ The displacement field is in an unusual place for calls /
1974	ssize_t dsp = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
1975	bool dspInByte = ((signed char)dsp == (ssize_t)dsp);
1976	bool dspIsZero = (dsp == `0`);
1977	UNATIVE_OFFSET size;
1978
1979	// Note that the values in reg and rgx are used in this method to decide
1980	// how many bytes will be needed by the address [reg+rgx+cns]
1981	// this includes the prefix bytes when reg or rgx are registers R8-R15
1982	regNumber reg;
1983	regNumber rgx;
1984
1985	// The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
1986	// these are IF_AWR_, IF_ARD_, IF_ARW_ and IF__ARD
1987	// ideally these should really be the only idInsFmts that we see here
1988	// but we have some outliers to deal with:
1989	// emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
1990	// emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
1991
1992	switch (id->idInsFmt())
1993	{
1994	case IF_RWR_LABEL:
1995	case IF_MRW_CNS:
1996	case IF_MRW_RRD:
1997	case IF_MRW_SHF:
1998	reg = REG_NA;
1999	rgx = REG_NA;
2000	break;
2001
2002	default:
2003	reg = id->idAddr()->iiaAddrMode.amBaseReg;
2004	rgx = id->idAddr()->iiaAddrMode.amIndxReg;
2005	break;
2006	}
2007
2008	if (id->idIsDspReloc())
2009	{
2010	dspInByte = false; // relocs can't be placed in a byte
2011	dspIsZero = false; // relocs won't always be zero
2012	}
2013
2014	if (code & `0xFF000000`)
2015	{
2016	size = `4`;
2017	}
2018	else if (code & `0x00FF0000`)
2019	{
2020	// BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
2021	assert(ins != INS_bt);
2022
2023	assert((attrSize == EA_4BYTE) \|\| (attrSize == EA_PTRSIZE) // Only for x64
2024	\|\| (attrSize == EA_16BYTE) \|\| (attrSize == EA_32BYTE) // only for x64
2025	\|\| (ins == INS_movzx) \|\| (ins == INS_movsx)
2026	// The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
2027	\|\| isPrefetch(ins));
2028	size = `3`;
2029	}
2030	else
2031	{
2032	size = `2`;
2033
2034	// Most 16-bit operands will require a size prefix.
2035	// This refers to 66h size prefix override.
2036
2037	if (attrSize == EA_2BYTE)
2038	{
2039	size++;
2040	}
2041	}
2042
2043	size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
2044
2045	if (hasRexPrefix(code))
2046	{
2047	// REX prefix
2048	size += emitGetRexPrefixSize(ins);
2049	}
2050	else if (TakesRexWPrefix(ins, attrSize))
2051	{
2052	// REX.W prefix
2053	size += emitGetRexPrefixSize(ins);
2054	}
2055	else if (IsExtendedReg(reg, EA_PTRSIZE) \|\| IsExtendedReg(rgx, EA_PTRSIZE) \|\|
2056	((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize)))
2057	{
2058	// Should have a REX byte
2059	size += emitGetRexPrefixSize(ins);
2060	}
2061
2062	if (rgx == REG_NA)
2063	{
2064	/ The address is of the form "[reg+disp]" /
2065
2066	if (reg == REG_NA)
2067	{
2068	/ The address is of the form "[disp]" /
2069
2070	size += sizeof(INT32);
2071
2072	#ifdef _TARGET_AMD64_
2073	// If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
2074	if (!id->idIsDspReloc())
2075	{
2076	size++;
2077	}
2078	#endif
2079	return size;
2080	}
2081
2082	// If this is just "call reg", we're done.
2083	if (id->idIsCallRegPtr())
2084	{
2085	assert(ins == INS_call);
2086	assert(dsp == `0`);
2087	return size;
2088	}
2089
2090	// If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
2091	if (baseRegisterRequiresSibByte(reg))
2092	{
2093	size++;
2094	}
2095
2096	// If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
2097	// Otherwise, the displacement can be elided if it is zero.
2098	if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2099	{
2100	return size;
2101	}
2102
2103	/ Does the offset fit in a byte? /
2104
2105	if (dspInByte)
2106	{
2107	size += sizeof(char);
2108	}
2109	else
2110	{
2111	size += sizeof(INT32);
2112	}
2113	}
2114	else
2115	{
2116	/ An index register is present /
2117
2118	size++;
2119
2120	/ Is the index value scaled? /
2121
2122	if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > `1`)
2123	{
2124	/ Is there a base register? /
2125
2126	if (reg != REG_NA)
2127	{
2128	/ The address is "[reg + {2/4/8} * rgx + icon]" /
2129
2130	if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2131	{
2132	/ The address is "[reg + {2/4/8} * rgx]" /
2133	}
2134	else
2135	{
2136	/ The address is "[reg + {2/4/8} * rgx + disp]" /
2137
2138	if (dspInByte)
2139	{
2140	size += sizeof(char);
2141	}
2142	else
2143	{
2144	size += sizeof(int);
2145	}
2146	}
2147	}
2148	else
2149	{
2150	/ The address is "[{2/4/8} * rgx + icon]" /
2151
2152	size += sizeof(INT32);
2153	}
2154	}
2155	else
2156	{
2157	if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
2158	{
2159	/ Swap reg and rgx, such that reg is not EBP/R13 /
2160	regNumber tmp = reg;
2161	id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
2162	id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
2163	}
2164
2165	/ The address is "[reg+rgx+dsp]" /
2166
2167	if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
2168	{
2169	/ This is [reg+rgx]" /
2170	}
2171	else
2172	{
2173	/ This is [reg+rgx+dsp]" /
2174
2175	if (dspInByte)
2176	{
2177	size += sizeof(char);
2178	}
2179	else
2180	{
2181	size += sizeof(int);
2182	}
2183	}
2184	}
2185	}
2186
2187	return size;
2188	}
2189
2190	inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val)
2191	{
2192	instruction ins = id->idIns();
2193	UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
2194	bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2195
2196	// We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful
2197	// but it requires special handling of the immediate value (it is always encoded in a byte).
2198	// Let's not complicate things until this is needed.
2199	assert(ins != INS_bt);
2200
2201	#ifdef _TARGET_AMD64_
2202	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
2203	// all other opcodes take a sign-extended 4-byte immediate
2204	noway_assert(valSize <= sizeof(INT32) \|\| !id->idIsCnsReloc());
2205	#endif // _TARGET_AMD64_
2206
2207	if (valSize > sizeof(INT32))
2208	{
2209	valSize = sizeof(INT32);
2210	}
2211
2212	if (id->idIsCnsReloc())
2213	{
2214	valInByte = false; // relocs can't be placed in a byte
2215	assert(valSize == sizeof(INT32));
2216	}
2217
2218	if (valInByte)
2219	{
2220	valSize = sizeof(char);
2221	}
2222
2223	return valSize + emitInsSizeAM(id, code);
2224	}
2225
2226	inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
2227	{
2228	instruction ins = id->idIns();
2229
2230	// fgMorph changes any statics that won't fit into 32-bit addresses
2231	// into constants with an indir, rather than GT_CLS_VAR
2232	// so we should only hit this path for statics that are RIP-relative
2233	UNATIVE_OFFSET size = sizeof(INT32);
2234
2235	size += emitGetVexPrefixAdjustedSize(ins, id->idOpSize(), code);
2236
2237	// Most 16-bit operand instructions will need a prefix.
2238	// This refers to 66h size prefix override.
2239
2240	if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
2241	{
2242	size++;
2243	}
2244
2245	return size + emitInsSize(code);
2246	}
2247
2248	inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val)
2249	{
2250	instruction ins = id->idIns();
2251	UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
2252	bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
2253
2254	#ifndef _TARGET_AMD64_
2255	// occasionally longs get here on x86
2256	if (valSize > sizeof(INT32))
2257	valSize = sizeof(INT32);
2258	#endif // !_TARGET_AMD64_
2259
2260	if (id->idIsCnsReloc())
2261	{
2262	valInByte = false; // relocs can't be placed in a byte
2263	assert(valSize == sizeof(INT32));
2264	}
2265
2266	if (valInByte)
2267	{
2268	valSize = sizeof(char);
2269	}
2270
2271	return valSize + emitInsSizeCV(id, code);
2272	}
2273
2274	/*****************************************************************************
2275	*
2276	* Allocate instruction descriptors for instructions with address modes.
2277	*/
2278
2279	inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
2280	{
2281	if (dsp < AM_DISP_MIN \|\| dsp > AM_DISP_MAX)
2282	{
2283	instrDescAmd* id = emitAllocInstrAmd(size);
2284
2285	id->idSetIsLargeDsp();
2286	#ifdef DEBUG
2287	id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2288	#endif
2289	id->idaAmdVal = dsp;
2290
2291	return id;
2292	}
2293	else
2294	{
2295	instrDesc* id = emitAllocInstr(size);
2296
2297	id->idAddr()->iiaAddrMode.amDisp = dsp;
2298	assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2299
2300	return id;
2301	}
2302	}
2303
2304	/*****************************************************************************
2305	*
2306	* Set the displacement field in an instruction. Only handles instrDescAmd type.
2307	*/
2308
2309	inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
2310	{
2311	if (dsp < AM_DISP_MIN \|\| dsp > AM_DISP_MAX)
2312	{
2313	id->idSetIsLargeDsp();
2314	#ifdef DEBUG
2315	id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2316	#endif
2317	id->idaAmdVal = dsp;
2318	}
2319	else
2320	{
2321	id->idSetIsSmallDsp();
2322	id->idAddr()->iiaAddrMode.amDisp = dsp;
2323	assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2324	}
2325	}
2326
2327	/*****************************************************************************
2328	*
2329	* Allocate an instruction descriptor for an instruction that uses both
2330	* an address mode displacement and a constant.
2331	*/
2332
2333	emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
2334	{
2335	if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
2336	{
2337	if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2338	{
2339	instrDesc* id = emitAllocInstr(size);
2340
2341	id->idSmallCns(cns);
2342
2343	id->idAddr()->iiaAddrMode.amDisp = dsp;
2344	assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2345
2346	return id;
2347	}
2348	else
2349	{
2350	instrDescCns* id = emitAllocInstrCns(size);
2351
2352	id->idSetIsLargeCns();
2353	id->idcCnsVal = cns;
2354
2355	id->idAddr()->iiaAddrMode.amDisp = dsp;
2356	assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fit
2357
2358	return id;
2359	}
2360	}
2361	else
2362	{
2363	if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
2364	{
2365	instrDescAmd* id = emitAllocInstrAmd(size);
2366
2367	id->idSetIsLargeDsp();
2368	#ifdef DEBUG
2369	id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2370	#endif
2371	id->idaAmdVal = dsp;
2372
2373	id->idSmallCns(cns);
2374
2375	return id;
2376	}
2377	else
2378	{
2379	instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
2380
2381	id->idSetIsLargeCns();
2382	id->idacCnsVal = cns;
2383
2384	id->idSetIsLargeDsp();
2385	#ifdef DEBUG
2386	id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
2387	#endif
2388	id->idacAmdVal = dsp;
2389
2390	return id;
2391	}
2392	}
2393	}
2394
2395	/*****************************************************************************
2396	*
2397	* The next instruction will be a loop head entry point
2398	* So insert a dummy instruction here to ensure that
2399	* the x86 I-cache alignment rule is followed.
2400	*/
2401
2402	void emitter::emitLoopAlign()
2403	{
2404	/ Insert a pseudo-instruction to ensure that we align*
2405	the next instruction properly /*
2406
2407	instrDesc* id = emitNewInstrSmall(EA_1BYTE);
2408	id->idIns(INS_align);
2409	id->idCodeSize(`15`); // We may need to skip up to 15 bytes of code
2410	emitCurIGsize += `15`;
2411	}
2412
2413	/*****************************************************************************
2414	*
2415	* Add a NOP instruction of the given size.
2416	*/
2417
2418	void emitter::emitIns_Nop(unsigned size)
2419	{
2420	assert(size <= `15`);
2421
2422	instrDesc* id = emitNewInstr();
2423	id->idIns(INS_nop);
2424	id->idInsFmt(IF_NONE);
2425	id->idCodeSize(size);
2426
2427	dispIns(id);
2428	emitCurIGsize += size;
2429	}
2430
2431	/*****************************************************************************
2432	*
2433	* Add an instruction with no operands.
2434	*/
2435	void emitter::emitIns(instruction ins)
2436	{
2437	UNATIVE_OFFSET sz;
2438	instrDesc* id = emitNewInstr();
2439	code_t code = insCodeMR(ins);
2440
2441	#ifdef DEBUG
2442	{
2443	// We cannot have #ifdef inside macro expansion.
2444	bool assertCond =
2445	(ins == INS_cdq \|\| ins == INS_int3 \|\| ins == INS_lock \|\| ins == INS_leave \|\| ins == INS_movsb \|\|
2446	ins == INS_movsd \|\| ins == INS_movsp \|\| ins == INS_nop \|\| ins == INS_r_movsb \|\| ins == INS_r_movsd \|\|
2447	ins == INS_r_movsp \|\| ins == INS_r_stosb \|\| ins == INS_r_stosd \|\| ins == INS_r_stosp \|\| ins == INS_ret \|\|
2448	ins == INS_sahf \|\| ins == INS_stosb \|\| ins == INS_stosd \|\| ins == INS_stosp
2449	// These instructions take zero operands
2450	\|\| ins == INS_vzeroupper \|\| ins == INS_lfence \|\| ins == INS_mfence \|\| ins == INS_sfence);
2451
2452	assert(assertCond);
2453	}
2454	#endif // DEBUG
2455
2456	assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
2457
2458	if (code & `0xFF000000`)
2459	{
2460	sz = `2`; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
2461	}
2462	else if (code & `0x00FF0000`)
2463	{
2464	sz = `3`;
2465	}
2466	else if (code & `0x0000FF00`)
2467	{
2468	sz = `2`;
2469	}
2470	else
2471	{
2472	sz = `1`;
2473	}
2474
2475	// vzeroupper includes its 2-byte VEX prefix in its MR code.
2476	assert((ins != INS_vzeroupper) \|\| (sz == `3`));
2477
2478	insFormat fmt = IF_NONE;
2479
2480	id->idIns(ins);
2481	id->idInsFmt(fmt);
2482	id->idCodeSize(sz);
2483
2484	dispIns(id);
2485	emitCurIGsize += sz;
2486	}
2487
2488	// Add an instruction with no operands, but whose encoding depends on the size
2489	// (Only CDQ/CQO currently)
2490	void emitter::emitIns(instruction ins, emitAttr attr)
2491	{
2492	UNATIVE_OFFSET sz;
2493	instrDesc* id = emitNewInstr(attr);
2494	code_t code = insCodeMR(ins);
2495	assert(ins == INS_cdq);
2496	assert((code & `0xFFFFFF00`) == `0`);
2497	sz = `1`;
2498
2499	insFormat fmt = IF_NONE;
2500
2501	sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
2502	if (TakesRexWPrefix(ins, attr))
2503	{
2504	sz += emitGetRexPrefixSize(ins);
2505	}
2506
2507	id->idIns(ins);
2508	id->idInsFmt(fmt);
2509	id->idCodeSize(sz);
2510
2511	dispIns(id);
2512	emitCurIGsize += sz;
2513	}
2514
2515	//------------------------------------------------------------------------
2516	// emitMapFmtForIns: map the instruction format based on the instruction.
2517	// Shift-by-a-constant instructions have a special format.
2518	//
2519	// Arguments:
2520	// fmt - the instruction format to map
2521	// ins - the instruction
2522	//
2523	// Returns:
2524	// The mapped instruction format.
2525	//
2526	emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
2527	{
2528	switch (ins)
2529	{
2530	case INS_rol_N:
2531	case INS_ror_N:
2532	case INS_rcl_N:
2533	case INS_rcr_N:
2534	case INS_shl_N:
2535	case INS_shr_N:
2536	case INS_sar_N:
2537	{
2538	switch (fmt)
2539	{
2540	case IF_RRW_CNS:
2541	return IF_RRW_SHF;
2542	case IF_MRW_CNS:
2543	return IF_MRW_SHF;
2544	case IF_SRW_CNS:
2545	return IF_SRW_SHF;
2546	case IF_ARW_CNS:
2547	return IF_ARW_SHF;
2548	default:
2549	unreached();
2550	}
2551	}
2552
2553	default:
2554	return fmt;
2555	}
2556	}
2557
2558	//------------------------------------------------------------------------
2559	// emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
2560	//
2561	// Arguments:
2562	// fmt - the instruction format to map
2563	//
2564	// Returns:
2565	// The mapped instruction format.
2566	//
2567	emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
2568	{
2569	switch (fmt)
2570	{
2571	case IF_ARD:
2572	return IF_MRD;
2573	case IF_AWR:
2574	return IF_MWR;
2575	case IF_ARW:
2576	return IF_MRW;
2577
2578	case IF_RRD_ARD:
2579	return IF_RRD_MRD;
2580	case IF_RWR_ARD:
2581	return IF_RWR_MRD;
2582	case IF_RWR_ARD_CNS:
2583	return IF_RWR_MRD_CNS;
2584	case IF_RRW_ARD:
2585	return IF_RRW_MRD;
2586	case IF_RRW_ARD_CNS:
2587	return IF_RRW_MRD_CNS;
2588	case IF_RWR_RRD_ARD:
2589	return IF_RWR_RRD_MRD;
2590	case IF_RWR_RRD_ARD_CNS:
2591	return IF_RWR_RRD_MRD_CNS;
2592	case IF_RWR_RRD_ARD_RRD:
2593	return IF_RWR_RRD_MRD_RRD;
2594
2595	case IF_ARD_RRD:
2596	return IF_MRD_RRD;
2597	case IF_AWR_RRD:
2598	return IF_MWR_RRD;
2599	case IF_ARW_RRD:
2600	return IF_MRW_RRD;
2601
2602	case IF_ARD_CNS:
2603	return IF_MRD_CNS;
2604	case IF_AWR_CNS:
2605	return IF_MWR_CNS;
2606	case IF_ARW_CNS:
2607	return IF_MRW_CNS;
2608
2609	case IF_AWR_RRD_CNS:
2610	return IF_MWR_RRD_CNS;
2611
2612	case IF_ARW_SHF:
2613	return IF_MRW_SHF;
2614
2615	default:
2616	unreached();
2617	}
2618	}
2619
2620	//------------------------------------------------------------------------
2621	// emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
2622	//
2623	// Arguments:
2624	// indir - the memory operand.
2625	// id - the instrDesc to fill in.
2626	// fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
2627	// GT_CLS_VAR_ADDR), this function will map it to the correct format.
2628	// ins - the instruction we are generating. This might affect the instruction format we choose.
2629	//
2630	// Assumptions:
2631	// The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
2632	//
2633	// Post-conditions:
2634	// For base address of int constant:
2635	// -- the caller must have added the int constant base to the instrDesc when creating it via
2636	// emitNewInstrAmdCns().
2637	// For simple address modes (base + scale index + offset):*
2638	// -- the base register, index register, and scale factor are set.
2639	// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
2640	// emitNewInstrAmdCns().
2641	//
2642	// The instruction format is set.
2643	//
2644	// idSetIsDspReloc() is called if necessary.
2645	//
2646	void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
2647	{
2648	assert(fmt != IF_NONE);
2649
2650	GenTree* memBase = indir->Base();
2651
2652	if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
2653	{
2654	CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
2655
2656	// Static always need relocs
2657	if (!jitStaticFldIsGlobAddr(fldHnd))
2658	{
2659	// Contract:
2660	// fgMorphField() changes any statics that won't fit into 32-bit addresses into
2661	// constants with an indir, rather than GT_CLS_VAR, based on reloc type hint given
2662	// by VM. Hence emitter should always mark GT_CLS_VAR_ADDR as relocatable.
2663	//
2664	// Data section constants: these get allocated close to code block of the method and
2665	// always addressable IP relative. These too should be marked as relocatable.
2666
2667	id->idSetIsDspReloc();
2668	}
2669
2670	id->idAddr()->iiaFieldHnd = fldHnd;
2671	id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
2672	}
2673	else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
2674	{
2675	// Absolute addresses marked as contained should fit within the base of addr mode.
2676	assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
2677
2678	// Either not generating relocatable code, or addr must be an icon handle, or the
2679	// constant is zero (which we won't generate a relocation for).
2680	assert(!emitComp->opts.compReloc \|\| memBase->IsIconHandle() \|\| memBase->IsIntegralConst(`0`));
2681
2682	if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
2683	{
2684	id->idSetIsDspReloc();
2685	}
2686
2687	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2688	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2689	id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; // for completeness
2690
2691	id->idInsFmt(emitMapFmtForIns(fmt, ins));
2692
2693	// Absolute address must have already been set in the instrDesc constructor.
2694	assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
2695	}
2696	else
2697	{
2698	if (memBase != nullptr)
2699	{
2700	id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
2701	}
2702	else
2703	{
2704	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
2705	}
2706
2707	if (indir->HasIndex())
2708	{
2709	id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
2710	}
2711	else
2712	{
2713	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2714	}
2715	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
2716
2717	id->idInsFmt(emitMapFmtForIns(fmt, ins));
2718
2719	// disp must have already been set in the instrDesc constructor.
2720	assert(emitGetInsAmdAny(id) == indir->Offset()); // make sure "disp" is stored properly
2721	}
2722	}
2723
2724	// Takes care of storing all incoming register parameters
2725	// into its corresponding shadow space (defined by the x64 ABI)
2726	void emitter::spillIntArgRegsToShadowSlots()
2727	{
2728	unsigned argNum;
2729	instrDesc* id;
2730	UNATIVE_OFFSET sz;
2731
2732	assert(emitComp->compGeneratingProlog);
2733
2734	for (argNum = `0`; argNum < MAX_REG_ARG; ++argNum)
2735	{
2736	regNumber argReg = intArgRegs[argNum];
2737
2738	// The offsets for the shadow space start at RSP + 8
2739	// (right before the caller return address)
2740	int offset = (argNum + `1`) * EA_PTRSIZE;
2741
2742	id = emitNewInstrAmd(EA_PTRSIZE, offset);
2743	id->idIns(INS_mov);
2744	id->idInsFmt(IF_AWR_RRD);
2745	id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
2746	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
2747	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(`1`);
2748
2749	// The offset has already been set in the intrDsc ctor,
2750	// make sure we got it right.
2751	assert(emitGetInsAmdAny(id) == ssize_t(offset));
2752
2753	id->idReg1(argReg);
2754	sz = emitInsSizeAM(id, insCodeMR(INS_mov));
2755	id->idCodeSize(sz);
2756	emitCurIGsize += sz;
2757	}
2758	}
2759
2760	//------------------------------------------------------------------------
2761	// emitInsLoadInd: Emits a "mov reg, [mem]" (or a variant such as "movzx" or "movss")
2762	// instruction for a GT_IND node.
2763	//
2764	// Arguments:
2765	// ins - the instruction to emit
2766	// attr - the instruction operand size
2767	// dstReg - the destination register
2768	// mem - the GT_IND node
2769	//
2770	void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem)
2771	{
2772	assert(mem->OperIs(GT_IND));
2773
2774	GenTree* addr = mem->Addr();
2775
2776	if (addr->OperGet() == GT_CLS_VAR_ADDR)
2777	{
2778	emitIns_R_C(ins, attr, dstReg, addr->gtClsVar.gtClsVarHnd, `0`);
2779	return;
2780	}
2781
2782	if (addr->OperGet() == GT_LCL_VAR_ADDR)
2783	{
2784	GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2785	emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), `0`);
2786	codeGen->genUpdateLife(varNode);
2787	return;
2788	}
2789
2790	assert(addr->OperIsAddrMode() \|\| (addr->IsCnsIntOrI() && addr->isContained()) \|\| !addr->isContained());
2791	ssize_t offset = mem->Offset();
2792	instrDesc* id = emitNewInstrAmd(attr, offset);
2793	id->idIns(ins);
2794	id->idReg1(dstReg);
2795	emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
2796	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
2797	id->idCodeSize(sz);
2798	dispIns(id);
2799	emitCurIGsize += sz;
2800	}
2801
2802	//------------------------------------------------------------------------
2803	// emitInsStoreInd: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2804	// instruction for a GT_STOREIND node.
2805	//
2806	// Arguments:
2807	// ins - the instruction to emit
2808	// attr - the instruction operand size
2809	// mem - the GT_STOREIND node
2810	//
2811	void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem)
2812	{
2813	assert(mem->OperIs(GT_STOREIND));
2814
2815	GenTree* addr = mem->Addr();
2816	GenTree* data = mem->Data();
2817
2818	if (addr->OperGet() == GT_CLS_VAR_ADDR)
2819	{
2820	if (data->isContainedIntOrIImmed())
2821	{
2822	emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, `0`, (int)data->AsIntConCommon()->IconValue());
2823	}
2824	else
2825	{
2826	assert(!data->isContained());
2827	emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, `0`);
2828	}
2829	return;
2830	}
2831
2832	if (addr->OperGet() == GT_LCL_VAR_ADDR)
2833	{
2834	GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
2835	if (data->isContainedIntOrIImmed())
2836	{
2837	emitIns_S_I(ins, attr, varNode->GetLclNum(), `0`, (int)data->AsIntConCommon()->IconValue());
2838	}
2839	else
2840	{
2841	assert(!data->isContained());
2842	emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), `0`);
2843	}
2844	codeGen->genUpdateLife(varNode);
2845	return;
2846	}
2847
2848	ssize_t offset = mem->Offset();
2849	UNATIVE_OFFSET sz;
2850	instrDesc* id;
2851
2852	if (data->isContainedIntOrIImmed())
2853	{
2854	int icon = (int)data->AsIntConCommon()->IconValue();
2855	id = emitNewInstrAmdCns(attr, offset, icon);
2856	id->idIns(ins);
2857	emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
2858	sz = emitInsSizeAM(id, insCodeMI(ins), icon);
2859	id->idCodeSize(sz);
2860	}
2861	else
2862	{
2863	assert(!data->isContained());
2864	id = emitNewInstrAmd(attr, offset);
2865	id->idIns(ins);
2866	emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
2867	id->idReg1(data->gtRegNum);
2868	sz = emitInsSizeAM(id, insCodeMR(ins));
2869	id->idCodeSize(sz);
2870	}
2871
2872	dispIns(id);
2873	emitCurIGsize += sz;
2874	}
2875
2876	//------------------------------------------------------------------------
2877	// emitInsStoreLcl: Emits a "mov [mem], reg/imm" (or a variant such as "movss")
2878	// instruction for a GT_STORE_LCL_VAR node.
2879	//
2880	// Arguments:
2881	// ins - the instruction to emit
2882	// attr - the instruction operand size
2883	// varNode - the GT_STORE_LCL_VAR node
2884	//
2885	void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode)
2886	{
2887	assert(varNode->OperIs(GT_STORE_LCL_VAR));
2888	assert(varNode->gtRegNum == REG_NA); // stack store
2889
2890	GenTree* data = varNode->gtGetOp1();
2891	codeGen->inst_set_SV_var(varNode);
2892
2893	if (data->isContainedIntOrIImmed())
2894	{
2895	emitIns_S_I(ins, attr, varNode->GetLclNum(), `0`, (int)data->AsIntConCommon()->IconValue());
2896	}
2897	else
2898	{
2899	assert(!data->isContained());
2900	emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), `0`);
2901	}
2902	codeGen->genUpdateLife(varNode);
2903	}
2904
2905	//------------------------------------------------------------------------
2906	// emitInsBinary: Emits an instruction for a node which takes two operands
2907	//
2908	// Arguments:
2909	// ins - the instruction to emit
2910	// attr - the instruction operand size
2911	// dst - the destination and first source operand
2912	// src - the second source operand
2913	//
2914	// Assumptions:
2915	// i) caller of this routine needs to call genConsumeReg()
2916	// ii) caller of this routine needs to call genProduceReg()
2917	regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
2918	{
2919	// We can only have one memory operand and only src can be a constant operand
2920	// However, the handling for a given operand type (mem, cns, or other) is fairly
2921	// consistent regardless of whether they are src or dst. As such, we will find
2922	// the type of each operand and only check them against src/dst where relevant.
2923
2924	GenTree* memOp = nullptr;
2925	GenTree* cnsOp = nullptr;
2926	GenTree* otherOp = nullptr;
2927
2928	if (dst->isContained() \|\| (dst->isLclField() && (dst->gtRegNum == REG_NA)) \|\| dst->isUsedFromSpillTemp())
2929	{
2930	// dst can only be a modrm
2931	// dst on 3opImul isn't really the dst
2932	assert(dst->isUsedFromMemory() \|\| (dst->gtRegNum == REG_NA) \|\| instrIs3opImul(ins));
2933	assert(!src->isUsedFromMemory());
2934
2935	memOp = dst;
2936
2937	if (src->isContained())
2938	{
2939	assert(src->IsCnsIntOrI());
2940	cnsOp = src;
2941	}
2942	else
2943	{
2944	otherOp = src;
2945	}
2946	}
2947	else if (src->isContained() \|\| src->isUsedFromSpillTemp())
2948	{
2949	assert(!dst->isUsedFromMemory());
2950	otherOp = dst;
2951
2952	if ((src->IsCnsIntOrI() \|\| src->IsCnsFltOrDbl()) && !src->isUsedFromSpillTemp())
2953	{
2954	assert(!src->isUsedFromMemory() \|\| src->IsCnsFltOrDbl());
2955	cnsOp = src;
2956	}
2957	else
2958	{
2959	assert(src->isUsedFromMemory());
2960	memOp = src;
2961	}
2962	}
2963
2964	// At this point, we either have a memory operand or we don't.
2965	//
2966	// If we don't then the logic is very simple and we will either be emitting a
2967	// `reg, immed` instruction (if src is a cns) or a `reg, reg` instruction otherwise.
2968	//
2969	// If we do have a memory operand, the logic is a bit more complicated as we need
2970	// to do different things depending on the type of memory operand. These types include:
2971	// Spill temp*
2972	// Indirect access*
2973	// Local variable*
2974	// Class variable*
2975	// Addressing mode [base + index * scale + offset]*
2976	// Local field*
2977	// Local variable*
2978	//
2979	// Most of these types (except Indirect: Class variable and Indirect: Addressing mode)
2980	// give us a a local variable number and an offset and access memory on the stack
2981	//
2982	// Indirect: Class variable is used for access static class variables and gives us a handle
2983	// to the memory location we read from
2984	//
2985	// Indirect: Addressing mode is used for the remaining memory accesses and will give us
2986	// a base address, an index, a scale, and an offset. These are combined to let us easily
2987	// access the given memory location.
2988	//
2989	// In all of the memory access cases, we determine which form to emit (e.g. `reg, [mem]`
2990	// or `[mem], reg`) by comparing memOp to src to determine which `emitIns_` method needs*
2991	// to be called. The exception is for the `[mem], immed` case (for Indirect: Class variable)
2992	// where only src can be the immediate.
2993
2994	if (memOp != nullptr)
2995	{
2996	TempDsc* tmpDsc = nullptr;
2997	unsigned varNum = BAD_VAR_NUM;
2998	unsigned offset = (unsigned)-`1`;
2999
3000	if (memOp->isUsedFromSpillTemp())
3001	{
3002	assert(memOp->IsRegOptional());
3003
3004	tmpDsc = codeGen->getSpillTempDsc(memOp);
3005	varNum = tmpDsc->tdTempNum();
3006	offset = `0`;
3007
3008	codeGen->regSet.tmpRlsTemp(tmpDsc);
3009	}
3010	else if (memOp->isIndir())
3011	{
3012	GenTreeIndir* memIndir = memOp->AsIndir();
3013	GenTree* memBase = memIndir->gtOp1;
3014
3015	switch (memBase->OperGet())
3016	{
3017	case GT_LCL_VAR_ADDR:
3018	{
3019	varNum = memBase->AsLclVarCommon()->GetLclNum();
3020	offset = `0`;
3021
3022	// Ensure that all the GenTreeIndir values are set to their defaults.
3023	assert(!memIndir->HasIndex());
3024	assert(memIndir->Scale() == `1`);
3025	assert(memIndir->Offset() == `0`);
3026
3027	break;
3028	}
3029
3030	case GT_CLS_VAR_ADDR:
3031	{
3032	if (memOp == src)
3033	{
3034	assert(otherOp == dst);
3035	assert(cnsOp == nullptr);
3036
3037	if (instrHasImplicitRegPairDest(ins))
3038	{
3039	// src is a class static variable
3040	// dst is implicit - RDX:RAX
3041	emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, `0`);
3042	}
3043	else
3044	{
3045	// src is a class static variable
3046	// dst is a register
3047	emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, `0`);
3048	}
3049	}
3050	else
3051	{
3052	assert(memOp == dst);
3053
3054	if (cnsOp != nullptr)
3055	{
3056	assert(cnsOp == src);
3057	assert(otherOp == nullptr);
3058	assert(src->IsCnsIntOrI());
3059
3060	// src is an contained immediate
3061	// dst is a class static variable
3062	emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, `0`,
3063	(int)src->gtIntConCommon.IconValue());
3064	}
3065	else
3066	{
3067	assert(otherOp == src);
3068
3069	// src is a register
3070	// dst is a class static variable
3071	emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, `0`);
3072	}
3073	}
3074
3075	return dst->gtRegNum;
3076	}
3077
3078	default: // Addressing mode [base + index scale + offset]*
3079	{
3080	instrDesc* id = nullptr;
3081
3082	if (cnsOp != nullptr)
3083	{
3084	assert(memOp == dst);
3085	assert(cnsOp == src);
3086	assert(otherOp == nullptr);
3087	assert(src->IsCnsIntOrI());
3088
3089	id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->gtIntConCommon.IconValue());
3090	}
3091	else
3092	{
3093	ssize_t offset = memIndir->Offset();
3094	id = emitNewInstrAmd(attr, offset);
3095	id->idIns(ins);
3096
3097	GenTree* regTree = (memOp == src) ? dst : src;
3098
3099	// there must be one non-contained op
3100	assert(!regTree->isContained());
3101	id->idReg1(regTree->gtRegNum);
3102	}
3103	assert(id != nullptr);
3104
3105	id->idIns(ins); // Set the instruction.
3106
3107	// Determine the instruction format
3108	insFormat fmt = IF_NONE;
3109
3110	if (memOp == src)
3111	{
3112	assert(cnsOp == nullptr);
3113	assert(otherOp == dst);
3114
3115	if (instrHasImplicitRegPairDest(ins))
3116	{
3117	fmt = emitInsModeFormat(ins, IF_ARD);
3118	}
3119	else
3120	{
3121	fmt = emitInsModeFormat(ins, IF_RRD_ARD);
3122	}
3123	}
3124	else
3125	{
3126	assert(memOp == dst);
3127
3128	if (cnsOp != nullptr)
3129	{
3130	assert(cnsOp == src);
3131	assert(otherOp == nullptr);
3132	assert(src->IsCnsIntOrI());
3133
3134	fmt = emitInsModeFormat(ins, IF_ARD_CNS);
3135	}
3136	else
3137	{
3138	assert(otherOp == src);
3139	fmt = emitInsModeFormat(ins, IF_ARD_RRD);
3140	}
3141	}
3142	assert(fmt != IF_NONE);
3143	emitHandleMemOp(memIndir, id, fmt, ins);
3144
3145	// Determine the instruction size
3146	UNATIVE_OFFSET sz = `0`;
3147
3148	if (memOp == src)
3149	{
3150	assert(otherOp == dst);
3151	assert(cnsOp == nullptr);
3152
3153	if (instrHasImplicitRegPairDest(ins))
3154	{
3155	sz = emitInsSizeAM(id, insCode(ins));
3156	}
3157	else
3158	{
3159	sz = emitInsSizeAM(id, insCodeRM(ins));
3160	}
3161	}
3162	else
3163	{
3164	assert(memOp == dst);
3165
3166	if (cnsOp != nullptr)
3167	{
3168	assert(memOp == dst);
3169	assert(cnsOp == src);
3170	assert(otherOp == nullptr);
3171
3172	sz = emitInsSizeAM(id, insCodeMI(ins), (int)src->gtIntConCommon.IconValue());
3173	}
3174	else
3175	{
3176	assert(otherOp == src);
3177	sz = emitInsSizeAM(id, insCodeMR(ins));
3178	}
3179	}
3180	assert(sz != `0`);
3181
3182	id->idCodeSize(sz);
3183
3184	dispIns(id);
3185	emitCurIGsize += sz;
3186
3187	return (memOp == src) ? dst->gtRegNum : REG_NA;
3188	}
3189	}
3190	}
3191	else
3192	{
3193	switch (memOp->OperGet())
3194	{
3195	case GT_LCL_FLD:
3196	case GT_STORE_LCL_FLD:
3197	{
3198	GenTreeLclFld* lclField = memOp->AsLclFld();
3199	varNum = lclField->GetLclNum();
3200	offset = lclField->gtLclFld.gtLclOffs;
3201	break;
3202	}
3203
3204	case GT_LCL_VAR:
3205	{
3206	assert(memOp->IsRegOptional() \|\| !emitComp->lvaTable[memOp->gtLclVar.gtLclNum].lvIsRegCandidate());
3207	varNum = memOp->AsLclVar()->GetLclNum();
3208	offset = `0`;
3209	break;
3210	}
3211
3212	default:
3213	unreached();
3214	break;
3215	}
3216	}
3217
3218	// Ensure we got a good varNum and offset.
3219	// We also need to check for `tmpDsc != nullptr` since spill temp numbers
3220	// are negative and start with -1, which also happens to be BAD_VAR_NUM.
3221	assert((varNum != BAD_VAR_NUM) \|\| (tmpDsc != nullptr));
3222	assert(offset != (unsigned)-`1`);
3223
3224	if (memOp == src)
3225	{
3226	assert(otherOp == dst);
3227	assert(cnsOp == nullptr);
3228
3229	if (instrHasImplicitRegPairDest(ins))
3230	{
3231	// src is a stack based local variable
3232	// dst is implicit - RDX:RAX
3233	emitIns_S(ins, attr, varNum, offset);
3234	}
3235	else
3236	{
3237	// src is a stack based local variable
3238	// dst is a register
3239	emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
3240	}
3241	}
3242	else
3243	{
3244	assert(memOp == dst);
3245	assert((dst->gtRegNum == REG_NA) \|\| dst->IsRegOptional());
3246
3247	if (cnsOp != nullptr)
3248	{
3249	assert(cnsOp == src);
3250	assert(otherOp == nullptr);
3251	assert(src->IsCnsIntOrI());
3252
3253	// src is an contained immediate
3254	// dst is a stack based local variable
3255	emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
3256	}
3257	else
3258	{
3259	assert(otherOp == src);
3260	assert(!src->isContained());
3261
3262	// src is a register
3263	// dst is a stack based local variable
3264	emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
3265	}
3266	}
3267	}
3268	else if (cnsOp != nullptr) // reg, immed
3269	{
3270	assert(cnsOp == src);
3271	assert(otherOp == dst);
3272
3273	if (src->IsCnsIntOrI())
3274	{
3275	assert(!dst->isContained());
3276	GenTreeIntConCommon* intCns = src->AsIntConCommon();
3277	emitIns_R_I(ins, attr, dst->gtRegNum, intCns->IconValue());
3278	}
3279	else
3280	{
3281	assert(src->IsCnsFltOrDbl());
3282	GenTreeDblCon* dblCns = src->AsDblCon();
3283
3284	CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns));
3285	emitIns_R_C(ins, attr, dst->gtRegNum, hnd, `0`);
3286	}
3287	}
3288	else // reg, reg
3289	{
3290	assert(otherOp == nullptr);
3291	assert(!src->isContained() && !dst->isContained());
3292
3293	if (instrHasImplicitRegPairDest(ins))
3294	{
3295	emitIns_R(ins, attr, src->gtRegNum);
3296	}
3297	else
3298	{
3299	emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
3300	}
3301	}
3302
3303	return dst->gtRegNum;
3304	}
3305
3306	//------------------------------------------------------------------------
3307	// emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
3308	//
3309	// Responsible for emitting a single instruction that will perform an operation of the form:
3310	// addr = addr <BinOp> src
3311	// For example:
3312	// ADD [RAX], RCX
3313	//
3314	// Arguments:
3315	// ins - instruction to generate
3316	// attr - emitter attribute for instruction
3317	// storeInd - indir for RMW addressing mode
3318	// src - source operand of instruction
3319	//
3320	// Assumptions:
3321	// Lowering has taken care of recognizing the StoreInd pattern of:
3322	// StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
3323	// The address to store is already sitting in a register.
3324	//
3325	// Notes:
3326	// This is a no-produce operation, meaning that no register output will
3327	// be produced for future use in the code stream.
3328	//
3329	void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
3330	{
3331	GenTree* addr = storeInd->Addr();
3332	addr = addr->gtSkipReloadOrCopy();
3333	assert(addr->OperGet() == GT_LCL_VAR \|\| addr->OperGet() == GT_LCL_VAR_ADDR \|\| addr->OperGet() == GT_LEA \|\|
3334	addr->OperGet() == GT_CLS_VAR_ADDR \|\| addr->OperGet() == GT_CNS_INT);
3335
3336	instrDesc* id = nullptr;
3337	UNATIVE_OFFSET sz;
3338
3339	ssize_t offset = `0`;
3340	if (addr->OperGet() != GT_CLS_VAR_ADDR)
3341	{
3342	offset = storeInd->Offset();
3343	}
3344
3345	if (src->isContainedIntOrIImmed())
3346	{
3347	GenTreeIntConCommon* intConst = src->AsIntConCommon();
3348	int iconVal = (int)intConst->IconValue();
3349	switch (ins)
3350	{
3351	case INS_rcl_N:
3352	case INS_rcr_N:
3353	case INS_rol_N:
3354	case INS_ror_N:
3355	case INS_shl_N:
3356	case INS_shr_N:
3357	case INS_sar_N:
3358	iconVal &= `0x7F`;
3359	break;
3360	default:
3361	break;
3362	}
3363
3364	id = emitNewInstrAmdCns(attr, offset, iconVal);
3365	emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
3366	id->idIns(ins);
3367	sz = emitInsSizeAM(id, insCodeMI(ins), iconVal);
3368	}
3369	else
3370	{
3371	assert(!src->isContained()); // there must be one non-contained src
3372
3373	// ind, reg
3374	id = emitNewInstrAmd(attr, offset);
3375	emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
3376	id->idReg1(src->gtRegNum);
3377	id->idIns(ins);
3378	sz = emitInsSizeAM(id, insCodeMR(ins));
3379	}
3380
3381	id->idCodeSize(sz);
3382
3383	dispIns(id);
3384	emitCurIGsize += sz;
3385	}
3386
3387	//------------------------------------------------------------------------
3388	// emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
3389	//
3390	// Responsible for emitting a single instruction that will perform an operation of the form:
3391	// addr = UnaryOp addr
3392	// For example:
3393	// NOT [RAX]
3394	//
3395	// Arguments:
3396	// ins - instruction to generate
3397	// attr - emitter attribute for instruction
3398	// storeInd - indir for RMW addressing mode
3399	//
3400	// Assumptions:
3401	// Lowering has taken care of recognizing the StoreInd pattern of:
3402	// StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
3403	// The address to store is already sitting in a register.
3404	//
3405	// Notes:
3406	// This is a no-produce operation, meaning that no register output will
3407	// be produced for future use in the code stream.
3408	//
3409	void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
3410	{
3411	GenTree* addr = storeInd->Addr();
3412	addr = addr->gtSkipReloadOrCopy();
3413	assert(addr->OperGet() == GT_LCL_VAR \|\| addr->OperGet() == GT_LCL_VAR_ADDR \|\| addr->OperGet() == GT_CLS_VAR_ADDR \|\|
3414	addr->OperGet() == GT_LEA \|\| addr->OperGet() == GT_CNS_INT);
3415
3416	ssize_t offset = `0`;
3417	if (addr->OperGet() != GT_CLS_VAR_ADDR)
3418	{
3419	offset = storeInd->Offset();
3420	}
3421
3422	instrDesc* id = emitNewInstrAmd(attr, offset);
3423	emitHandleMemOp(storeInd, id, IF_ARW, ins);
3424	id->idIns(ins);
3425	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3426	id->idCodeSize(sz);
3427
3428	dispIns(id);
3429	emitCurIGsize += sz;
3430	}
3431
3432	/*****************************************************************************
3433	*
3434	* Add an instruction referencing a single register.
3435	*/
3436
3437	void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
3438	{
3439	emitAttr size = EA_SIZE(attr);
3440
3441	assert(size <= EA_PTRSIZE);
3442	noway_assert(emitVerifyEncodable(ins, size, reg));
3443
3444	UNATIVE_OFFSET sz;
3445	instrDesc* id = emitNewInstrSmall(attr);
3446
3447	switch (ins)
3448	{
3449	case INS_inc:
3450	case INS_dec:
3451	#ifdef _TARGET_AMD64_
3452
3453	sz = `2`; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
3454
3455	#else // !_TARGET_AMD64_
3456
3457	if (size == EA_1BYTE)
3458	sz = `2`; // Use the long form as the small one has no 'w' bit
3459	else
3460	sz = `1`; // Use short form
3461
3462	#endif // !_TARGET_AMD64_
3463
3464	break;
3465
3466	case INS_pop:
3467	case INS_pop_hide:
3468	case INS_push:
3469	case INS_push_hide:
3470
3471	/ We don't currently push/pop small values /
3472
3473	assert(size == EA_PTRSIZE);
3474
3475	sz = `1`;
3476	break;
3477
3478	default:
3479
3480	/ All the sixteen INS_setCCs are contiguous. /
3481
3482	if (INS_seto <= ins && ins <= INS_setg)
3483	{
3484	// Rough check that we used the endpoints for the range check
3485
3486	assert(INS_seto + `0xF` == INS_setg);
3487
3488	// The caller must specify EA_1BYTE for 'attr'
3489
3490	assert(attr == EA_1BYTE);
3491
3492	/ We expect this to always be a 'big' opcode /
3493
3494	assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & `0x00FF0000`);
3495
3496	size = attr;
3497
3498	sz = `3`;
3499	break;
3500	}
3501	else
3502	{
3503	sz = `2`;
3504	break;
3505	}
3506	}
3507	insFormat fmt = emitInsModeFormat(ins, IF_RRD);
3508
3509	id->idIns(ins);
3510	id->idInsFmt(fmt);
3511	id->idReg1(reg);
3512
3513	// 16-bit operand instructions will need a prefix.
3514	// This refers to 66h size prefix override.
3515	if (size == EA_2BYTE)
3516	{
3517	sz += `1`;
3518	}
3519
3520	// Vex bytes
3521	sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
3522
3523	// REX byte
3524	if (IsExtendedReg(reg, attr) \|\| TakesRexWPrefix(ins, attr))
3525	{
3526	sz += emitGetRexPrefixSize(ins);
3527	}
3528
3529	id->idCodeSize(sz);
3530
3531	dispIns(id);
3532	emitCurIGsize += sz;
3533
3534	emitAdjustStackDepthPushPop(ins);
3535	}
3536
3537	/*****************************************************************************
3538	*
3539	* Add an instruction referencing a register and a constant.
3540	*/
3541
3542	void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
3543	{
3544	emitAttr size = EA_SIZE(attr);
3545
3546	// Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
3547	assert(size <= EA_PTRSIZE \|\| IsSSEOrAVXInstruction(ins));
3548
3549	noway_assert(emitVerifyEncodable(ins, size, reg));
3550
3551	#ifdef _TARGET_AMD64_
3552	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3553	// all other opcodes take a sign-extended 4-byte immediate
3554	noway_assert(size < EA_8BYTE \|\| ins == INS_mov \|\| ((int)val == val && !EA_IS_CNS_RELOC(attr)));
3555	#endif
3556
3557	UNATIVE_OFFSET sz;
3558	instrDesc* id;
3559	insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS);
3560	bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
3561
3562	// BT reg,imm might be useful but it requires special handling of the immediate value
3563	// (it is always encoded in a byte). Let's not complicate things until this is needed.
3564	assert(ins != INS_bt);
3565
3566	// Figure out the size of the instruction
3567	switch (ins)
3568	{
3569	case INS_mov:
3570	#ifdef _TARGET_AMD64_
3571	// mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
3572	// and this isn't a reloc constant.
3573	if (((size > EA_4BYTE) && (`0` == (val & `0xFFFFFFFF00000000LL`))) && !EA_IS_CNS_RELOC(attr))
3574	{
3575	attr = size = EA_4BYTE;
3576	}
3577
3578	if (size > EA_4BYTE)
3579	{
3580	sz = `9`; // Really it is 10, but we'll add one more later
3581	break;
3582	}
3583	#endif // _TARGET_AMD64_
3584	sz = `5`;
3585	break;
3586
3587	case INS_rcl_N:
3588	case INS_rcr_N:
3589	case INS_rol_N:
3590	case INS_ror_N:
3591	case INS_shl_N:
3592	case INS_shr_N:
3593	case INS_sar_N:
3594	assert(val != `1`);
3595	fmt = IF_RRW_SHF;
3596	sz = `3`;
3597	val &= `0x7F`;
3598	valInByte = true; // shift amount always placed in a byte
3599	break;
3600
3601	default:
3602
3603	if (EA_IS_CNS_RELOC(attr))
3604	{
3605	valInByte = false; // relocs can't be placed in a byte
3606	}
3607
3608	if (valInByte)
3609	{
3610	if (IsSSEOrAVXInstruction(ins))
3611	{
3612	sz = `5`;
3613	}
3614	else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins))
3615	{
3616	sz = `2`;
3617	}
3618	else
3619	{
3620	sz = `3`;
3621	}
3622	}
3623	else
3624	{
3625	if (reg == REG_EAX && !instrIs3opImul(ins))
3626	{
3627	sz = `1`;
3628	}
3629	else
3630	{
3631	sz = `2`;
3632	}
3633
3634	#ifdef _TARGET_AMD64_
3635	if (size > EA_4BYTE)
3636	{
3637	// We special-case anything that takes a full 8-byte constant.
3638	sz += `4`;
3639	}
3640	else
3641	#endif // _TARGET_AMD64_
3642	{
3643	sz += EA_SIZE_IN_BYTES(attr);
3644	}
3645	}
3646	break;
3647	}
3648
3649	// Vex prefix size
3650	sz += emitGetVexPrefixSize(ins, attr);
3651
3652	// Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
3653	// 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
3654	// register. So we also need to check if that built-in register is an extended register.
3655	if (IsExtendedReg(reg, attr) \|\| TakesRexWPrefix(ins, size) \|\| instrIsExtendedReg3opImul(ins))
3656	{
3657	sz += emitGetRexPrefixSize(ins);
3658	}
3659
3660	id = emitNewInstrSC(attr, val);
3661	id->idIns(ins);
3662	id->idInsFmt(fmt);
3663	id->idReg1(reg);
3664
3665	// 16-bit operand instructions will need a prefix
3666	if (size == EA_2BYTE)
3667	{
3668	sz += `1`;
3669	}
3670
3671	id->idCodeSize(sz);
3672
3673	dispIns(id);
3674	emitCurIGsize += sz;
3675
3676	if (reg == REG_ESP)
3677	{
3678	emitAdjustStackDepth(ins, val);
3679	}
3680	}
3681
3682	/*****************************************************************************
3683	*
3684	* Add an instruction referencing an integer constant.
3685	*/
3686
3687	void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
3688	{
3689	UNATIVE_OFFSET sz;
3690	instrDesc* id;
3691	bool valInByte = ((signed char)val == val);
3692
3693	#ifdef _TARGET_AMD64_
3694	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3695	// all other opcodes take a sign-extended 4-byte immediate
3696	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
3697	#endif
3698
3699	if (EA_IS_CNS_RELOC(attr))
3700	{
3701	valInByte = false; // relocs can't be placed in a byte
3702	}
3703
3704	switch (ins)
3705	{
3706	case INS_loop:
3707	case INS_jge:
3708	sz = `2`;
3709	break;
3710
3711	case INS_ret:
3712	sz = `3`;
3713	break;
3714
3715	case INS_push_hide:
3716	case INS_push:
3717	sz = valInByte ? `2` : `5`;
3718	break;
3719
3720	default:
3721	NO_WAY("unexpected instruction");
3722	}
3723
3724	id = emitNewInstrSC(attr, val);
3725	id->idIns(ins);
3726	id->idInsFmt(IF_CNS);
3727	id->idCodeSize(sz);
3728
3729	dispIns(id);
3730	emitCurIGsize += sz;
3731
3732	emitAdjustStackDepthPushPop(ins);
3733	}
3734
3735	/*****************************************************************************
3736	*
3737	* Add a "jump through a table" instruction.
3738	*/
3739
3740	void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
3741	{
3742	assert(EA_SIZE(attr) == EA_4BYTE);
3743
3744	UNATIVE_OFFSET sz = `3` + `4`;
3745	const instruction ins = INS_i_jmp;
3746
3747	if (IsExtendedReg(reg, attr))
3748	{
3749	sz += emitGetRexPrefixSize(ins);
3750	}
3751
3752	instrDesc* id = emitNewInstrAmd(attr, base);
3753
3754	id->idIns(ins);
3755	id->idInsFmt(IF_ARD);
3756	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
3757	id->idAddr()->iiaAddrMode.amIndxReg = reg;
3758	id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP;
3759
3760	#ifdef DEBUG
3761	id->idDebugOnlyInfo()->idMemCookie = base;
3762	#endif
3763
3764	id->idCodeSize(sz);
3765
3766	dispIns(id);
3767	emitCurIGsize += sz;
3768	}
3769
3770	/*****************************************************************************
3771	*
3772	* Add an instruction with a static data member operand. If 'size' is 0, the
3773	* instruction operates on the address of the static member instead of its
3774	* value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
3775	*/
3776
3777	void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
3778	{
3779	// Static always need relocs
3780	if (!jitStaticFldIsGlobAddr(fldHnd))
3781	{
3782	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
3783	}
3784
3785	UNATIVE_OFFSET sz;
3786	instrDesc* id;
3787
3788	/ Are we pushing the offset of the class variable? /
3789
3790	if (EA_IS_OFFSET(attr))
3791	{
3792	assert(ins == INS_push);
3793	sz = `1` + TARGET_POINTER_SIZE;
3794
3795	id = emitNewInstrDsp(EA_1BYTE, offs);
3796	id->idIns(ins);
3797	id->idInsFmt(IF_MRD_OFF);
3798	}
3799	else
3800	{
3801	insFormat fmt = emitInsModeFormat(ins, IF_MRD);
3802
3803	id = emitNewInstrDsp(attr, offs);
3804	id->idIns(ins);
3805	id->idInsFmt(fmt);
3806	sz = emitInsSizeCV(id, insCodeMR(ins));
3807	}
3808
3809	// Vex prefix size
3810	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
3811
3812	if (TakesRexWPrefix(ins, attr))
3813	{
3814	// REX.W prefix
3815	sz += emitGetRexPrefixSize(ins);
3816	}
3817
3818	id->idAddr()->iiaFieldHnd = fldHnd;
3819
3820	id->idCodeSize(sz);
3821
3822	dispIns(id);
3823	emitCurIGsize += sz;
3824
3825	emitAdjustStackDepthPushPop(ins);
3826	}
3827
3828	/*****************************************************************************
3829	*
3830	* Add an instruction with two register operands.
3831	*/
3832
3833	void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
3834	{
3835	emitAttr size = EA_SIZE(attr);
3836
3837	/ We don't want to generate any useless mov instructions! /
3838	CLANG_FORMAT_COMMENT_ANCHOR;
3839
3840	#ifdef _TARGET_AMD64_
3841	// Same-reg 4-byte mov can be useful because it performs a
3842	// zero-extension to 8 bytes.
3843	assert(ins != INS_mov \|\| reg1 != reg2 \|\| size == EA_4BYTE);
3844	#else
3845	assert(ins != INS_mov \|\| reg1 != reg2);
3846	#endif // _TARGET_AMD64_
3847
3848	assert(size <= EA_32BYTE);
3849	noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
3850
3851	UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
3852
3853	if (Is4ByteSSEInstruction(ins))
3854	{
3855	// The 4-Byte SSE instructions require one additional byte
3856	sz += `1`;
3857	}
3858
3859	/ Special case: "XCHG" uses a different format /
3860	insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
3861
3862	instrDesc* id = emitNewInstrSmall(attr);
3863	id->idIns(ins);
3864	id->idInsFmt(fmt);
3865	id->idReg1(reg1);
3866	id->idReg2(reg2);
3867	id->idCodeSize(sz);
3868
3869	dispIns(id);
3870	emitCurIGsize += sz;
3871	}
3872
3873	/*****************************************************************************
3874	*
3875	* Add an instruction with two register operands and an integer constant.
3876	*/
3877
3878	void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
3879	{
3880	// SSE2 version requires 5 bytes and some SSE/AVX version 6 bytes
3881	UNATIVE_OFFSET sz = `4`;
3882	if (IsSSEOrAVXInstruction(ins))
3883	{
3884	// AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
3885	// SSE: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
3886	// SSE: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
3887	sz = (UseVEXEncoding() \|\| Is4ByteSSEInstruction(ins)) ? `6` : `5`;
3888	}
3889
3890	#ifdef _TARGET_AMD64_
3891	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
3892	// all other opcodes take a sign-extended 4-byte immediate
3893	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
3894	#endif
3895
3896	instrDesc* id = emitNewInstrSC(attr, ival);
3897
3898	// REX prefix
3899	if (IsExtendedReg(reg1, attr) \|\| IsExtendedReg(reg2, attr))
3900	{
3901	sz += emitGetRexPrefixSize(ins);
3902	}
3903
3904	if ((ins == INS_pextrq \|\| ins == INS_pinsrq) && !UseVEXEncoding())
3905	{
3906	sz += `1`;
3907	}
3908
3909	id->idIns(ins);
3910	id->idInsFmt(IF_RRW_RRW_CNS);
3911	id->idReg1(reg1);
3912	id->idReg2(reg2);
3913	id->idCodeSize(sz);
3914
3915	dispIns(id);
3916	emitCurIGsize += sz;
3917	}
3918
3919	void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
3920	{
3921	assert(ins == INS_prefetcht0 \|\| ins == INS_prefetcht1 \|\| ins == INS_prefetcht2 \|\| ins == INS_prefetchnta);
3922
3923	instrDesc* id = emitNewInstrAmd(attr, offs);
3924
3925	id->idIns(ins);
3926
3927	id->idInsFmt(IF_ARD);
3928	id->idAddr()->iiaAddrMode.amBaseReg = base;
3929	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
3930
3931	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3932	id->idCodeSize(sz);
3933
3934	dispIns(id);
3935	emitCurIGsize += sz;
3936	}
3937
3938	//------------------------------------------------------------------------
3939	// emitIns_AR_R_R: emits the code for an instruction that takes a base memory register, two register operands
3940	// and that does not return a value
3941	//
3942	// Arguments:
3943	// ins -- The instruction being emitted
3944	// attr -- The emit attribute
3945	// targetReg -- The target register
3946	// op2Reg -- The register of the second operand
3947	// op3Reg -- The register of the third operand
3948	// base -- The base register used for the memory address (first operand)
3949	// offs -- The offset from base
3950	//
3951	void emitter::emitIns_AR_R_R(
3952	instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs)
3953	{
3954	assert(IsSSEOrAVXInstruction(ins));
3955	assert(IsThreeOperandAVXInstruction(ins));
3956
3957	instrDesc* id = emitNewInstrAmd(attr, offs);
3958
3959	id->idIns(ins);
3960	id->idReg1(op2Reg);
3961	id->idReg2(op3Reg);
3962
3963	id->idInsFmt(IF_AWR_RRD_RRD);
3964	id->idAddr()->iiaAddrMode.amBaseReg = base;
3965	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
3966
3967	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
3968	id->idCodeSize(sz);
3969
3970	dispIns(id);
3971	emitCurIGsize += sz;
3972	}
3973
3974	void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir)
3975	{
3976	ssize_t offs = indir->Offset();
3977	instrDesc* id = emitNewInstrAmd(attr, offs);
3978
3979	id->idIns(ins);
3980	id->idReg1(reg1);
3981
3982	emitHandleMemOp(indir, id, IF_RRW_ARD, ins);
3983
3984	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
3985	id->idCodeSize(sz);
3986
3987	dispIns(id);
3988	emitCurIGsize += sz;
3989	}
3990
3991	void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival)
3992	{
3993	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
3994	assert(IsSSEOrAVXInstruction(ins));
3995
3996	ssize_t offs = indir->Offset();
3997	instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
3998
3999	id->idIns(ins);
4000	id->idReg1(reg1);
4001
4002	emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
4003
4004	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4005
4006	if (Is4ByteSSEInstruction(ins))
4007	{
4008	// The 4-Byte SSE instructions require two additional bytes
4009	sz += `2`;
4010	}
4011
4012	id->idCodeSize(sz);
4013
4014	dispIns(id);
4015	emitCurIGsize += sz;
4016	}
4017
4018	void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
4019	{
4020	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4021	assert(IsSSEOrAVXInstruction(ins));
4022
4023	instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4024
4025	id->idIns(ins);
4026	id->idReg1(reg1);
4027
4028	id->idInsFmt(IF_RRW_ARD_CNS);
4029	id->idAddr()->iiaAddrMode.amBaseReg = base;
4030	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4031
4032	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4033
4034	if (Is4ByteSSEInstruction(ins))
4035	{
4036	// The 4-Byte SSE instructions require two additional bytes
4037	sz += `2`;
4038	}
4039
4040	id->idCodeSize(sz);
4041
4042	dispIns(id);
4043	emitCurIGsize += sz;
4044	}
4045
4046	void emitter::emitIns_R_C_I(
4047	instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4048	{
4049	// Static always need relocs
4050	if (!jitStaticFldIsGlobAddr(fldHnd))
4051	{
4052	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4053	}
4054
4055	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4056	assert(IsSSEOrAVXInstruction(ins));
4057
4058	instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4059
4060	id->idIns(ins);
4061	id->idInsFmt(IF_RRW_MRD_CNS);
4062	id->idReg1(reg1);
4063	id->idAddr()->iiaFieldHnd = fldHnd;
4064
4065	UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4066
4067	if (Is4ByteSSEInstruction(ins))
4068	{
4069	// The 4-Byte SSE instructions require two additional bytes
4070	sz += `2`;
4071	}
4072
4073	id->idCodeSize(sz);
4074
4075	dispIns(id);
4076	emitCurIGsize += sz;
4077	}
4078
4079	void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival)
4080	{
4081	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
4082	assert(IsSSEOrAVXInstruction(ins));
4083
4084	instrDesc* id = emitNewInstrCns(attr, ival);
4085
4086	id->idIns(ins);
4087	id->idInsFmt(IF_RRW_SRD_CNS);
4088	id->idReg1(reg1);
4089	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4090
4091	#ifdef DEBUG
4092	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4093	#endif
4094
4095	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4096
4097	if (Is4ByteSSEInstruction(ins))
4098	{
4099	// The 4-Byte SSE instructions require two additional bytes
4100	sz += `2`;
4101	}
4102
4103	id->idCodeSize(sz);
4104
4105	dispIns(id);
4106	emitCurIGsize += sz;
4107	}
4108
4109	void emitter::emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir)
4110	{
4111	assert(IsSSEOrAVXInstruction(ins));
4112	assert(IsThreeOperandAVXInstruction(ins));
4113
4114	ssize_t offs = indir->Offset();
4115	instrDesc* id = emitNewInstrAmd(attr, offs);
4116
4117	id->idIns(ins);
4118	id->idReg1(reg1);
4119	id->idReg2(reg2);
4120
4121	emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins);
4122
4123	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4124	id->idCodeSize(sz);
4125
4126	dispIns(id);
4127	emitCurIGsize += sz;
4128	}
4129
4130	void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs)
4131	{
4132	assert(IsSSEOrAVXInstruction(ins));
4133	assert(IsThreeOperandAVXInstruction(ins));
4134
4135	instrDesc* id = emitNewInstrAmd(attr, offs);
4136
4137	id->idIns(ins);
4138	id->idReg1(reg1);
4139	id->idReg2(reg2);
4140
4141	id->idInsFmt(IF_RWR_RRD_ARD);
4142	id->idAddr()->iiaAddrMode.amBaseReg = base;
4143	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4144
4145	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4146	id->idCodeSize(sz);
4147
4148	dispIns(id);
4149	emitCurIGsize += sz;
4150	}
4151
4152	//------------------------------------------------------------------------
4153	// IsAVX2GatherInstruction: return true if the instruction is AVX2 Gather
4154	//
4155	// Arguments:
4156	// ins - the instruction to check
4157	// Return Value:
4158	// true if the instruction is AVX2 Gather
4159	//
4160	bool IsAVX2GatherInstruction(instruction ins)
4161	{
4162	switch (ins)
4163	{
4164	case INS_vpgatherdd:
4165	case INS_vpgatherdq:
4166	case INS_vpgatherqd:
4167	case INS_vpgatherqq:
4168	case INS_vgatherdps:
4169	case INS_vgatherdpd:
4170	case INS_vgatherqps:
4171	case INS_vgatherqpd:
4172	return true;
4173	default:
4174	return false;
4175	}
4176	}
4177
4178	//------------------------------------------------------------------------
4179	// emitIns_R_AR_R: Emits an AVX2 Gather instructions
4180	//
4181	// Arguments:
4182	// ins - the instruction to emit
4183	// attr - the instruction operand size
4184	// reg1 - the destination and first source operand
4185	// reg2 - the mask operand (encoded in VEX.vvvv)
4186	// base - the base register of address to load
4187	// index - the index register of VSIB
4188	// scale - the scale number of VSIB
4189	// offs - the offset added to the memory address from base
4190	//
4191	void emitter::emitIns_R_AR_R(instruction ins,
4192	emitAttr attr,
4193	regNumber reg1,
4194	regNumber reg2,
4195	regNumber base,
4196	regNumber index,
4197	int scale,
4198	int offs)
4199	{
4200	assert(IsAVX2GatherInstruction(ins));
4201
4202	instrDesc* id = emitNewInstrAmd(attr, offs);
4203
4204	id->idIns(ins);
4205	id->idReg1(reg1);
4206	id->idReg2(reg2);
4207
4208	id->idInsFmt(IF_RWR_ARD_RRD);
4209	id->idAddr()->iiaAddrMode.amBaseReg = base;
4210	id->idAddr()->iiaAddrMode.amIndxReg = index;
4211	id->idAddr()->iiaAddrMode.amScale = emitEncodeSize((emitAttr)scale);
4212
4213	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4214	id->idCodeSize(sz);
4215
4216	dispIns(id);
4217	emitCurIGsize += sz;
4218	}
4219
4220	void emitter::emitIns_R_R_C(
4221	instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
4222	{
4223	assert(IsSSEOrAVXInstruction(ins));
4224	assert(IsThreeOperandAVXInstruction(ins));
4225
4226	// Static always need relocs
4227	if (!jitStaticFldIsGlobAddr(fldHnd))
4228	{
4229	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4230	}
4231
4232	instrDesc* id = emitNewInstrDsp(attr, offs);
4233
4234	id->idIns(ins);
4235	id->idInsFmt(IF_RWR_RRD_MRD);
4236	id->idReg1(reg1);
4237	id->idReg2(reg2);
4238	id->idAddr()->iiaFieldHnd = fldHnd;
4239
4240	UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
4241	id->idCodeSize(sz);
4242
4243	dispIns(id);
4244	emitCurIGsize += sz;
4245	}
4246
4247	/*****************************************************************************
4248	*
4249	* Add an instruction with three register operands.
4250	*/
4251
4252	void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
4253	{
4254	assert(IsSSEOrAVXInstruction(ins));
4255	assert(IsThreeOperandAVXInstruction(ins));
4256	// Currently vex prefix only use three bytes mode.
4257	// size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
4258	// TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4259	UNATIVE_OFFSET sz = `5`;
4260
4261	instrDesc* id = emitNewInstr(attr);
4262	id->idIns(ins);
4263	id->idInsFmt(IF_RWR_RRD_RRD);
4264	id->idReg1(targetReg);
4265	id->idReg2(reg1);
4266	id->idReg3(reg2);
4267
4268	id->idCodeSize(sz);
4269	dispIns(id);
4270	emitCurIGsize += sz;
4271	}
4272
4273	void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs)
4274	{
4275	assert(IsSSEOrAVXInstruction(ins));
4276	assert(IsThreeOperandAVXInstruction(ins));
4277
4278	instrDesc* id = emitNewInstr(attr);
4279
4280	id->idIns(ins);
4281	id->idInsFmt(IF_RWR_RRD_SRD);
4282	id->idReg1(reg1);
4283	id->idReg2(reg2);
4284	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4285
4286	#ifdef DEBUG
4287	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4288	#endif
4289
4290	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
4291	id->idCodeSize(sz);
4292
4293	dispIns(id);
4294	emitCurIGsize += sz;
4295	}
4296
4297	void emitter::emitIns_R_R_A_I(
4298	instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt)
4299	{
4300	assert(IsSSEOrAVXInstruction(ins));
4301	assert(IsThreeOperandAVXInstruction(ins));
4302
4303	ssize_t offs = indir->Offset();
4304	instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4305
4306	id->idIns(ins);
4307	id->idReg1(reg1);
4308	id->idReg2(reg2);
4309
4310	emitHandleMemOp(indir, id, fmt, ins);
4311
4312	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4313	id->idCodeSize(sz);
4314
4315	dispIns(id);
4316	emitCurIGsize += sz;
4317	}
4318
4319	void emitter::emitIns_R_R_AR_I(
4320	instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
4321	{
4322	assert(IsSSEOrAVXInstruction(ins));
4323	assert(IsThreeOperandAVXInstruction(ins));
4324
4325	instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4326
4327	id->idIns(ins);
4328	id->idReg1(reg1);
4329	id->idReg2(reg2);
4330
4331	id->idInsFmt(IF_RWR_RRD_ARD_CNS);
4332	id->idAddr()->iiaAddrMode.amBaseReg = base;
4333	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4334
4335	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4336	id->idCodeSize(sz);
4337
4338	dispIns(id);
4339	emitCurIGsize += sz;
4340	}
4341
4342	void emitter::emitIns_R_R_C_I(
4343	instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
4344	{
4345	assert(IsSSEOrAVXInstruction(ins));
4346	assert(IsThreeOperandAVXInstruction(ins));
4347
4348	// Static always need relocs
4349	if (!jitStaticFldIsGlobAddr(fldHnd))
4350	{
4351	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4352	}
4353
4354	instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4355
4356	id->idIns(ins);
4357	id->idInsFmt(IF_RWR_RRD_MRD_CNS);
4358	id->idReg1(reg1);
4359	id->idReg2(reg2);
4360	id->idAddr()->iiaFieldHnd = fldHnd;
4361
4362	UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4363	id->idCodeSize(sz);
4364
4365	dispIns(id);
4366	emitCurIGsize += sz;
4367	}
4368
4369	/**********************************************************************************
4370	* emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
4371	*
4372	* Arguments:
4373	* ins - the instruction to add
4374	* attr - the emitter attribute for instruction
4375	* targetReg - the target (destination) register
4376	* reg1 - the first source register
4377	* reg2 - the second source register
4378	* ival - the immediate value
4379	*/
4380
4381	void emitter::emitIns_R_R_R_I(
4382	instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival)
4383	{
4384	assert(IsSSEOrAVXInstruction(ins));
4385	assert(IsThreeOperandAVXInstruction(ins));
4386	// Currently vex prefix only use three bytes mode.
4387	// size = vex + opcode + ModR/M + 1-byte-cns = 3 + 1 + 1 + 1 = 6
4388	// TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4389	UNATIVE_OFFSET sz = `6`;
4390
4391	instrDesc* id = emitNewInstrCns(attr, ival);
4392	id->idIns(ins);
4393	id->idInsFmt(IF_RWR_RRD_RRD_CNS);
4394	id->idReg1(targetReg);
4395	id->idReg2(reg1);
4396	id->idReg3(reg2);
4397
4398	id->idCodeSize(sz);
4399	dispIns(id);
4400	emitCurIGsize += sz;
4401	}
4402
4403	void emitter::emitIns_R_R_S_I(
4404	instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival)
4405	{
4406	assert(IsSSEOrAVXInstruction(ins));
4407	assert(IsThreeOperandAVXInstruction(ins));
4408
4409	instrDesc* id = emitNewInstrCns(attr, ival);
4410
4411	id->idIns(ins);
4412	id->idInsFmt(IF_RWR_RRD_SRD_CNS);
4413	id->idReg1(reg1);
4414	id->idReg2(reg2);
4415	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4416
4417	#ifdef DEBUG
4418	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4419	#endif
4420
4421	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4422	id->idCodeSize(sz);
4423
4424	dispIns(id);
4425	emitCurIGsize += sz;
4426	}
4427
4428	//------------------------------------------------------------------------
4429	// encodeXmmRegAsIval: Encodes a XMM register into imm[7:4] for use by a SIMD instruction
4430	//
4431	// Arguments
4432	// opReg -- The register being encoded
4433	//
4434	// Returns:
4435	// opReg encoded in imm[7:4]
4436	static int encodeXmmRegAsIval(regNumber opReg)
4437	{
4438	// AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
4439	// which encodes the fourth register into imm8[7:4]
4440	assert(opReg >= XMMBASE);
4441	int ival = (opReg - XMMBASE) << `4`;
4442
4443	assert((ival >= `0`) && (ival <= `255`));
4444	return (int8_t)ival;
4445	}
4446
4447	//------------------------------------------------------------------------
4448	// emitIns_R_R_A_R: emits the code for an instruction that takes a register operand, a GenTreeIndir address,
4449	// another register operand, and that returns a value in register
4450	//
4451	// Arguments:
4452	// ins -- The instruction being emitted
4453	// attr -- The emit attribute
4454	// targetReg -- The target register
4455	// op1Reg -- The register of the first operand
4456	// op3Reg -- The register of the third operand
4457	// indir -- The GenTreeIndir used for the memory address
4458	//
4459	// Remarks:
4460	// op2 is built from indir
4461	//
4462	void emitter::emitIns_R_R_A_R(
4463	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
4464	{
4465	assert(isAvxBlendv(ins));
4466	assert(UseVEXEncoding());
4467
4468	int ival = encodeXmmRegAsIval(op3Reg);
4469	ssize_t offs = indir->Offset();
4470	instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4471
4472	id->idIns(ins);
4473	id->idReg1(targetReg);
4474	id->idReg2(op1Reg);
4475
4476	emitHandleMemOp(indir, id, IF_RWR_RRD_ARD_RRD, ins);
4477
4478	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4479	id->idCodeSize(sz);
4480
4481	dispIns(id);
4482	emitCurIGsize += sz;
4483	}
4484
4485	//------------------------------------------------------------------------
4486	// emitIns_R_R_AR_R: emits the code for an instruction that takes a register operand, a base memory
4487	// register, another register operand, and that returns a value in register
4488	//
4489	// Arguments:
4490	// ins -- The instruction being emitted
4491	// attr -- The emit attribute
4492	// targetReg -- The target register
4493	// op1Reg -- The register of the first operands
4494	// op3Reg -- The register of the third operand
4495	// base -- The base register used for the memory address
4496	// offs -- The offset added to the memory address from base
4497	//
4498	// Remarks:
4499	// op2 is built from base + offs
4500	//
4501	void emitter::emitIns_R_R_AR_R(
4502	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base, int offs)
4503	{
4504	assert(isAvxBlendv(ins));
4505	assert(UseVEXEncoding());
4506
4507	int ival = encodeXmmRegAsIval(op3Reg);
4508	instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
4509
4510	id->idIns(ins);
4511	id->idReg1(targetReg);
4512	id->idReg2(op1Reg);
4513
4514	id->idInsFmt(IF_RWR_RRD_ARD_RRD);
4515	id->idAddr()->iiaAddrMode.amBaseReg = base;
4516	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
4517
4518	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
4519	id->idCodeSize(sz);
4520
4521	dispIns(id);
4522	emitCurIGsize += sz;
4523	}
4524
4525	//------------------------------------------------------------------------
4526	// emitIns_R_R_C_R: emits the code for an instruction that takes a register operand, a field handle +
4527	// offset, another register operand, and that returns a value in register
4528	//
4529	// Arguments:
4530	// ins -- The instruction being emitted
4531	// attr -- The emit attribute
4532	// targetReg -- The target register
4533	// op1Reg -- The register of the first operand
4534	// op3Reg -- The register of the third operand
4535	// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
4536	// offs -- The offset added to the memory address from fldHnd
4537	//
4538	// Remarks:
4539	// op2 is built from fldHnd + offs
4540	//
4541	void emitter::emitIns_R_R_C_R(instruction ins,
4542	emitAttr attr,
4543	regNumber targetReg,
4544	regNumber op1Reg,
4545	regNumber op3Reg,
4546	CORINFO_FIELD_HANDLE fldHnd,
4547	int offs)
4548	{
4549	assert(isAvxBlendv(ins));
4550	assert(UseVEXEncoding());
4551
4552	// Static always need relocs
4553	if (!jitStaticFldIsGlobAddr(fldHnd))
4554	{
4555	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4556	}
4557
4558	int ival = encodeXmmRegAsIval(op3Reg);
4559	instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
4560
4561	id->idIns(ins);
4562	id->idReg1(targetReg);
4563	id->idReg2(op1Reg);
4564
4565	id->idInsFmt(IF_RWR_RRD_MRD_RRD);
4566	id->idAddr()->iiaFieldHnd = fldHnd;
4567
4568	UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
4569	id->idCodeSize(sz);
4570
4571	dispIns(id);
4572	emitCurIGsize += sz;
4573	}
4574
4575	//------------------------------------------------------------------------
4576	// emitIns_R_R_R_S: emits the code for a instruction that takes a register operand, a variable index +
4577	// offset, another register operand, and that returns a value in register
4578	//
4579	// Arguments:
4580	// ins -- The instruction being emitted
4581	// attr -- The emit attribute
4582	// targetReg -- The target register
4583	// op1Reg -- The register of the first operand
4584	// op3Reg -- The register of the third operand
4585	// varx -- The variable index used for the memory address
4586	// offs -- The offset added to the memory address from varx
4587	//
4588	// Remarks:
4589	// op2 is built from varx + offs
4590	//
4591	void emitter::emitIns_R_R_S_R(
4592	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
4593	{
4594	assert(isAvxBlendv(ins));
4595	assert(UseVEXEncoding());
4596
4597	int ival = encodeXmmRegAsIval(op3Reg);
4598	instrDesc* id = emitNewInstrCns(attr, ival);
4599
4600	id->idIns(ins);
4601	id->idReg1(targetReg);
4602	id->idReg2(op1Reg);
4603
4604	id->idInsFmt(IF_RWR_RRD_SRD_RRD);
4605	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
4606
4607	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
4608	id->idCodeSize(sz);
4609
4610	dispIns(id);
4611	emitCurIGsize += sz;
4612	}
4613
4614	void emitter::emitIns_R_R_R_R(
4615	instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3)
4616	{
4617	assert(isAvxBlendv(ins));
4618	assert(UseVEXEncoding());
4619	// Currently vex prefix only use three bytes mode.
4620	// size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6
4621	// TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
4622	UNATIVE_OFFSET sz = `6`;
4623
4624	int ival = encodeXmmRegAsIval(reg3);
4625	instrDesc* id = emitNewInstrCns(attr, ival);
4626
4627	id->idIns(ins);
4628	id->idInsFmt(IF_RWR_RRD_RRD_RRD);
4629	id->idReg1(targetReg);
4630	id->idReg2(reg1);
4631	id->idReg3(reg2);
4632	id->idReg4(reg3);
4633
4634	id->idCodeSize(sz);
4635	dispIns(id);
4636	emitCurIGsize += sz;
4637	}
4638
4639	/*****************************************************************************
4640	*
4641	* Add an instruction with a register + static member operands.
4642	*/
4643	void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
4644	{
4645	// Static always need relocs
4646	if (!jitStaticFldIsGlobAddr(fldHnd))
4647	{
4648	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4649	}
4650
4651	emitAttr size = EA_SIZE(attr);
4652
4653	assert(size <= EA_32BYTE);
4654	noway_assert(emitVerifyEncodable(ins, size, reg));
4655
4656	UNATIVE_OFFSET sz;
4657	instrDesc* id;
4658
4659	// Are we MOV'ing the offset of the class variable into EAX?
4660	if (EA_IS_OFFSET(attr))
4661	{
4662	id = emitNewInstrDsp(EA_1BYTE, offs);
4663	id->idIns(ins);
4664	id->idInsFmt(IF_RWR_MRD_OFF);
4665
4666	assert(ins == INS_mov && reg == REG_EAX);
4667
4668	// Special case: "mov eax, [addr]" is smaller
4669	sz = `1` + TARGET_POINTER_SIZE;
4670	}
4671	else
4672	{
4673	insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);
4674
4675	id = emitNewInstrDsp(attr, offs);
4676	id->idIns(ins);
4677	id->idInsFmt(fmt);
4678
4679	#ifdef _TARGET_X86_
4680	// Special case: "mov eax, [addr]" is smaller.
4681	// This case is not enabled for amd64 as it always uses RIP relative addressing
4682	// and it results in smaller instruction size than encoding 64-bit addr in the
4683	// instruction.
4684	if (ins == INS_mov && reg == REG_EAX)
4685	{
4686	sz = `1` + TARGET_POINTER_SIZE;
4687	if (size == EA_2BYTE)
4688	sz += `1`;
4689	}
4690	else
4691	#endif //_TARGET_X86_
4692	{
4693	sz = emitInsSizeCV(id, insCodeRM(ins));
4694	}
4695
4696	// Special case: mov reg, fs:[ddd]
4697	if (fldHnd == FLD_GLOBAL_FS)
4698	{
4699	sz += `1`;
4700	}
4701	}
4702
4703	// VEX prefix
4704	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
4705
4706	// REX prefix
4707	if (TakesRexWPrefix(ins, attr) \|\| IsExtendedReg(reg, attr))
4708	{
4709	sz += emitGetRexPrefixSize(ins);
4710	}
4711
4712	id->idReg1(reg);
4713	id->idCodeSize(sz);
4714
4715	id->idAddr()->iiaFieldHnd = fldHnd;
4716
4717	dispIns(id);
4718	emitCurIGsize += sz;
4719	}
4720
4721	/*****************************************************************************
4722	*
4723	* Add an instruction with a static member + register operands.
4724	*/
4725
4726	void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
4727	{
4728	// Static always need relocs
4729	if (!jitStaticFldIsGlobAddr(fldHnd))
4730	{
4731	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4732	}
4733
4734	emitAttr size = EA_SIZE(attr);
4735
4736	#if defined(_TARGET_X86_)
4737	// For x86 it is valid to storeind a double sized operand in an xmm reg to memory
4738	assert(size <= EA_8BYTE);
4739	#else
4740	assert(size <= EA_PTRSIZE);
4741	#endif
4742
4743	noway_assert(emitVerifyEncodable(ins, size, reg));
4744
4745	instrDesc* id = emitNewInstrDsp(attr, offs);
4746	insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD);
4747
4748	id->idIns(ins);
4749	id->idInsFmt(fmt);
4750
4751	UNATIVE_OFFSET sz;
4752
4753	#ifdef _TARGET_X86_
4754	// Special case: "mov [addr], EAX" is smaller.
4755	// This case is not enable for amd64 as it always uses RIP relative addressing
4756	// and it will result in smaller instruction size than encoding 64-bit addr in
4757	// the instruction.
4758	if (ins == INS_mov && reg == REG_EAX)
4759	{
4760	sz = `1` + TARGET_POINTER_SIZE;
4761	if (size == EA_2BYTE)
4762	sz += `1`;
4763	}
4764	else
4765	#endif //_TARGET_X86_
4766	{
4767	sz = emitInsSizeCV(id, insCodeMR(ins));
4768	}
4769
4770	// Special case: mov reg, fs:[ddd]
4771	if (fldHnd == FLD_GLOBAL_FS)
4772	{
4773	sz += `1`;
4774	}
4775
4776	// VEX prefix
4777	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
4778
4779	// REX prefix
4780	if (TakesRexWPrefix(ins, attr) \|\| IsExtendedReg(reg, attr))
4781	{
4782	sz += emitGetRexPrefixSize(ins);
4783	}
4784
4785	id->idReg1(reg);
4786	id->idCodeSize(sz);
4787
4788	id->idAddr()->iiaFieldHnd = fldHnd;
4789
4790	dispIns(id);
4791	emitCurIGsize += sz;
4792	}
4793
4794	/*****************************************************************************
4795	*
4796	* Add an instruction with a static member + constant.
4797	*/
4798
4799	void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
4800	{
4801	// Static always need relocs
4802	if (!jitStaticFldIsGlobAddr(fldHnd))
4803	{
4804	attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4805	}
4806
4807	insFormat fmt;
4808
4809	switch (ins)
4810	{
4811	case INS_rcl_N:
4812	case INS_rcr_N:
4813	case INS_rol_N:
4814	case INS_ror_N:
4815	case INS_shl_N:
4816	case INS_shr_N:
4817	case INS_sar_N:
4818	assert(val != `1`);
4819	fmt = IF_MRW_SHF;
4820	val &= `0x7F`;
4821	break;
4822
4823	default:
4824	fmt = emitInsModeFormat(ins, IF_MRD_CNS);
4825	break;
4826	}
4827
4828	instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
4829	id->idIns(ins);
4830	id->idInsFmt(fmt);
4831
4832	code_t code = insCodeMI(ins);
4833	UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
4834
4835	// Vex prefix
4836	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
4837
4838	// REX prefix, if not already included in "code"
4839	if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
4840	{
4841	sz += emitGetRexPrefixSize(ins);
4842	}
4843
4844	id->idAddr()->iiaFieldHnd = fldHnd;
4845	id->idCodeSize(sz);
4846
4847	dispIns(id);
4848	emitCurIGsize += sz;
4849	}
4850
4851	void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
4852	{
4853	assert(ins == INS_mov);
4854	assert(dst->bbFlags & BBF_JMP_TARGET);
4855
4856	instrDescLbl* id = emitNewInstrLbl();
4857
4858	id->idIns(ins);
4859	id->idInsFmt(IF_SWR_LABEL);
4860	id->idAddr()->iiaBBlabel = dst;
4861
4862	/ The label reference is always long /
4863
4864	id->idjShort = `0`;
4865	id->idjKeepLong = `1`;
4866
4867	/ Record the current IG and offset within it /
4868
4869	id->idjIG = emitCurIG;
4870	id->idjOffs = emitCurIGsize;
4871
4872	/ Append this instruction to this IG's jump list /
4873
4874	id->idjNext = emitCurIGjmpList;
4875	emitCurIGjmpList = id;
4876
4877	UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(id, insCodeMI(ins), varx, offs);
4878	id->dstLclVar.initLclVarAddr(varx, offs);
4879	#ifdef DEBUG
4880	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
4881	#endif
4882
4883	#if EMITTER_STATS
4884	emitTotalIGjmps++;
4885	#endif
4886
4887	#ifndef _TARGET_AMD64_
4888	// Storing the address of a basicBlock will need a reloc
4889	// as the instruction uses the absolute address,
4890	// not a relative address.
4891	//
4892	// On Amd64, Absolute code addresses should always go through a reloc to
4893	// to be encoded as RIP rel32 offset.
4894	if (emitComp->opts.compReloc)
4895	#endif
4896	{
4897	id->idSetIsDspReloc();
4898	}
4899
4900	id->idCodeSize(sz);
4901
4902	dispIns(id);
4903	emitCurIGsize += sz;
4904	}
4905
4906	/*****************************************************************************
4907	*
4908	* Add a label instruction.
4909	*/
4910	void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
4911	{
4912	assert(ins == INS_lea);
4913	assert(dst->bbFlags & BBF_JMP_TARGET);
4914
4915	instrDescJmp* id = emitNewInstrJmp();
4916
4917	id->idIns(ins);
4918	id->idReg1(reg);
4919	id->idInsFmt(IF_RWR_LABEL);
4920	id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
4921	id->idAddr()->iiaBBlabel = dst;
4922
4923	/ The label reference is always long /
4924
4925	id->idjShort = `0`;
4926	id->idjKeepLong = `1`;
4927
4928	/ Record the current IG and offset within it /
4929
4930	id->idjIG = emitCurIG;
4931	id->idjOffs = emitCurIGsize;
4932
4933	/ Append this instruction to this IG's jump list /
4934
4935	id->idjNext = emitCurIGjmpList;
4936	emitCurIGjmpList = id;
4937
4938	#ifdef DEBUG
4939	// Mark the catch return
4940	if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
4941	{
4942	id->idDebugOnlyInfo()->idCatchRet = true;
4943	}
4944	#endif // DEBUG
4945
4946	#if EMITTER_STATS
4947	emitTotalIGjmps++;
4948	#endif
4949
4950	// Set the relocation flags - these give hint to zap to perform
4951	// relocation of the specified 32bit address.
4952	//
4953	// Note the relocation flags influence the size estimate.
4954	id->idSetRelocFlags(attr);
4955
4956	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
4957	id->idCodeSize(sz);
4958
4959	dispIns(id);
4960	emitCurIGsize += sz;
4961	}
4962
4963	/*****************************************************************************
4964	*
4965	* The following adds instructions referencing address modes.
4966	*/
4967
4968	void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp)
4969	{
4970	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
4971
4972	#ifdef _TARGET_AMD64_
4973	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
4974	// all other opcodes take a sign-extended 4-byte immediate
4975	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
4976	#endif
4977
4978	insFormat fmt;
4979
4980	switch (ins)
4981	{
4982	case INS_rcl_N:
4983	case INS_rcr_N:
4984	case INS_rol_N:
4985	case INS_ror_N:
4986	case INS_shl_N:
4987	case INS_shr_N:
4988	case INS_sar_N:
4989	assert(val != `1`);
4990	fmt = IF_ARW_SHF;
4991	val &= `0x7F`;
4992	break;
4993
4994	default:
4995	fmt = emitInsModeFormat(ins, IF_ARD_CNS);
4996	break;
4997	}
4998
4999	/*
5000	Useful if you want to trap moves with 0 constant
5001	if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5002	{
5003	printf("MOV 0\n");
5004	}
5005	*/
5006
5007	UNATIVE_OFFSET sz;
5008	instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5009	id->idIns(ins);
5010	id->idInsFmt(fmt);
5011
5012	id->idAddr()->iiaAddrMode.amBaseReg = reg;
5013	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5014
5015	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5016
5017	sz = emitInsSizeAM(id, insCodeMI(ins), val);
5018	id->idCodeSize(sz);
5019
5020	dispIns(id);
5021	emitCurIGsize += sz;
5022	}
5023
5024	void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
5025	{
5026	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5027
5028	#ifdef _TARGET_AMD64_
5029	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5030	// all other opcodes take a sign-extended 4-byte immediate
5031	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
5032	#endif
5033
5034	insFormat fmt;
5035
5036	switch (ins)
5037	{
5038	case INS_rcl_N:
5039	case INS_rcr_N:
5040	case INS_rol_N:
5041	case INS_ror_N:
5042	case INS_shl_N:
5043	case INS_shr_N:
5044	case INS_sar_N:
5045	assert(val != `1`);
5046	fmt = IF_ARW_SHF;
5047	val &= `0x7F`;
5048	break;
5049
5050	default:
5051	fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5052	break;
5053	}
5054
5055	/*
5056	Useful if you want to trap moves with 0 constant
5057	if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
5058	{
5059	printf("MOV 0\n");
5060	}
5061	*/
5062
5063	UNATIVE_OFFSET sz;
5064	instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5065	id->idIns(ins);
5066	id->idInsFmt(fmt);
5067
5068	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5069	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5070
5071	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5072
5073	sz = emitInsSizeAM(id, insCodeMI(ins), val);
5074	id->idCodeSize(sz);
5075
5076	dispIns(id);
5077	emitCurIGsize += sz;
5078	}
5079
5080	void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5081	{
5082	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
5083	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5084
5085	if (ins == INS_lea)
5086	{
5087	if (ireg == base && disp == `0`)
5088	{
5089	// Maybe the emitter is not the common place for this optimization, but it's a better choke point
5090	// for all the emitIns(ins, tree), we would have to be analyzing at each call site
5091	//
5092	return;
5093	}
5094	}
5095
5096	UNATIVE_OFFSET sz;
5097	instrDesc* id = emitNewInstrAmd(attr, disp);
5098	insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5099
5100	id->idIns(ins);
5101	id->idInsFmt(fmt);
5102	id->idReg1(ireg);
5103
5104	id->idAddr()->iiaAddrMode.amBaseReg = base;
5105	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5106
5107	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5108
5109	sz = emitInsSizeAM(id, insCodeRM(ins));
5110
5111	if (Is4ByteSSEInstruction(ins))
5112	{
5113	// The 4-Byte SSE instructions require two additional bytes
5114	sz += `2`;
5115	}
5116
5117	id->idCodeSize(sz);
5118
5119	dispIns(id);
5120	emitCurIGsize += sz;
5121	}
5122
5123	void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5124	{
5125	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5126	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5127
5128	UNATIVE_OFFSET sz;
5129	instrDesc* id = emitNewInstrAmd(attr, disp);
5130	insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5131
5132	id->idIns(ins);
5133	id->idInsFmt(fmt);
5134	id->idReg1(ireg);
5135
5136	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5137	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5138
5139	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5140
5141	sz = emitInsSizeAM(id, insCodeRM(ins));
5142	id->idCodeSize(sz);
5143
5144	dispIns(id);
5145	emitCurIGsize += sz;
5146	}
5147
5148	void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp)
5149	{
5150	UNATIVE_OFFSET sz;
5151	instrDesc* id = emitNewInstrAmd(attr, disp);
5152	insFormat fmt;
5153
5154	if (ireg == REG_NA)
5155	{
5156	fmt = emitInsModeFormat(ins, IF_ARD);
5157	}
5158	else
5159	{
5160	fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5161
5162	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
5163	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5164
5165	id->idReg1(ireg);
5166	}
5167
5168	id->idIns(ins);
5169	id->idInsFmt(fmt);
5170
5171	id->idAddr()->iiaAddrMode.amBaseReg = base;
5172	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5173
5174	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5175
5176	sz = emitInsSizeAM(id, insCodeMR(ins));
5177	id->idCodeSize(sz);
5178
5179	dispIns(id);
5180	emitCurIGsize += sz;
5181
5182	emitAdjustStackDepthPushPop(ins);
5183	}
5184
5185	void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival)
5186	{
5187	assert(ins == INS_vextracti128 \|\| ins == INS_vextractf128);
5188	assert(base != REG_NA);
5189	assert(ireg != REG_NA);
5190	instrDesc* id = emitNewInstrAmdCns(attr, disp, ival);
5191
5192	id->idIns(ins);
5193	id->idInsFmt(IF_AWR_RRD_CNS);
5194	id->idAddr()->iiaAddrMode.amBaseReg = base;
5195	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5196	id->idReg1(ireg);
5197
5198	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5199
5200	UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins), ival);
5201	id->idCodeSize(sz);
5202
5203	dispIns(id);
5204	emitCurIGsize += sz;
5205	}
5206
5207	void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
5208	{
5209	UNATIVE_OFFSET sz;
5210	instrDesc* id = emitNewInstrAmd(attr, disp);
5211	insFormat fmt;
5212
5213	if (ireg == REG_NA)
5214	{
5215	fmt = emitInsModeFormat(ins, IF_ARD);
5216	}
5217	else
5218	{
5219	fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5220
5221	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5222	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5223
5224	id->idReg1(ireg);
5225	}
5226
5227	id->idIns(ins);
5228	id->idInsFmt(fmt);
5229
5230	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5231	id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
5232
5233	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5234
5235	sz = emitInsSizeAM(id, insCodeMR(ins));
5236	id->idCodeSize(sz);
5237
5238	dispIns(id);
5239	emitCurIGsize += sz;
5240
5241	emitAdjustStackDepthPushPop(ins);
5242	}
5243
5244	void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
5245	{
5246	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5247
5248	#ifdef _TARGET_AMD64_
5249	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5250	// all other opcodes take a sign-extended 4-byte immediate
5251	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
5252	#endif
5253
5254	insFormat fmt;
5255
5256	switch (ins)
5257	{
5258	case INS_rcl_N:
5259	case INS_rcr_N:
5260	case INS_rol_N:
5261	case INS_ror_N:
5262	case INS_shl_N:
5263	case INS_shr_N:
5264	case INS_sar_N:
5265	assert(val != `1`);
5266	fmt = IF_ARW_SHF;
5267	val &= `0x7F`;
5268	break;
5269
5270	default:
5271	fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5272	break;
5273	}
5274
5275	UNATIVE_OFFSET sz;
5276	instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5277	id->idIns(ins);
5278	id->idInsFmt(fmt);
5279
5280	id->idAddr()->iiaAddrMode.amBaseReg = reg;
5281	id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5282	id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5283
5284	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5285
5286	sz = emitInsSizeAM(id, insCodeMI(ins), val);
5287	id->idCodeSize(sz);
5288
5289	dispIns(id);
5290	emitCurIGsize += sz;
5291	}
5292
5293	void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
5294	{
5295	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5296	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5297
5298	UNATIVE_OFFSET sz;
5299	instrDesc* id = emitNewInstrAmd(attr, disp);
5300	insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5301
5302	id->idIns(ins);
5303	id->idInsFmt(fmt);
5304	id->idReg1(ireg);
5305
5306	id->idAddr()->iiaAddrMode.amBaseReg = base;
5307	id->idAddr()->iiaAddrMode.amIndxReg = index;
5308	id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
5309
5310	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5311
5312	sz = emitInsSizeAM(id, insCodeRM(ins));
5313	id->idCodeSize(sz);
5314
5315	dispIns(id);
5316	emitCurIGsize += sz;
5317	}
5318
5319	void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
5320	{
5321	UNATIVE_OFFSET sz;
5322	instrDesc* id = emitNewInstrAmd(attr, disp);
5323	insFormat fmt;
5324
5325	if (ireg == REG_NA)
5326	{
5327	fmt = emitInsModeFormat(ins, IF_ARD);
5328	}
5329	else
5330	{
5331	fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5332
5333	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5334	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5335
5336	id->idReg1(ireg);
5337	}
5338
5339	id->idIns(ins);
5340	id->idInsFmt(fmt);
5341
5342	id->idAddr()->iiaAddrMode.amBaseReg = reg;
5343	id->idAddr()->iiaAddrMode.amIndxReg = index;
5344	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(`1`);
5345
5346	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5347
5348	sz = emitInsSizeAM(id, insCodeMR(ins));
5349	id->idCodeSize(sz);
5350
5351	dispIns(id);
5352	emitCurIGsize += sz;
5353
5354	emitAdjustStackDepthPushPop(ins);
5355	}
5356
5357	void emitter::emitIns_I_ARX(
5358	instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
5359	{
5360	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5361
5362	#ifdef _TARGET_AMD64_
5363	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5364	// all other opcodes take a sign-extended 4-byte immediate
5365	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
5366	#endif
5367
5368	insFormat fmt;
5369
5370	switch (ins)
5371	{
5372	case INS_rcl_N:
5373	case INS_rcr_N:
5374	case INS_rol_N:
5375	case INS_ror_N:
5376	case INS_shl_N:
5377	case INS_shr_N:
5378	case INS_sar_N:
5379	assert(val != `1`);
5380	fmt = IF_ARW_SHF;
5381	val &= `0x7F`;
5382	break;
5383
5384	default:
5385	fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5386	break;
5387	}
5388
5389	UNATIVE_OFFSET sz;
5390	instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5391
5392	id->idIns(ins);
5393	id->idInsFmt(fmt);
5394
5395	id->idAddr()->iiaAddrMode.amBaseReg = reg;
5396	id->idAddr()->iiaAddrMode.amIndxReg = rg2;
5397	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5398
5399	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5400
5401	sz = emitInsSizeAM(id, insCodeMI(ins), val);
5402	id->idCodeSize(sz);
5403
5404	dispIns(id);
5405	emitCurIGsize += sz;
5406	}
5407
5408	void emitter::emitIns_R_ARX(
5409	instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5410	{
5411	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5412	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5413
5414	UNATIVE_OFFSET sz;
5415	instrDesc* id = emitNewInstrAmd(attr, disp);
5416	insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5417
5418	id->idIns(ins);
5419	id->idInsFmt(fmt);
5420	id->idReg1(ireg);
5421
5422	id->idAddr()->iiaAddrMode.amBaseReg = base;
5423	id->idAddr()->iiaAddrMode.amIndxReg = index;
5424	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5425
5426	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5427
5428	sz = emitInsSizeAM(id, insCodeRM(ins));
5429	id->idCodeSize(sz);
5430
5431	dispIns(id);
5432	emitCurIGsize += sz;
5433	}
5434
5435	void emitter::emitIns_ARX_R(
5436	instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
5437	{
5438	UNATIVE_OFFSET sz;
5439	instrDesc* id = emitNewInstrAmd(attr, disp);
5440	insFormat fmt;
5441
5442	if (ireg == REG_NA)
5443	{
5444	fmt = emitInsModeFormat(ins, IF_ARD);
5445	}
5446	else
5447	{
5448	fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5449
5450	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5451	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5452
5453	id->idReg1(ireg);
5454	}
5455
5456	id->idIns(ins);
5457	id->idInsFmt(fmt);
5458
5459	id->idAddr()->iiaAddrMode.amBaseReg = base;
5460	id->idAddr()->iiaAddrMode.amIndxReg = index;
5461	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5462
5463	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5464
5465	sz = emitInsSizeAM(id, insCodeMR(ins));
5466	id->idCodeSize(sz);
5467
5468	dispIns(id);
5469	emitCurIGsize += sz;
5470
5471	emitAdjustStackDepthPushPop(ins);
5472	}
5473
5474	void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
5475	{
5476	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5477
5478	#ifdef _TARGET_AMD64_
5479	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
5480	// all other opcodes take a sign-extended 4-byte immediate
5481	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
5482	#endif
5483
5484	insFormat fmt;
5485
5486	switch (ins)
5487	{
5488	case INS_rcl_N:
5489	case INS_rcr_N:
5490	case INS_rol_N:
5491	case INS_ror_N:
5492	case INS_shl_N:
5493	case INS_shr_N:
5494	case INS_sar_N:
5495	assert(val != `1`);
5496	fmt = IF_ARW_SHF;
5497	val &= `0x7F`;
5498	break;
5499
5500	default:
5501	fmt = emitInsModeFormat(ins, IF_ARD_CNS);
5502	break;
5503	}
5504
5505	UNATIVE_OFFSET sz;
5506	instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
5507	id->idIns(ins);
5508	id->idInsFmt(fmt);
5509
5510	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5511	id->idAddr()->iiaAddrMode.amIndxReg = reg;
5512	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5513
5514	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5515
5516	sz = emitInsSizeAM(id, insCodeMI(ins), val);
5517	id->idCodeSize(sz);
5518
5519	dispIns(id);
5520	emitCurIGsize += sz;
5521	}
5522
5523	void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5524	{
5525	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
5526	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5527
5528	UNATIVE_OFFSET sz;
5529	instrDesc* id = emitNewInstrAmd(attr, disp);
5530	insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
5531
5532	id->idIns(ins);
5533	id->idInsFmt(fmt);
5534	id->idReg1(ireg);
5535
5536	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5537	id->idAddr()->iiaAddrMode.amIndxReg = reg;
5538	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5539
5540	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5541
5542	sz = emitInsSizeAM(id, insCodeRM(ins));
5543	id->idCodeSize(sz);
5544
5545	dispIns(id);
5546	emitCurIGsize += sz;
5547	}
5548
5549	void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
5550	{
5551	UNATIVE_OFFSET sz;
5552	instrDesc* id = emitNewInstrAmd(attr, disp);
5553	insFormat fmt;
5554
5555	if (ireg == REG_NA)
5556	{
5557	fmt = emitInsModeFormat(ins, IF_ARD);
5558	}
5559	else
5560	{
5561	fmt = emitInsModeFormat(ins, IF_ARD_RRD);
5562	noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
5563	assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
5564
5565	id->idReg1(ireg);
5566	}
5567
5568	id->idIns(ins);
5569	id->idInsFmt(fmt);
5570
5571	id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
5572	id->idAddr()->iiaAddrMode.amIndxReg = reg;
5573	id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
5574
5575	assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
5576
5577	sz = emitInsSizeAM(id, insCodeMR(ins));
5578	id->idCodeSize(sz);
5579
5580	dispIns(id);
5581	emitCurIGsize += sz;
5582
5583	emitAdjustStackDepthPushPop(ins);
5584	}
5585
5586	#ifdef FEATURE_HW_INTRINSICS
5587	//------------------------------------------------------------------------
5588	// emitIns_SIMD_R_R_I: emits the code for a SIMD instruction that takes a register operand, an immediate operand
5589	// and that returns a value in register
5590	//
5591	// Arguments:
5592	// ins -- The instruction being emitted
5593	// attr -- The emit attribute
5594	// targetReg -- The target register
5595	// op1Reg -- The register of the first operand
5596	// ival -- The immediate value
5597	//
5598	void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival)
5599	{
5600	if (UseVEXEncoding() \|\| IsDstSrcImmAvxInstruction(ins))
5601	{
5602	emitIns_R_R_I(ins, attr, targetReg, op1Reg, ival);
5603	}
5604	else
5605	{
5606	if (op1Reg != targetReg)
5607	{
5608	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5609	}
5610	emitIns_R_I(ins, attr, targetReg, ival);
5611	}
5612	}
5613
5614	//------------------------------------------------------------------------
5615	// emitIns_SIMD_R_R_A: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5616	// and that returns a value in register
5617	//
5618	// Arguments:
5619	// ins -- The instruction being emitted
5620	// attr -- The emit attribute
5621	// targetReg -- The target register
5622	// op1Reg -- The register of the first operand
5623	// indir -- The GenTreeIndir used for the memory address
5624	//
5625	void emitter::emitIns_SIMD_R_R_A(
5626	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir)
5627	{
5628	if (UseVEXEncoding())
5629	{
5630	emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir);
5631	}
5632	else
5633	{
5634	if (op1Reg != targetReg)
5635	{
5636	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5637	}
5638	emitIns_R_A(ins, attr, targetReg, indir);
5639	}
5640	}
5641
5642	//------------------------------------------------------------------------
5643	// emitIns_SIMD_R_R_AR: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5644	// and that returns a value in register
5645	//
5646	// Arguments:
5647	// ins -- The instruction being emitted
5648	// attr -- The emit attribute
5649	// targetReg -- The target register
5650	// op1Reg -- The register of the first operand
5651	// base -- The base register used for the memory address
5652	//
5653	void emitter::emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base)
5654	{
5655	if (UseVEXEncoding())
5656	{
5657	emitIns_R_R_AR(ins, attr, targetReg, op1Reg, base, `0`);
5658	}
5659	else
5660	{
5661	if (op1Reg != targetReg)
5662	{
5663	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5664	}
5665	emitIns_R_AR(ins, attr, targetReg, base, `0`);
5666	}
5667	}
5668
5669	//------------------------------------------------------------------------
5670	// emitIns_SIMD_R_R_C: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5671	// and that returns a value in register
5672	//
5673	// Arguments:
5674	// ins -- The instruction being emitted
5675	// attr -- The emit attribute
5676	// targetReg -- The target register
5677	// op1Reg -- The register of the first operand
5678	// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5679	// offs -- The offset added to the memory address from fldHnd
5680	//
5681	void emitter::emitIns_SIMD_R_R_C(
5682	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
5683	{
5684	if (UseVEXEncoding())
5685	{
5686	emitIns_R_R_C(ins, attr, targetReg, op1Reg, fldHnd, offs);
5687	}
5688	else
5689	{
5690	if (op1Reg != targetReg)
5691	{
5692	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5693	}
5694	emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
5695	}
5696	}
5697
5698	//------------------------------------------------------------------------
5699	// emitIns_SIMD_R_R_R: emits the code for a SIMD instruction that takes two register operands, and that returns a
5700	// value in register
5701	//
5702	// Arguments:
5703	// ins -- The instruction being emitted
5704	// attr -- The emit attribute
5705	// targetReg -- The target register
5706	// op1Reg -- The register of the first operand
5707	// op2Reg -- The register of the second operand
5708	//
5709	void emitter::emitIns_SIMD_R_R_R(
5710	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg)
5711	{
5712	if (UseVEXEncoding())
5713	{
5714	emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg);
5715	}
5716	else
5717	{
5718	if (op1Reg != targetReg)
5719	{
5720	// Ensure we aren't overwriting op2
5721	assert(op2Reg != targetReg);
5722
5723	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5724	}
5725	emitIns_R_R(ins, attr, targetReg, op2Reg);
5726	}
5727	}
5728
5729	//------------------------------------------------------------------------
5730	// emitIns_SIMD_R_R_S: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5731	// and that returns a value in register
5732	//
5733	// Arguments:
5734	// ins -- The instruction being emitted
5735	// attr -- The emit attribute
5736	// targetReg -- The target register
5737	// op1Reg -- The register of the first operand
5738	// varx -- The variable index used for the memory address
5739	// offs -- The offset added to the memory address from varx
5740	//
5741	void emitter::emitIns_SIMD_R_R_S(
5742	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs)
5743	{
5744	if (UseVEXEncoding())
5745	{
5746	emitIns_R_R_S(ins, attr, targetReg, op1Reg, varx, offs);
5747	}
5748	else
5749	{
5750	if (op1Reg != targetReg)
5751	{
5752	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5753	}
5754	emitIns_R_S(ins, attr, targetReg, varx, offs);
5755	}
5756	}
5757
5758	//------------------------------------------------------------------------
5759	// emitIns_SIMD_R_R_A_I: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
5760	// an immediate operand, and that returns a value in register
5761	//
5762	// Arguments:
5763	// ins -- The instruction being emitted
5764	// attr -- The emit attribute
5765	// targetReg -- The target register
5766	// op1Reg -- The register of the first operand
5767	// indir -- The GenTreeIndir used for the memory address
5768	// ival -- The immediate value
5769	//
5770	void emitter::emitIns_SIMD_R_R_A_I(
5771	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival)
5772	{
5773	if (UseVEXEncoding())
5774	{
5775	emitIns_R_R_A_I(ins, attr, targetReg, op1Reg, indir, ival, IF_RWR_RRD_ARD_CNS);
5776	}
5777	else
5778	{
5779	if (op1Reg != targetReg)
5780	{
5781	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5782	}
5783	emitIns_R_A_I(ins, attr, targetReg, indir, ival);
5784	}
5785	}
5786
5787	//------------------------------------------------------------------------
5788	// emitIns_SIMD_R_R_AR_I: emits the code for a SIMD instruction that takes a register operand, a base memory register,
5789	// an immediate operand, and that returns a value in register
5790	//
5791	// Arguments:
5792	// ins -- The instruction being emitted
5793	// attr -- The emit attribute
5794	// targetReg -- The target register
5795	// op1Reg -- The register of the first operand
5796	// base -- The base register used for the memory address
5797	// ival -- The immediate value
5798	//
5799	void emitter::emitIns_SIMD_R_R_AR_I(
5800	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber base, int ival)
5801	{
5802	if (UseVEXEncoding())
5803	{
5804	emitIns_R_R_AR_I(ins, attr, targetReg, op1Reg, base, `0`, ival);
5805	}
5806	else
5807	{
5808	if (op1Reg != targetReg)
5809	{
5810	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5811	}
5812	emitIns_R_AR_I(ins, attr, targetReg, base, `0`, ival);
5813	}
5814	}
5815
5816	//------------------------------------------------------------------------
5817	// emitIns_SIMD_R_R_C_I: emits the code for a SIMD instruction that takes a register operand, a field handle + offset,
5818	// an immediate operand, and that returns a value in register
5819	//
5820	// Arguments:
5821	// ins -- The instruction being emitted
5822	// attr -- The emit attribute
5823	// targetReg -- The target register
5824	// op1Reg -- The register of the first operand
5825	// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5826	// offs -- The offset added to the memory address from fldHnd
5827	// ival -- The immediate value
5828	//
5829	void emitter::emitIns_SIMD_R_R_C_I(instruction ins,
5830	emitAttr attr,
5831	regNumber targetReg,
5832	regNumber op1Reg,
5833	CORINFO_FIELD_HANDLE fldHnd,
5834	int offs,
5835	int ival)
5836	{
5837	if (UseVEXEncoding())
5838	{
5839	emitIns_R_R_C_I(ins, attr, targetReg, op1Reg, fldHnd, offs, ival);
5840	}
5841	else
5842	{
5843	if (op1Reg != targetReg)
5844	{
5845	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5846	}
5847	emitIns_R_C_I(ins, attr, targetReg, fldHnd, offs, ival);
5848	}
5849	}
5850
5851	//------------------------------------------------------------------------
5852	// emitIns_SIMD_R_R_R_I: emits the code for a SIMD instruction that takes two register operands, an immediate operand,
5853	// and that returns a value in register
5854	//
5855	// Arguments:
5856	// ins -- The instruction being emitted
5857	// attr -- The emit attribute
5858	// targetReg -- The target register
5859	// op1Reg -- The register of the first operand
5860	// op2Reg -- The register of the second operand
5861	// ival -- The immediate value
5862	//
5863	void emitter::emitIns_SIMD_R_R_R_I(
5864	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int ival)
5865	{
5866	if (UseVEXEncoding())
5867	{
5868	emitIns_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, ival);
5869	}
5870	else
5871	{
5872	if (op1Reg != targetReg)
5873	{
5874	// Ensure we aren't overwriting op2
5875	assert(op2Reg != targetReg);
5876
5877	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5878	}
5879	emitIns_R_R_I(ins, attr, targetReg, op2Reg, ival);
5880	}
5881	}
5882
5883	//------------------------------------------------------------------------
5884	// emitIns_SIMD_R_R_S_I: emits the code for a SIMD instruction that takes a register operand, a variable index + offset,
5885	// an imediate operand, and that returns a value in register
5886	//
5887	// Arguments:
5888	// ins -- The instruction being emitted
5889	// attr -- The emit attribute
5890	// targetReg -- The target register
5891	// op1Reg -- The register of the first operand
5892	// varx -- The variable index used for the memory address
5893	// offs -- The offset added to the memory address from varx
5894	// ival -- The immediate value
5895	//
5896	void emitter::emitIns_SIMD_R_R_S_I(
5897	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, int ival)
5898	{
5899	if (UseVEXEncoding())
5900	{
5901	emitIns_R_R_S_I(ins, attr, targetReg, op1Reg, varx, offs, ival);
5902	}
5903	else
5904	{
5905	if (op1Reg != targetReg)
5906	{
5907	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5908	}
5909	emitIns_R_S_I(ins, attr, targetReg, varx, offs, ival);
5910	}
5911	}
5912
5913	//------------------------------------------------------------------------
5914	// emitIns_SIMD_R_R_R_A: emits the code for a SIMD instruction that takes two register operands, a GenTreeIndir address,
5915	// and that returns a value in register
5916	//
5917	// Arguments:
5918	// ins -- The instruction being emitted
5919	// attr -- The emit attribute
5920	// targetReg -- The target register
5921	// op1Reg -- The register of the first operand
5922	// op2Reg -- The register of the second operand
5923	// indir -- The GenTreeIndir used for the memory address
5924	//
5925	void emitter::emitIns_SIMD_R_R_R_A(
5926	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
5927	{
5928	assert(IsFMAInstruction(ins));
5929	assert(UseVEXEncoding());
5930
5931	if (op1Reg != targetReg)
5932	{
5933	// Ensure we aren't overwriting op2
5934	assert(op2Reg != targetReg);
5935
5936	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5937	}
5938
5939	emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir);
5940	}
5941
5942	//------------------------------------------------------------------------
5943	// emitIns_SIMD_R_R_R_AR: emits the code for a SIMD instruction that takes two register operands, a base memory
5944	// register, and that returns a value in register
5945	//
5946	// Arguments:
5947	// ins -- The instruction being emitted
5948	// attr -- The emit attribute
5949	// targetReg -- The target register
5950	// op1Reg -- The register of the first operands
5951	// op2Reg -- The register of the second operand
5952	// base -- The base register used for the memory address
5953	//
5954	void emitter::emitIns_SIMD_R_R_R_AR(
5955	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base)
5956	{
5957	assert(IsFMAInstruction(ins));
5958	assert(UseVEXEncoding());
5959
5960	if (op1Reg != targetReg)
5961	{
5962	// Ensure we aren't overwriting op2
5963	assert(op2Reg != targetReg);
5964
5965	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
5966	}
5967
5968	emitIns_R_R_AR(ins, attr, targetReg, op2Reg, base, `0`);
5969	}
5970
5971	//------------------------------------------------------------------------
5972	// emitIns_SIMD_R_R_R_C: emits the code for a SIMD instruction that takes two register operands, a field handle +
5973	// offset, and that returns a value in register
5974	//
5975	// Arguments:
5976	// ins -- The instruction being emitted
5977	// attr -- The emit attribute
5978	// targetReg -- The target register
5979	// op1Reg -- The register of the first operand
5980	// op2Reg -- The register of the second operand
5981	// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
5982	// offs -- The offset added to the memory address from fldHnd
5983	//
5984	void emitter::emitIns_SIMD_R_R_R_C(instruction ins,
5985	emitAttr attr,
5986	regNumber targetReg,
5987	regNumber op1Reg,
5988	regNumber op2Reg,
5989	CORINFO_FIELD_HANDLE fldHnd,
5990	int offs)
5991	{
5992	assert(IsFMAInstruction(ins));
5993	assert(UseVEXEncoding());
5994
5995	if (op1Reg != targetReg)
5996	{
5997	// Ensure we aren't overwriting op2
5998	assert(op2Reg != targetReg);
5999
6000	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6001	}
6002
6003	emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs);
6004	}
6005
6006	//------------------------------------------------------------------------
6007	// emitIns_SIMD_R_R_R_R: emits the code for a SIMD instruction that takes three register operands, and that returns a
6008	// value in register
6009	//
6010	// Arguments:
6011	// ins -- The instruction being emitted
6012	// attr -- The emit attribute
6013	// targetReg -- The target register
6014	// op1Reg -- The register of the first operand
6015	// op2Reg -- The register of the second operand
6016	// op3Reg -- The register of the second operand
6017	//
6018	void emitter::emitIns_SIMD_R_R_R_R(
6019	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
6020	{
6021	if (IsFMAInstruction(ins))
6022	{
6023	assert(UseVEXEncoding());
6024
6025	if (op1Reg != targetReg)
6026	{
6027	// Ensure we aren't overwriting op2 or op3
6028
6029	assert(op2Reg != targetReg);
6030	assert(op3Reg != targetReg);
6031
6032	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6033	}
6034
6035	emitIns_R_R_R(ins, attr, targetReg, op2Reg, op3Reg);
6036	}
6037	else if (UseVEXEncoding())
6038	{
6039	assert(isAvxBlendv(ins) \|\| isSse41Blendv(ins));
6040
6041	// convert SSE encoding of SSE4.1 instructions to VEX encoding
6042	switch (ins)
6043	{
6044	case INS_blendvps:
6045	ins = INS_vblendvps;
6046	break;
6047	case INS_blendvpd:
6048	ins = INS_vblendvpd;
6049	break;
6050	case INS_pblendvb:
6051	ins = INS_vpblendvb;
6052	break;
6053	default:
6054	break;
6055	}
6056	emitIns_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Reg);
6057	}
6058	else
6059	{
6060	assert(isSse41Blendv(ins));
6061	// SSE4.1 blendv hardcode the mask vector (op3) in XMM0*
6062	if (op3Reg != REG_XMM0)
6063	{
6064	// Ensure we aren't overwriting op1 or op2
6065	assert(op1Reg != REG_XMM0);
6066	assert(op2Reg != REG_XMM0);
6067
6068	emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6069	}
6070	if (op1Reg != targetReg)
6071	{
6072	// Ensure we aren't overwriting op2 or oop3 (which should be REG_XMM0)
6073	assert(op2Reg != targetReg);
6074	assert(targetReg != REG_XMM0);
6075
6076	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6077	}
6078	emitIns_R_R(ins, attr, targetReg, op2Reg);
6079	}
6080	}
6081
6082	//------------------------------------------------------------------------
6083	// emitIns_SIMD_R_R_R_S: emits the code for a SIMD instruction that takes two register operands, a variable index +
6084	// offset, and that returns a value in register
6085	//
6086	// Arguments:
6087	// ins -- The instruction being emitted
6088	// attr -- The emit attribute
6089	// targetReg -- The target register
6090	// op1Reg -- The register of the first operand
6091	// op2Reg -- The register of the second operand
6092	// varx -- The variable index used for the memory address
6093	// offs -- The offset added to the memory address from varx
6094	//
6095	void emitter::emitIns_SIMD_R_R_R_S(
6096	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
6097	{
6098	assert(IsFMAInstruction(ins));
6099	assert(UseVEXEncoding());
6100
6101	if (op1Reg != targetReg)
6102	{
6103	// Ensure we aren't overwriting op2
6104	assert(op2Reg != targetReg);
6105
6106	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6107	}
6108
6109	emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs);
6110	}
6111
6112	//------------------------------------------------------------------------
6113	// emitIns_SIMD_R_R_A_R: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
6114	// another register operand, and that returns a value in register
6115	//
6116	// Arguments:
6117	// ins -- The instruction being emitted
6118	// attr -- The emit attribute
6119	// targetReg -- The target register
6120	// op1Reg -- The register of the first operand
6121	// op3Reg -- The register of the third operand
6122	// indir -- The GenTreeIndir used for the memory address
6123	//
6124	void emitter::emitIns_SIMD_R_R_A_R(
6125	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
6126	{
6127	if (UseVEXEncoding())
6128	{
6129	assert(isAvxBlendv(ins) \|\| isSse41Blendv(ins));
6130
6131	// convert SSE encoding of SSE4.1 instructions to VEX encoding
6132	switch (ins)
6133	{
6134	case INS_blendvps:
6135	{
6136	ins = INS_vblendvps;
6137	break;
6138	}
6139
6140	case INS_blendvpd:
6141	{
6142	ins = INS_vblendvpd;
6143	break;
6144	}
6145
6146	case INS_pblendvb:
6147	{
6148	ins = INS_vpblendvb;
6149	break;
6150	}
6151
6152	default:
6153	{
6154	break;
6155	}
6156	}
6157
6158	emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir);
6159	}
6160	else
6161	{
6162	assert(isSse41Blendv(ins));
6163
6164	// SSE4.1 blendv hardcode the mask vector (op3) in XMM0*
6165	if (op3Reg != REG_XMM0)
6166	{
6167	// Ensure we aren't overwriting op1
6168	assert(op1Reg != REG_XMM0);
6169
6170	emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6171	}
6172	if (op1Reg != targetReg)
6173	{
6174	// Ensure we aren't overwriting op3 (which should be REG_XMM0)
6175	assert(targetReg != REG_XMM0);
6176
6177	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6178	}
6179
6180	emitIns_R_A(ins, attr, targetReg, indir);
6181	}
6182	}
6183
6184	//------------------------------------------------------------------------
6185	// emitIns_SIMD_R_R_AR_R: emits the code for a SIMD instruction that takes a register operand, a base memory
6186	// register, another register operand, and that returns a value in register
6187	//
6188	// Arguments:
6189	// ins -- The instruction being emitted
6190	// attr -- The emit attribute
6191	// targetReg -- The target register
6192	// op1Reg -- The register of the first operands
6193	// op3Reg -- The register of the third operand
6194	// base -- The base register used for the memory address
6195	//
6196	void emitter::emitIns_SIMD_R_R_AR_R(
6197	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base)
6198	{
6199	if (UseVEXEncoding())
6200	{
6201	assert(isAvxBlendv(ins) \|\| isSse41Blendv(ins));
6202
6203	// convert SSE encoding of SSE4.1 instructions to VEX encoding
6204	switch (ins)
6205	{
6206	case INS_blendvps:
6207	{
6208	ins = INS_vblendvps;
6209	break;
6210	}
6211
6212	case INS_blendvpd:
6213	{
6214	ins = INS_vblendvpd;
6215	break;
6216	}
6217
6218	case INS_pblendvb:
6219	{
6220	ins = INS_vpblendvb;
6221	break;
6222	}
6223
6224	default:
6225	{
6226	break;
6227	}
6228	}
6229
6230	emitIns_R_R_AR_R(ins, attr, targetReg, op1Reg, op3Reg, base, `0`);
6231	}
6232	else
6233	{
6234	assert(isSse41Blendv(ins));
6235
6236	// SSE4.1 blendv hardcode the mask vector (op3) in XMM0*
6237	if (op3Reg != REG_XMM0)
6238	{
6239	// Ensure we aren't overwriting op1
6240	assert(op1Reg != REG_XMM0);
6241
6242	emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6243	}
6244	if (op1Reg != targetReg)
6245	{
6246	// Ensure we aren't overwriting op3 (which should be REG_XMM0)
6247	assert(targetReg != REG_XMM0);
6248
6249	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6250	}
6251
6252	emitIns_R_AR(ins, attr, targetReg, base, `0`);
6253	}
6254	}
6255
6256	//------------------------------------------------------------------------
6257	// emitIns_SIMD_R_R_C_R: emits the code for a SIMD instruction that takes a register operand, a field handle +
6258	// offset, another register operand, and that returns a value in register
6259	//
6260	// Arguments:
6261	// ins -- The instruction being emitted
6262	// attr -- The emit attribute
6263	// targetReg -- The target register
6264	// op1Reg -- The register of the first operand
6265	// op3Reg -- The register of the third operand
6266	// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address
6267	// offs -- The offset added to the memory address from fldHnd
6268	//
6269	void emitter::emitIns_SIMD_R_R_C_R(instruction ins,
6270	emitAttr attr,
6271	regNumber targetReg,
6272	regNumber op1Reg,
6273	regNumber op3Reg,
6274	CORINFO_FIELD_HANDLE fldHnd,
6275	int offs)
6276	{
6277	if (UseVEXEncoding())
6278	{
6279	assert(isAvxBlendv(ins) \|\| isSse41Blendv(ins));
6280
6281	// convert SSE encoding of SSE4.1 instructions to VEX encoding
6282	switch (ins)
6283	{
6284	case INS_blendvps:
6285	{
6286	ins = INS_vblendvps;
6287	break;
6288	}
6289
6290	case INS_blendvpd:
6291	{
6292	ins = INS_vblendvpd;
6293	break;
6294	}
6295
6296	case INS_pblendvb:
6297	{
6298	ins = INS_vpblendvb;
6299	break;
6300	}
6301
6302	default:
6303	{
6304	break;
6305	}
6306	}
6307
6308	emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs);
6309	}
6310	else
6311	{
6312	assert(isSse41Blendv(ins));
6313
6314	// SSE4.1 blendv hardcode the mask vector (op3) in XMM0*
6315	if (op3Reg != REG_XMM0)
6316	{
6317	// Ensure we aren't overwriting op1
6318	assert(op1Reg != REG_XMM0);
6319
6320	emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6321	}
6322	if (op1Reg != targetReg)
6323	{
6324	// Ensure we aren't overwriting op3 (which should be REG_XMM0)
6325	assert(targetReg != REG_XMM0);
6326
6327	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6328	}
6329
6330	emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
6331	}
6332	}
6333
6334	//------------------------------------------------------------------------
6335	// emitIns_SIMD_R_R_S_R: emits the code for a SIMD instruction that takes a register operand, a variable index +
6336	// offset, another register operand, and that returns a value in register
6337	//
6338	// Arguments:
6339	// ins -- The instruction being emitted
6340	// attr -- The emit attribute
6341	// targetReg -- The target register
6342	// op1Reg -- The register of the first operand
6343	// op3Reg -- The register of the third operand
6344	// varx -- The variable index used for the memory address
6345	// offs -- The offset added to the memory address from varx
6346	//
6347	void emitter::emitIns_SIMD_R_R_S_R(
6348	instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
6349	{
6350	if (UseVEXEncoding())
6351	{
6352	assert(isAvxBlendv(ins) \|\| isSse41Blendv(ins));
6353
6354	// convert SSE encoding of SSE4.1 instructions to VEX encoding
6355	switch (ins)
6356	{
6357	case INS_blendvps:
6358	{
6359	ins = INS_vblendvps;
6360	break;
6361	}
6362
6363	case INS_blendvpd:
6364	{
6365	ins = INS_vblendvpd;
6366	break;
6367	}
6368
6369	case INS_pblendvb:
6370	{
6371	ins = INS_vpblendvb;
6372	break;
6373	}
6374
6375	default:
6376	{
6377	break;
6378	}
6379	}
6380
6381	emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs);
6382	}
6383	else
6384	{
6385	assert(isSse41Blendv(ins));
6386
6387	// SSE4.1 blendv hardcode the mask vector (op3) in XMM0*
6388	if (op3Reg != REG_XMM0)
6389	{
6390	// Ensure we aren't overwriting op1
6391	assert(op1Reg != REG_XMM0);
6392
6393	emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
6394	}
6395	if (op1Reg != targetReg)
6396	{
6397	// Ensure we aren't overwriting op3 (which should be REG_XMM0)
6398	assert(targetReg != REG_XMM0);
6399
6400	emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
6401	}
6402
6403	emitIns_R_S(ins, attr, targetReg, varx, offs);
6404	}
6405	}
6406	#endif // FEATURE_HW_INTRINSICS
6407
6408	/*****************************************************************************
6409	*
6410	* The following add instructions referencing stack-based local variables.
6411	*/
6412
6413	void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
6414	{
6415	instrDesc* id = emitNewInstr(attr);
6416	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6417	insFormat fmt = emitInsModeFormat(ins, IF_SRD);
6418
6419	// 16-bit operand instructions will need a prefix
6420	if (EA_SIZE(attr) == EA_2BYTE)
6421	{
6422	sz += `1`;
6423	}
6424
6425	// VEX prefix
6426	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6427
6428	// 64-bit operand instructions will need a REX.W prefix
6429	if (TakesRexWPrefix(ins, attr))
6430	{
6431	sz += emitGetRexPrefixSize(ins);
6432	}
6433
6434	id->idIns(ins);
6435	id->idInsFmt(fmt);
6436	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6437	id->idCodeSize(sz);
6438
6439	#ifdef DEBUG
6440	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6441	#endif
6442	dispIns(id);
6443	emitCurIGsize += sz;
6444
6445	emitAdjustStackDepthPushPop(ins);
6446	}
6447
6448	void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6449	{
6450	instrDesc* id = emitNewInstr(attr);
6451	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
6452	insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD);
6453
6454	#ifdef _TARGET_X86_
6455	if (attr == EA_1BYTE)
6456	{
6457	assert(isByteReg(ireg));
6458	}
6459	#endif
6460	// 16-bit operand instructions will need a prefix
6461	if (EA_SIZE(attr) == EA_2BYTE)
6462	{
6463	sz++;
6464	}
6465
6466	// VEX prefix
6467	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
6468
6469	// 64-bit operand instructions will need a REX.W prefix
6470	if (TakesRexWPrefix(ins, attr) \|\| IsExtendedReg(ireg, attr))
6471	{
6472	sz += emitGetRexPrefixSize(ins);
6473	}
6474
6475	id->idIns(ins);
6476	id->idInsFmt(fmt);
6477	id->idReg1(ireg);
6478	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6479	id->idCodeSize(sz);
6480	#ifdef DEBUG
6481	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6482	#endif
6483	dispIns(id);
6484	emitCurIGsize += sz;
6485	}
6486
6487	void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
6488	{
6489	emitAttr size = EA_SIZE(attr);
6490	noway_assert(emitVerifyEncodable(ins, size, ireg));
6491
6492	instrDesc* id = emitNewInstr(attr);
6493	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
6494	insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD);
6495
6496	// Most 16-bit operand instructions need a prefix
6497	if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
6498	{
6499	sz++;
6500	}
6501
6502	// VEX prefix
6503	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
6504
6505	// 64-bit operand instructions will need a REX.W prefix
6506	if (TakesRexWPrefix(ins, attr) \|\| IsExtendedReg(ireg, attr))
6507	{
6508	sz += emitGetRexPrefixSize(ins);
6509	}
6510
6511	if (ins == INS_crc32)
6512	{
6513	sz += `1`;
6514	}
6515
6516	id->idIns(ins);
6517	id->idInsFmt(fmt);
6518	id->idReg1(ireg);
6519	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6520	id->idCodeSize(sz);
6521	#ifdef DEBUG
6522	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6523	#endif
6524	dispIns(id);
6525	emitCurIGsize += sz;
6526	}
6527
6528	void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
6529	{
6530	#ifdef _TARGET_AMD64_
6531	// mov reg, imm64 is the only opcode which takes a full 8 byte immediate
6532	// all other opcodes take a sign-extended 4-byte immediate
6533	noway_assert(EA_SIZE(attr) < EA_8BYTE \|\| !EA_IS_CNS_RELOC(attr));
6534	#endif
6535
6536	insFormat fmt;
6537
6538	switch (ins)
6539	{
6540	case INS_rcl_N:
6541	case INS_rcr_N:
6542	case INS_rol_N:
6543	case INS_ror_N:
6544	case INS_shl_N:
6545	case INS_shr_N:
6546	case INS_sar_N:
6547	assert(val != `1`);
6548	fmt = IF_SRW_SHF;
6549	val &= `0x7F`;
6550	break;
6551
6552	default:
6553	fmt = emitInsModeFormat(ins, IF_SRD_CNS);
6554	break;
6555	}
6556
6557	instrDesc* id = emitNewInstrCns(attr, val);
6558	id->idIns(ins);
6559	id->idInsFmt(fmt);
6560	UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val);
6561
6562	// VEX prefix
6563	sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
6564
6565	// 64-bit operand instructions will need a REX.W prefix
6566	if (TakesRexWPrefix(ins, attr))
6567	{
6568	sz += emitGetRexPrefixSize(ins);
6569	}
6570
6571	id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
6572	id->idCodeSize(sz);
6573	#ifdef DEBUG
6574	id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
6575	#endif
6576	dispIns(id);
6577	emitCurIGsize += sz;
6578	}
6579
6580	/*****************************************************************************
6581	*
6582	* Record that a jump instruction uses the short encoding
6583	*
6584	*/
6585	void emitter::emitSetShortJump(instrDescJmp* id)
6586	{
6587	if (id->idjKeepLong)
6588	{
6589	return;
6590	}
6591
6592	id->idjShort = true;
6593	}
6594
6595	/*****************************************************************************
6596	*
6597	* Add a jmp instruction.
6598	*/
6599
6600	void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount / = 0 /)
6601	{
6602	UNATIVE_OFFSET sz;
6603	instrDescJmp* id = emitNewInstrJmp();
6604
6605	assert(dst->bbFlags & BBF_JMP_TARGET);
6606
6607	id->idIns(ins);
6608	id->idInsFmt(IF_LABEL);
6609	id->idAddr()->iiaBBlabel = dst;
6610
6611	#ifdef DEBUG
6612	// Mark the finally call
6613	if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
6614	{
6615	id->idDebugOnlyInfo()->idFinallyCall = true;
6616	}
6617	#endif // DEBUG
6618
6619	/ Assume the jump will be long /
6620
6621	id->idjShort = `0`;
6622	id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
6623
6624	/ Record the jump's IG and offset within it /
6625
6626	id->idjIG = emitCurIG;
6627	id->idjOffs = emitCurIGsize;
6628
6629	/ Append this jump to this IG's jump list /
6630
6631	id->idjNext = emitCurIGjmpList;
6632	emitCurIGjmpList = id;
6633
6634	#if EMITTER_STATS
6635	emitTotalIGjmps++;
6636	#endif
6637
6638	/ Figure out the max. size of the jump/call instruction /
6639
6640	if (ins == INS_call)
6641	{
6642	sz = CALL_INST_SIZE;
6643	}
6644	else if (ins == INS_push \|\| ins == INS_push_hide)
6645	{
6646	// Pushing the address of a basicBlock will need a reloc
6647	// as the instruction uses the absolute address,
6648	// not a relative address
6649	if (emitComp->opts.compReloc)
6650	{
6651	id->idSetIsDspReloc();
6652	}
6653	sz = PUSH_INST_SIZE;
6654	}
6655	else
6656	{
6657	insGroup* tgt;
6658
6659	/ This is a jump - assume the worst /
6660
6661	sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
6662
6663	/ Can we guess at the jump distance? /
6664
6665	tgt = (insGroup*)emitCodeGetCookie(dst);
6666
6667	if (tgt)
6668	{
6669	int extra;
6670	UNATIVE_OFFSET srcOffs;
6671	int jmpDist;
6672
6673	assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
6674
6675	/ This is a backward jump - figure out the distance /
6676
6677	srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
6678
6679	/ Compute the distance estimate /
6680
6681	jmpDist = srcOffs - tgt->igOffs;
6682	assert((int)jmpDist > `0`);
6683
6684	/ How much beyond the max. short distance does the jump go? /
6685
6686	extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
6687
6688	#if DEBUG_EMIT
6689	if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM \|\| INTERESTING_JUMP_NUM == `0`)
6690	{
6691	if (INTERESTING_JUMP_NUM == `0`)
6692	{
6693	printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6694	}
6695	printf("[0] Jump source is at %08X\n", srcOffs);
6696	printf("[0] Label block is at %08X\n", tgt->igOffs);
6697	printf("[0] Jump distance - %04X\n", jmpDist);
6698	if (extra > `0`)
6699	{
6700	printf("[0] Distance excess = %d \n", extra);
6701	}
6702	}
6703	#endif
6704
6705	if (extra <= `0` && !id->idjKeepLong)
6706	{
6707	/ Wonderful - this jump surely will be short /
6708
6709	emitSetShortJump(id);
6710	sz = JMP_SIZE_SMALL;
6711	}
6712	}
6713	#if DEBUG_EMIT
6714	else
6715	{
6716	if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM \|\| INTERESTING_JUMP_NUM == `0`)
6717	{
6718	if (INTERESTING_JUMP_NUM == `0`)
6719	{
6720	printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
6721	}
6722	printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
6723	emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
6724	printf("[0] Label block is unknown\n");
6725	}
6726	}
6727	#endif
6728	}
6729
6730	id->idCodeSize(sz);
6731
6732	dispIns(id);
6733	emitCurIGsize += sz;
6734
6735	emitAdjustStackDepthPushPop(ins);
6736	}
6737
6738	#if !FEATURE_FIXED_OUT_ARGS
6739
6740	//------------------------------------------------------------------------
6741	// emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth.
6742	//
6743	// Arguments:
6744	// ins - the instruction. Only INS_push and INS_pop adjust the stack depth.
6745	//
6746	// Notes:
6747	// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6748	// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6749	//
6750	void emitter::emitAdjustStackDepthPushPop(instruction ins)
6751	{
6752	if (ins == INS_push)
6753	{
6754	emitCurStackLvl += emitCntStackDepth;
6755
6756	if (emitMaxStackDepth < emitCurStackLvl)
6757	{
6758	JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6759	emitMaxStackDepth = emitCurStackLvl;
6760	}
6761	}
6762	else if (ins == INS_pop)
6763	{
6764	emitCurStackLvl -= emitCntStackDepth;
6765	assert((int)emitCurStackLvl >= `0`);
6766	}
6767	}
6768
6769	//------------------------------------------------------------------------
6770	// emitAdjustStackDepth: Adjust the current and maximum stack depth.
6771	//
6772	// Arguments:
6773	// ins - the instruction. Only INS_add and INS_sub adjust the stack depth.
6774	// It is assumed that the add/sub is on the stack pointer.
6775	// val - the number of bytes to add to or subtract from the stack pointer.
6776	//
6777	// Notes:
6778	// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
6779	// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
6780	//
6781	void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
6782	{
6783	// If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return.
6784	if (emitCntStackDepth == `0`)
6785	return;
6786
6787	if (ins == INS_sub)
6788	{
6789	S_UINT32 newStackLvl(emitCurStackLvl);
6790	newStackLvl += S_UINT32(val);
6791	noway_assert(!newStackLvl.IsOverflow());
6792
6793	emitCurStackLvl = newStackLvl.Value();
6794
6795	if (emitMaxStackDepth < emitCurStackLvl)
6796	{
6797	JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
6798	emitMaxStackDepth = emitCurStackLvl;
6799	}
6800	}
6801	else if (ins == INS_add)
6802	{
6803	S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
6804	noway_assert(!newStackLvl.IsOverflow());
6805
6806	emitCurStackLvl = newStackLvl.Value();
6807	}
6808	}
6809
#endif // !FEATURE_FIXED_OUT_ARGS
6811
6812	/*****************************************************************************
6813	*
6814	* Add a call instruction (direct or indirect).
6815	* argSize<0 means that the caller will pop the arguments
6816	*
6817	* The other arguments are interpreted depending on callType as shown:
6818	* Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
6819	*
6820	* EC_FUNC_TOKEN : addr is the method address
6821	* EC_FUNC_TOKEN_INDIR : addr is the indirect method address
6822	* EC_FUNC_ADDR : addr is the absolute address of the function
6823	* EC_FUNC_VIRTUAL : "call [ireg+disp]"
6824	*
6825	* If callType is one of these emitCallTypes, addr has to be NULL.
6826	* EC_INDIR_R : "call ireg".
6827	* EC_INDIR_SR : "call lcl<disp>" (eg. call [ebp-8]).
6828	* EC_INDIR_C : "call clsVar<disp>" (eg. call [clsVarAddr])
6829	* EC_INDIR_ARD : "call [ireg+xreg*xmul+disp]"
6830	*
6831	*/
6832
6833	// clang-format off
6834	void emitter::emitIns_Call(EmitCallType callType,
6835	CORINFO_METHOD_HANDLE methHnd,
6836	INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
6837	void* addr,
6838	ssize_t argSize,
6839	emitAttr retSize
6840	MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
6841	VARSET_VALARG_TP ptrVars,
6842	regMaskTP gcrefRegs,
6843	regMaskTP byrefRegs,
6844	IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
6845	regNumber ireg, // = REG_NA
6846	regNumber xreg, // = REG_NA
6847	unsigned xmul, // = 0
6848	ssize_t disp, // = 0
6849	bool isJump) // = false
6850	// clang-format on
6851	{
6852	/ Sanity check the arguments depending on callType /
6853
6854	assert(callType < EC_COUNT);
6855	assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) \|\|
6856	(ireg == REG_NA && xreg == REG_NA && xmul == `0` && disp == `0`));
6857	assert(callType != EC_FUNC_VIRTUAL \|\| (ireg < REG_COUNT && xreg == REG_NA && xmul == `0`));
6858	assert(callType < EC_INDIR_R \|\| callType == EC_INDIR_ARD \|\| callType == EC_INDIR_C \|\| addr == nullptr);
6859	assert(callType != EC_INDIR_R \|\| (ireg < REG_COUNT && xreg == REG_NA && xmul == `0` && disp == `0`));
6860	assert(callType != EC_INDIR_SR \|\|
6861	(ireg == REG_NA && xreg == REG_NA && xmul == `0` && disp < (int)emitComp->lvaCount));
6862	assert(callType != EC_INDIR_C \|\| (ireg == REG_NA && xreg == REG_NA && xmul == `0` && disp != `0`));
6863
6864	// Our stack level should be always greater than the bytes of arguments we push. Just
6865	// a sanity test.
6866	assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
6867
6868	#if STACK_PROBES
6869	if (emitComp->opts.compNeedStackProbes)
6870	{
6871	// If we've pushed more than JIT_RESERVED_STACK allows, do an additional stack probe
6872	// Else, just make sure the prolog does a probe for us. Invariant we're trying
6873	// to get is that at any point we go out to unmanaged code, there is at least
6874	// CORINFO_STACKPROBE_DEPTH bytes of stack available.
6875	//
6876	// The reason why we are not doing one probe for the max size at the prolog
6877	// is that when don't have the max depth precomputed (it can depend on codegen),
6878	// and we need it at the time we generate locallocs
6879	//
6880	// Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes in
6881	// account everything except for the arguments of a callee.
6882	//
6883	//
6884	//
6885	if ((TARGET_POINTER_SIZE + // return address for call
6886	emitComp->genStackLevel +
6887	// Current stack level. This gets resetted on every
6888	// localloc and on the prolog (invariant is that
6889	// genStackLevel is 0 on basic block entry and exit and
6890	// after any alloca). genStackLevel will include any arguments
6891	// to the call, so we will insert an additional probe if
6892	// we've consumed more than JIT_RESERVED_STACK bytes
6893	// of stack, which is what the prolog probe covers (in
6894	// addition to the EE requested size)
6895	(emitComp->compHndBBtabCount * TARGET_POINTER_SIZE)
6896	// Hidden slots for calling finallys
6897	) >= JIT_RESERVED_STACK)
6898	{
6899	// This happens when you have a call with a lot of arguments or a call is done
6900	// when there's a lot of stuff pushed on the stack (for example a call whos returned
6901	// value is an argument of another call that has pushed stuff on the stack)
6902	// This should't be very frequent.
6903	// For different values of JIT_RESERVED_STACK
6904	//
6905	// For mscorlib (109605 calls)
6906	//
6907	// 14190 probes in prologs (56760 bytes of code)
6908	//
6909	// JIT_RESERVED_STACK = 16 : 5452 extra probes
6910	// JIT_RESERVED_STACK = 32 : 1084 extra probes
6911	// JIT_RESERVED_STACK = 64 : 1 extra probes
6912	// JIT_RESERVED_STACK = 96 : 0 extra probes
6913	emitComp->genGenerateStackProbe();
6914	}
6915	else
6916	{
6917	if (emitComp->compGeneratingProlog \|\| emitComp->compGeneratingEpilog)
6918	{
6919	if (emitComp->compStackProbePrologDone)
6920	{
6921	// We already generated a probe and this call is not happening
6922	// at a depth >= JIT_RESERVED_STACK, so nothing to do here
6923	}
6924	else
6925	{
6926	// 3 possible ways to get here:
6927	// - We are in an epilog and haven't generated a probe in the prolog.
6928	// This shouldn't happen as we don't generate any calls in epilog.
6929	// - We are in the prolog, but doing a call before generating the probe.
6930	// This shouldn't happen at all.
6931	// - We are in the prolog, did not generate a probe but now we need
6932	// to generate a probe because we need a call (eg: profiler). We'll
6933	// need a probe.
6934	//
6935	// In any case, we need a probe
6936
6937	// Ignore the profiler callback for now.
6938	if (!emitComp->compIsProfilerHookNeeded())
6939	{
6940	assert(!"We do not expect to get here");
6941	emitComp->genGenerateStackProbe();
6942	}
6943	}
6944	}
6945	else
6946	{
6947	// We will need a probe and will generate it in the prolog
6948	emitComp->genNeedPrologStackProbe = true;
6949	}
6950	}
6951	}
6952	#endif // STACK_PROBES
6953
6954	// Trim out any callee-trashed registers from the live set.
6955	regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
6956	gcrefRegs &= savedSet;
6957	byrefRegs &= savedSet;
6958
6959	#ifdef DEBUG
6960	if (EMIT_GC_VERBOSE)
6961	{
6962	printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
6963	dumpConvertedVarSet(emitComp, ptrVars);
6964	printf(", gcrefRegs=");
6965	printRegMaskInt(gcrefRegs);
6966	emitDispRegSet(gcrefRegs);
6967	printf(", byrefRegs=");
6968	printRegMaskInt(byrefRegs);
6969	emitDispRegSet(byrefRegs);
6970	printf("\n");
6971	}
6972	#endif
6973
6974	/ Managed RetVal: emit sequence point for the call /
6975	if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
6976	{
6977	codeGen->genIPmappingAdd(ilOffset, false);
6978	}
6979
6980	/*
6981	We need to allocate the appropriate instruction descriptor based
6982	on whether this is a direct/indirect call, and whether we need to
6983	record an updated set of live GC variables.
6984
6985	The stats for a ton of classes is as follows:
6986
6987	Direct call w/o GC vars 220,216
6988	Indir. call w/o GC vars 144,781
6989
6990	Direct call with GC vars 9,440
6991	Indir. call with GC vars 5,768
6992	*/
6993
6994	instrDesc* id;
6995
6996	assert(argSize % REGSIZE_BYTES == `0`);
6997	int argCnt = (int)(argSize / (int)REGSIZE_BYTES); // we need a signed-divide
6998
6999	if (callType >= EC_FUNC_VIRTUAL)
7000	{
7001	/ Indirect call, virtual calls /
7002
7003	assert(callType == EC_FUNC_VIRTUAL \|\| callType == EC_INDIR_R \|\| callType == EC_INDIR_SR \|\|
7004	callType == EC_INDIR_C \|\| callType == EC_INDIR_ARD);
7005
7006	id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
7007	retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
7008	}
7009	else
7010	{
7011	// Helper/static/nonvirtual/function calls (direct or through handle),
7012	// and calls to an absolute addr.
7013
7014	assert(callType == EC_FUNC_TOKEN \|\| callType == EC_FUNC_TOKEN_INDIR \|\| callType == EC_FUNC_ADDR);
7015
7016	id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
7017	retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
7018	}
7019
7020	/ Update the emitter's live GC ref sets /
7021
7022	VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
7023	emitThisGCrefRegs = gcrefRegs;
7024	emitThisByrefRegs = byrefRegs;
7025
7026	/ Set the instruction - special case jumping a function /
7027	instruction ins = INS_call;
7028
7029	if (isJump)
7030	{
7031	assert(callType == EC_FUNC_TOKEN \|\| callType == EC_FUNC_TOKEN_INDIR);
7032	if (callType == EC_FUNC_TOKEN)
7033	{
7034	ins = INS_l_jmp;
7035	}
7036	else
7037	{
7038	ins = INS_i_jmp;
7039	}
7040	}
7041	id->idIns(ins);
7042
7043	id->idSetIsNoGC(emitNoGChelper(methHnd));
7044
7045	UNATIVE_OFFSET sz;
7046
7047	// Record the address: method, indirection, or funcptr
7048	if (callType >= EC_FUNC_VIRTUAL)
7049	{
7050	// This is an indirect call (either a virtual call or func ptr call)
7051
7052	switch (callType)
7053	{
7054	case EC_INDIR_C:
7055	// Indirect call using an absolute code address.
7056	// Must be marked as relocatable and is done at the
7057	// branch target location.
7058	goto CALL_ADDR_MODE;
7059
7060	case EC_INDIR_R: // the address is in a register
7061
7062	id->idSetIsCallRegPtr();
7063
7064	__fallthrough;
7065
7066	case EC_INDIR_ARD: // the address is an indirection
7067
7068	goto CALL_ADDR_MODE;
7069
7070	case EC_INDIR_SR: // the address is in a lcl var
7071
7072	id->idInsFmt(IF_SRD);
7073	// disp is really a lclVarNum
7074	noway_assert((unsigned)disp == (size_t)disp);
7075	id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, `0`);
7076	sz = emitInsSizeSV(id, insCodeMR(INS_call), (unsigned)disp, `0`);
7077
7078	break;
7079
7080	case EC_FUNC_VIRTUAL:
7081
7082	CALL_ADDR_MODE:
7083
7084	// fall-through
7085
7086	// The function is "ireg" if id->idIsCallRegPtr(),
7087	// else [ireg+xmulxreg+disp]*
7088
7089	id->idInsFmt(IF_ARD);
7090
7091	id->idAddr()->iiaAddrMode.amBaseReg = ireg;
7092	id->idAddr()->iiaAddrMode.amIndxReg = xreg;
7093	id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;
7094
7095	sz = emitInsSizeAM(id, insCodeMR(INS_call));
7096
7097	if (ireg == REG_NA && xreg == REG_NA)
7098	{
7099	if (codeGen->genCodeIndirAddrNeedsReloc(disp))
7100	{
7101	id->idSetIsDspReloc();
7102	}
7103	#ifdef _TARGET_AMD64_
7104	else
7105	{
7106	// An absolute indir address that doesn't need reloc should fit within 32-bits
7107	// to be encoded as offset relative to zero. This addr mode requires an extra
7108	// SIB byte
7109	noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
7110	sz++;
7111	}
7112	#endif //_TARGET_AMD64_
7113	}
7114
7115	break;
7116
7117	default:
7118	NO_WAY("unexpected instruction");
7119	break;
7120	}
7121	}
7122	else if (callType == EC_FUNC_TOKEN_INDIR)
7123	{
7124	/ "call [method_addr]" /
7125
7126	assert(addr != nullptr);
7127
7128	id->idInsFmt(IF_METHPTR);
7129	id->idAddr()->iiaAddr = (BYTE*)addr;
7130	sz = `6`;
7131
7132	// Since this is an indirect call through a pointer and we don't
7133	// currently pass in emitAttr into this function, we query codegen
7134	// whether addr needs a reloc.
7135	if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
7136	{
7137	id->idSetIsDspReloc();
7138	}
7139	#ifdef _TARGET_AMD64_
7140	else
7141	{
7142	// An absolute indir address that doesn't need reloc should fit within 32-bits
7143	// to be encoded as offset relative to zero. This addr mode requires an extra
7144	// SIB byte
7145	noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
7146	sz++;
7147	}
7148	#endif //_TARGET_AMD64_
7149	}
7150	else
7151	{
7152	/ This is a simple direct call: "call helper/method/addr" /
7153
7154	assert(callType == EC_FUNC_TOKEN \|\| callType == EC_FUNC_ADDR);
7155
7156	assert(addr != nullptr);
7157
7158	id->idInsFmt(IF_METHOD);
7159	sz = `5`;
7160
7161	id->idAddr()->iiaAddr = (BYTE*)addr;
7162
7163	if (callType == EC_FUNC_ADDR)
7164	{
7165	id->idSetIsCallAddr();
7166	}
7167
7168	// Direct call to a method and no addr indirection is needed.
7169	if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
7170	{
7171	id->idSetIsDspReloc();
7172	}
7173	}
7174
7175	#ifdef DEBUG
7176	if (emitComp->verbose && `0`)
7177	{
7178	if (id->idIsLargeCall())
7179	{
7180	if (callType >= EC_FUNC_VIRTUAL)
7181	{
7182	printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
7183	VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
7184	}
7185	else
7186	{
7187	printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
7188	VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
7189	}
7190	}
7191	}
7192
7193	id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
7194	id->idDebugOnlyInfo()->idCallSig = sigInfo;
7195	#endif // DEBUG
7196
7197	#ifdef LATE_DISASM
7198	if (addr != nullptr)
7199	{
7200	codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
7201	}
7202	#endif // LATE_DISASM
7203
7204	id->idCodeSize(sz);
7205
7206	dispIns(id);
7207	emitCurIGsize += sz;
7208
7209	#if !FEATURE_FIXED_OUT_ARGS
7210
7211	/ The call will pop the arguments /
7212
7213	if (emitCntStackDepth && argSize > `0`)
7214	{
7215	noway_assert((ssize_t)emitCurStackLvl >= argSize);
7216	emitCurStackLvl -= (int)argSize;
7217	assert((int)emitCurStackLvl >= `0`);
7218	}
7219
7220	#endif // !FEATURE_FIXED_OUT_ARGS
7221	}
7222
7223	#ifdef DEBUG
7224	/*****************************************************************************
7225	*
7226	* The following called for each recorded instruction -- use for debugging.
7227	*/
7228	void emitter::emitInsSanityCheck(instrDesc* id)
7229	{
7230	// make certain you only try to put relocs on things that can have them.
7231	ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7232	if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
7233	{
7234	idOp = ID_OP_CNS;
7235	}
7236
7237	if (id->idIsDspReloc())
7238	{
7239	assert(idOp == ID_OP_NONE \|\| idOp == ID_OP_AMD \|\| idOp == ID_OP_DSP \|\| idOp == ID_OP_DSP_CNS \|\|
7240	idOp == ID_OP_AMD_CNS \|\| idOp == ID_OP_SPEC \|\| idOp == ID_OP_CALL \|\| idOp == ID_OP_JMP \|\|
7241	idOp == ID_OP_LBL);
7242	}
7243
7244	if (id->idIsCnsReloc())
7245	{
7246	assert(idOp == ID_OP_CNS \|\| idOp == ID_OP_AMD_CNS \|\| idOp == ID_OP_DSP_CNS \|\| idOp == ID_OP_SPEC \|\|
7247	idOp == ID_OP_CALL \|\| idOp == ID_OP_JMP);
7248	}
7249	}
7250	#endif
7251
7252	/*****************************************************************************
7253	*
7254	* Return the allocated size (in bytes) of the given instruction descriptor.
7255	*/
7256
7257	size_t emitter::emitSizeOfInsDsc(instrDesc* id)
7258	{
7259	if (emitIsScnsInsDsc(id))
7260	{
7261	return SMALL_IDSC_SIZE;
7262	}
7263
7264	assert((unsigned)id->idInsFmt() < emitFmtCount);
7265
7266	ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
7267
7268	// An INS_call instruction may use a "fat" direct/indirect call descriptor
7269	// except for a local call to a label (i.e. call to a finally)
7270	// Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
7271	// INS_call instruction always uses one of these idOps
7272
7273	if (id->idIns() == INS_call)
7274	{
7275	assert(idOp == ID_OP_CALL \|\| // is a direct call
7276	idOp == ID_OP_SPEC \|\| // is a indirect call
7277	idOp == ID_OP_JMP); // is a local call to finally clause
7278	}
7279
7280	switch (idOp)
7281	{
7282	case ID_OP_NONE:
7283	break;
7284
7285	case ID_OP_LBL:
7286	return sizeof(instrDescLbl);
7287
7288	case ID_OP_JMP:
7289	return sizeof(instrDescJmp);
7290
7291	case ID_OP_CALL:
7292	case ID_OP_SPEC:
7293	if (id->idIsLargeCall())
7294	{
7295	/ Must be a "fat" indirect call descriptor /
7296	return sizeof(instrDescCGCA);
7297	}
7298
7299	__fallthrough;
7300
7301	case ID_OP_SCNS:
7302	case ID_OP_CNS:
7303	case ID_OP_DSP:
7304	case ID_OP_DSP_CNS:
7305	case ID_OP_AMD:
7306	case ID_OP_AMD_CNS:
7307	if (id->idIsLargeCns())
7308	{
7309	if (id->idIsLargeDsp())
7310	{
7311	return sizeof(instrDescCnsDsp);
7312	}
7313	else
7314	{
7315	return sizeof(instrDescCns);
7316	}
7317	}
7318	else
7319	{
7320	if (id->idIsLargeDsp())
7321	{
7322	return sizeof(instrDescDsp);
7323	}
7324	else
7325	{
7326	return sizeof(instrDesc);
7327	}
7328	}
7329
7330	default:
7331	NO_WAY("unexpected instruction descriptor format");
7332	break;
7333	}
7334
7335	return sizeof(instrDesc);
7336	}
7337
7338	/***************************************************************************/
7339	#ifdef DEBUG
7340	/*****************************************************************************
7341	*
7342	* Return a string that represents the given register.
7343	*/
7344
7345	const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
7346	{
7347	static char rb[`2`][`128`];
7348	static unsigned char rbc = `0`;
7349
7350	const char* rn = emitComp->compRegVarName(reg, varName);
7351
7352	#ifdef _TARGET_AMD64_
7353	char suffix = `'\0'`;
7354
7355	switch (EA_SIZE(attr))
7356	{
7357	case EA_32BYTE:
7358	return emitYMMregName(reg);
7359
7360	case EA_16BYTE:
7361	return emitXMMregName(reg);
7362
7363	case EA_8BYTE:
7364	if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
7365	{
7366	return emitXMMregName(reg);
7367	}
7368	break;
7369
7370	case EA_4BYTE:
7371	if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
7372	{
7373	return emitXMMregName(reg);
7374	}
7375
7376	if (reg > REG_R15)
7377	{
7378	break;
7379	}
7380
7381	if (reg > REG_RDI)
7382	{
7383	suffix = `'d'`;
7384	goto APPEND_SUFFIX;
7385	}
7386	rbc = (rbc + `1`) % `2`;
7387	rb[rbc][`0`] = `'e'`;
7388	rb[rbc][`1`] = rn[`1`];
7389	rb[rbc][`2`] = rn[`2`];
7390	rb[rbc][`3`] = `0`;
7391	rn = rb[rbc];
7392	break;
7393
7394	case EA_2BYTE:
7395	if (reg > REG_RDI)
7396	{
7397	suffix = `'w'`;
7398	goto APPEND_SUFFIX;
7399	}
7400	rn++;
7401	break;
7402
7403	case EA_1BYTE:
7404	if (reg > REG_RDI)
7405	{
7406	suffix = `'b'`;
7407	APPEND_SUFFIX:
7408	rbc = (rbc + `1`) % `2`;
7409	rb[rbc][`0`] = rn[`0`];
7410	rb[rbc][`1`] = rn[`1`];
7411	if (rn[`2`])
7412	{
7413	assert(rn[`3`] == `0`);
7414	rb[rbc][`2`] = rn[`2`];
7415	rb[rbc][`3`] = suffix;
7416	rb[rbc][`4`] = `0`;
7417	}
7418	else
7419	{
7420	rb[rbc][`2`] = suffix;
7421	rb[rbc][`3`] = `0`;
7422	}
7423	}
7424	else
7425	{
7426	rbc = (rbc + `1`) % `2`;
7427	rb[rbc][`0`] = rn[`1`];
7428	if (reg < `4`)
7429	{
7430	rb[rbc][`1`] = `'l'`;
7431	rb[rbc][`2`] = `0`;
7432	}
7433	else
7434	{
7435	rb[rbc][`1`] = rn[`2`];
7436	rb[rbc][`2`] = `'l'`;
7437	rb[rbc][`3`] = `0`;
7438	}
7439	}
7440
7441	rn = rb[rbc];
7442	break;
7443
7444	default:
7445	break;
7446	}
7447	#endif // _TARGET_AMD64_
7448
7449	#ifdef _TARGET_X86_
7450	assert(strlen(rn) >= `3`);
7451
7452	switch (EA_SIZE(attr))
7453	{
7454	case EA_32BYTE:
7455	return emitYMMregName(reg);
7456
7457	case EA_16BYTE:
7458	return emitXMMregName(reg);
7459
7460	case EA_8BYTE:
7461	if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
7462	{
7463	return emitXMMregName(reg);
7464	}
7465	break;
7466
7467	case EA_4BYTE:
7468	if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
7469	{
7470	return emitXMMregName(reg);
7471	}
7472	break;
7473
7474	case EA_2BYTE:
7475	rn++;
7476	break;
7477
7478	case EA_1BYTE:
7479	rbc = (rbc + `1`) % `2`;
7480	rb[rbc][`0`] = rn[`1`];
7481	rb[rbc][`1`] = `'l'`;
7482	strcpy_s(&rb[rbc][`2`], sizeof(rb[`0`]) - `2`, rn + `3`);
7483
7484	rn = rb[rbc];
7485	break;
7486
7487	default:
7488	break;
7489	}
7490	#endif // _TARGET_X86_
7491
7492	#if 0
7493	// The following is useful if you want register names to be tagged with or ^ representing gcref or byref, respectively,*
7494	// however it's possibly not interesting most of the time.
7495	if (EA_IS_GCREF(attr) \|\| EA_IS_BYREF(attr))
7496	{
7497	if (rn != rb[rbc])
7498	{
7499	rbc = (rbc+`1`)%`2`;
7500	strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
7501	rn = rb[rbc];
7502	}
7503
7504	if (EA_IS_GCREF(attr))
7505	{
7506	strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
7507	}
7508	else if (EA_IS_BYREF(attr))
7509	{
7510	strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
7511	}
7512	}
7513	#endif // 0
7514
7515	return rn;
7516	}
7517
7518	/*****************************************************************************
7519	*
7520	* Return a string that represents the given FP register.
7521	*/
7522
7523	const char* emitter::emitFPregName(unsigned reg, bool varName)
7524	{
7525	assert(reg < REG_COUNT);
7526
7527	return emitComp->compFPregVarName((regNumber)(reg), varName);
7528	}
7529
7530	/*****************************************************************************
7531	*
7532	* Return a string that represents the given XMM register.
7533	*/
7534
7535	const char* emitter::emitXMMregName(unsigned reg)
7536	{
7537	static const char* const regNames[] = {
7538	#define REGDEF(name, rnum, mask, sname) "x" sname,
7539	#include "register.h"
7540	};
7541
7542	assert(reg < REG_COUNT);
7543	assert(reg < _countof(regNames));
7544
7545	return regNames[reg];
7546	}
7547
7548	/*****************************************************************************
7549	*
7550	* Return a string that represents the given YMM register.
7551	*/
7552
7553	const char* emitter::emitYMMregName(unsigned reg)
7554	{
7555	static const char* const regNames[] = {
7556	#define REGDEF(name, rnum, mask, sname) "y" sname,
7557	#include "register.h"
7558	};
7559
7560	assert(reg < REG_COUNT);
7561	assert(reg < _countof(regNames));
7562
7563	return regNames[reg];
7564	}
7565
7566	/*****************************************************************************
7567	*
7568	* Display a static data member reference.
7569	*/
7570
7571	void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc / = false /)
7572	{
7573	int doffs;
7574
7575	/ Filter out the special case of fs:[offs] /
7576
7577	// Munge any pointers if we want diff-able disassembly
7578	if (emitComp->opts.disDiffable)
7579	{
7580	ssize_t top12bits = (offs >> `20`);
7581	if ((top12bits != `0`) && (top12bits != -`1`))
7582	{
7583	offs = `0xD1FFAB1E`;
7584	}
7585	}
7586
7587	if (fldHnd == FLD_GLOBAL_FS)
7588	{
7589	printf("FS:[0x%04X]", offs);
7590	return;
7591	}
7592
7593	if (fldHnd == FLD_GLOBAL_DS)
7594	{
7595	printf("[0x%04X]", offs);
7596	return;
7597	}
7598
7599	printf("[");
7600
7601	doffs = Compiler::eeGetJitDataOffs(fldHnd);
7602
7603	if (reloc)
7604	{
7605	printf("reloc ");
7606	}
7607
7608	if (doffs >= `0`)
7609	{
7610	if (doffs & `1`)
7611	{
7612	printf("@CNS%02u", doffs - `1`);
7613	}
7614	else
7615	{
7616	printf("@RWD%02u", doffs);
7617	}
7618
7619	if (offs)
7620	{
7621	printf("%+Id", offs);
7622	}
7623	}
7624	else
7625	{
7626	printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
7627
7628	if (offs)
7629	{
7630	printf("%+Id", offs);
7631	}
7632	}
7633
7634	printf("]");
7635
7636	if (emitComp->opts.varNames && offs < `0`)
7637	{
7638	printf("'%s", emitComp->eeGetFieldName(fldHnd));
7639	if (offs)
7640	{
7641	printf("%+Id", offs);
7642	}
7643	printf("'");
7644	}
7645	}
7646
7647	/*****************************************************************************
7648	*
7649	* Display a stack frame reference.
7650	*/
7651
7652	void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
7653	{
7654	int addr;
7655	bool bEBP;
7656
7657	printf("[");
7658
7659	if (!asmfm \|\| emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
7660	{
7661	if (varx < `0`)
7662	{
7663	printf("TEMP_%02u", -varx);
7664	}
7665	else
7666	{
7667	printf("V%02u", +varx);
7668	}
7669
7670	if (disp < `0`)
7671	{
7672	printf("-0x%X", -disp);
7673	}
7674	else if (disp > `0`)
7675	{
7676	printf("+0x%X", +disp);
7677	}
7678	}
7679
7680	if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
7681	{
7682	if (!asmfm)
7683	{
7684	printf(" ");
7685	}
7686
7687	addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
7688
7689	if (bEBP)
7690	{
7691	printf(STR_FPBASE);
7692
7693	if (addr < `0`)
7694	{
7695	printf("-%02XH", -addr);
7696	}
7697	else if (addr > `0`)
7698	{
7699	printf("+%02XH", addr);
7700	}
7701	}
7702	else
7703	{
7704	/ Adjust the offset by amount currently pushed on the stack /
7705
7706	printf(STR_SPBASE);
7707
7708	if (addr < `0`)
7709	{
7710	printf("-%02XH", -addr);
7711	}
7712	else if (addr > `0`)
7713	{
7714	printf("+%02XH", addr);
7715	}
7716
7717	#if !FEATURE_FIXED_OUT_ARGS
7718
7719	if (emitCurStackLvl)
7720	printf("+%02XH", emitCurStackLvl);
7721
7722	#endif // !FEATURE_FIXED_OUT_ARGS
7723	}
7724	}
7725
7726	printf("]");
7727
7728	if (varx >= `0` && emitComp->opts.varNames)
7729	{
7730	LclVarDsc* varDsc;
7731	const char* varName;
7732
7733	assert((unsigned)varx < emitComp->lvaCount);
7734	varDsc = emitComp->lvaTable + varx;
7735	varName = emitComp->compLocalVarName(varx, offs);
7736
7737	if (varName)
7738	{
7739	printf("'%s", varName);
7740
7741	if (disp < `0`)
7742	{
7743	printf("-%d", -disp);
7744	}
7745	else if (disp > `0`)
7746	{
7747	printf("+%d", +disp);
7748	}
7749
7750	printf("'");
7751	}
7752	}
7753	}
7754
7755	/*****************************************************************************
7756	*
7757	* Display an reloc value
7758	* If we are formatting for an assembly listing don't print the hex value
7759	* since it will prevent us from doing assembly diffs
7760	*/
7761	void emitter::emitDispReloc(ssize_t value)
7762	{
7763	if (emitComp->opts.disAsm)
7764	{
7765	printf("(reloc)");
7766	}
7767	else
7768	{
7769	printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
7770	}
7771	}
7772
7773	/*****************************************************************************
7774	*
7775	* Display an address mode.
7776	*/
7777
7778	void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
7779	{
7780	bool nsep = false;
7781	ssize_t disp;
7782
7783	unsigned jtno = `0`;
7784	dataSection* jdsc = nullptr;
7785
7786	/ The displacement field is in an unusual place for calls /
7787
7788	disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
7789
7790	/ Display a jump table label if this is a switch table jump /
7791
7792	if (id->idIns() == INS_i_jmp)
7793	{
7794	UNATIVE_OFFSET offs = `0`;
7795
7796	/ Find the appropriate entry in the data section list /
7797
7798	for (jdsc = emitConsDsc.dsdList, jtno = `0`; jdsc; jdsc = jdsc->dsNext)
7799	{
7800	UNATIVE_OFFSET size = jdsc->dsSize;
7801
7802	/ Is this a label table? /
7803
7804	if (size & `1`)
7805	{
7806	size--;
7807	jtno++;
7808
7809	if (offs == id->idDebugOnlyInfo()->idMemCookie)
7810	{
7811	break;
7812	}
7813	}
7814
7815	offs += size;
7816	}
7817
7818	/ If we've found a matching entry then is a table jump /
7819
7820	if (jdsc)
7821	{
7822	if (id->idIsDspReloc())
7823	{
7824	printf("reloc ");
7825	}
7826	printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
7827	}
7828
7829	disp -= id->idDebugOnlyInfo()->idMemCookie;
7830	}
7831
7832	bool frameRef = false;
7833
7834	printf("[");
7835
7836	if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
7837	{
7838	printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
7839	nsep = true;
7840	if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
7841	{
7842	frameRef = true;
7843	}
7844	else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
7845	{
7846	frameRef = true;
7847	}
7848	}
7849
7850	if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
7851	{
7852	size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
7853
7854	if (nsep)
7855	{
7856	printf("+");
7857	}
7858	if (scale > `1`)
7859	{
7860	printf("%u*", scale);
7861	}
7862	printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
7863	nsep = true;
7864	}
7865
7866	if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
7867	{
7868	if (nsep)
7869	{
7870	printf("+");
7871	}
7872	emitDispReloc(disp);
7873	}
7874	else
7875	{
7876	// Munge any pointers if we want diff-able disassembly
7877	// It's assumed to be a pointer when disp is outside of the range (-1M, +1M); top bits are not 0 or -1
7878	if (!frameRef && emitComp->opts.disDiffable && (static_cast<size_t>((disp >> `20`) + `1`) > `1`))
7879	{
7880	if (nsep)
7881	{
7882	printf("+");
7883	}
7884	printf("D1FFAB1EH");
7885	}
7886	else if (disp > `0`)
7887	{
7888	if (nsep)
7889	{
7890	printf("+");
7891	}
7892	if (frameRef)
7893	{
7894	printf("%02XH", disp);
7895	}
7896	else if (disp < `1000`)
7897	{
7898	printf("%d", disp);
7899	}
7900	else if (disp <= `0xFFFF`)
7901	{
7902	printf("%04XH", disp);
7903	}
7904	else
7905	{
7906	printf("%08XH", disp);
7907	}
7908	}
7909	else if (disp < `0`)
7910	{
7911	if (frameRef)
7912	{
7913	printf("-%02XH", -disp);
7914	}
7915	else if (disp > -`1000`)
7916	{
7917	printf("-%d", -disp);
7918	}
7919	else if (disp >= -`0xFFFF`)
7920	{
7921	printf("-%04XH", -disp);
7922	}
7923	else if (disp < -`0xFFFFFF`)
7924	{
7925	if (nsep)
7926	{
7927	printf("+");
7928	}
7929	printf("%08XH", disp);
7930	}
7931	else
7932	{
7933	printf("-%08XH", -disp);
7934	}
7935	}
7936	else if (!nsep)
7937	{
7938	printf("%04XH", disp);
7939	}
7940	}
7941
7942	printf("]");
7943
7944	// pretty print string if it looks like one
7945	if ((id->idGCref() == GCT_GCREF) && (id->idIns() == INS_mov) && (id->idAddr()->iiaAddrMode.amBaseReg == REG_NA))
7946	{
7947	const wchar_t* str = emitComp->eeGetCPString(disp);
7948	if (str != nullptr)
7949	{
7950	printf(" '%S'", str);
7951	}
7952	}
7953
7954	if (jdsc && !noDetail)
7955	{
7956	unsigned cnt = (jdsc->dsSize - `1`) / TARGET_POINTER_SIZE;
7957	BasicBlock bbp = (BasicBlock)jdsc->dsCont;
7958
7959	#ifdef _TARGET_AMD64_
7960	#define SIZE_LETTER "Q"
7961	#else
7962	#define SIZE_LETTER "D"
7963	#endif
7964	printf("\n\n J_M%03u_DS%02u LABEL " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);
7965
7966	/ Display the label table (it's stored as "BasicBlock" values) /*
7967
7968	do
7969	{
7970	insGroup* lab;
7971
7972	/ Convert the BasicBlock* value to an IG address /
7973
7974	lab = (insGroup)emitCodeGetCookie(bbp++);
7975	assert(lab);
7976
7977	printf("\n D" SIZE_LETTER " G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
7978	} while (--cnt);
7979	}
7980	}
7981
7982	/*****************************************************************************
7983	*
7984	* If the given instruction is a shift, display the 2nd operand.
7985	*/
7986
7987	void emitter::emitDispShift(instruction ins, int cnt)
7988	{
7989	switch (ins)
7990	{
7991	case INS_rcl_1:
7992	case INS_rcr_1:
7993	case INS_rol_1:
7994	case INS_ror_1:
7995	case INS_shl_1:
7996	case INS_shr_1:
7997	case INS_sar_1:
7998	printf(", 1");
7999	break;
8000
8001	case INS_rcl:
8002	case INS_rcr:
8003	case INS_rol:
8004	case INS_ror:
8005	case INS_shl:
8006	case INS_shr:
8007	case INS_sar:
8008	printf(", cl");
8009	break;
8010
8011	case INS_rcl_N:
8012	case INS_rcr_N:
8013	case INS_rol_N:
8014	case INS_ror_N:
8015	case INS_shl_N:
8016	case INS_shr_N:
8017	case INS_sar_N:
8018	printf(", %d", cnt);
8019	break;
8020
8021	default:
8022	break;
8023	}
8024	}
8025
8026	/*****************************************************************************
8027	*
8028	* Display (optionally) the bytes for the instruction encoding in hex
8029	*/
8030
8031	void emitter::emitDispInsHex(BYTE* code, size_t sz)
8032	{
8033	// We do not display the instruction hex if we want diff-able disassembly
8034	if (!emitComp->opts.disDiffable)
8035	{
8036	#ifdef _TARGET_AMD64_
8037	// how many bytes per instruction we format for
8038	const size_t digits = `10`;
8039	#else // _TARGET_X86
8040	const size_t digits = `6`;
8041	#endif
8042	printf(" ");
8043	for (unsigned i = `0`; i < sz; i++)
8044	{
8045	printf("%02X", (((BYTE)(code + i))));
8046	}
8047
8048	if (sz < digits)
8049	{
8050	printf("%.s", `2` (digits - sz), " ");
8051	}
8052	}
8053	}
8054
8055	/*****************************************************************************
8056	*
8057	* Display the given instruction.
8058	*/
8059
8060	void emitter::emitDispIns(
8061	instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
8062	{
8063	emitAttr attr;
8064	const char* sstr;
8065
8066	instruction ins = id->idIns();
8067
8068	if (emitComp->verbose)
8069	{
8070	unsigned idNum = id->idDebugOnlyInfo()->idNum;
8071	printf("IN%04x: ", idNum);
8072	}
8073
8074	#define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
8075
8076	/ Display a constant value if the instruction references one /
8077
8078	if (!isNew)
8079	{
8080	switch (id->idInsFmt())
8081	{
8082	int offs;
8083
8084	case IF_MRD_RRD:
8085	case IF_MWR_RRD:
8086	case IF_MRW_RRD:
8087
8088	case IF_RRD_MRD:
8089	case IF_RWR_MRD:
8090	case IF_RRW_MRD:
8091
8092	case IF_MRD_CNS:
8093	case IF_MWR_CNS:
8094	case IF_MRW_CNS:
8095	case IF_MRW_SHF:
8096
8097	case IF_MRD:
8098	case IF_MWR:
8099	case IF_MRW:
8100
8101	case IF_MRD_OFF:
8102
8103	/ Is this actually a reference to a data section? /
8104
8105	offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
8106
8107	if (offs >= `0`)
8108	{
8109	void* addr;
8110
8111	/ Display a data section reference /
8112
8113	assert((unsigned)offs < emitConsDsc.dsdOffs);
8114	addr = emitConsBlock ? emitConsBlock + offs : nullptr;
8115
8116	#if 0
8117	// TODO-XArch-Cleanup: Fix or remove this code.
8118	/ Is the operand an integer or floating-point value? /
8119
8120	bool isFP = false;
8121
8122	if (CodeGen::instIsFP(id->idIns()))
8123	{
8124	switch (id->idIns())
8125	{
8126	case INS_fild:
8127	case INS_fildl:
8128	break;
8129
8130	default:
8131	isFP = true;
8132	break;
8133	}
8134	}
8135
8136	if (offs & `1`)
8137	printf("@CNS%02u", offs);
8138	else
8139	printf("@RWD%02u", offs);
8140
8141	printf(" ");
8142
8143	if (addr)
8144	{
8145	addr = `0`;
8146	// TODO-XArch-Bug?:
8147	// This was busted by switching the order
8148	// in which we output the code block vs.
8149	// the data blocks -- when we get here,
8150	// the data block has not been filled in
8151	// yet, so we'll display garbage.
8152
8153	if (isFP)
8154	{
8155	if (id->idOpSize() == EA_4BYTE)
8156	printf("DF %f \n", addr ? (float* *)addr : `0`);
8157	else
8158	printf("DQ %lf\n", addr ? (double* *)addr : `0`);
8159	}
8160	else
8161	{
8162	if (id->idOpSize() <= EA_4BYTE)
8163	printf("DD %d \n", addr ? (int* *)addr : `0`);
8164	else
8165	printf("DQ %D \n", addr ? (__int64* *)addr : `0`);
8166	}
8167	}
8168	#endif
8169	}
8170	break;
8171
8172	default:
8173	break;
8174	}
8175	}
8176
8177	// printf("[F=%s] " , emitIfName(id->idInsFmt()));
8178	// printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
8179	// printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
8180	// printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
8181	// printf("[A=%08X] " , emitSimpleStkMask);
8182	// printf("[A=%08X] " , emitSimpleByrefStkMask);
8183	// printf("[L=%02u] " , id->idCodeSize());
8184
8185	if (!emitComp->opts.dspEmit && !isNew && !asmfm)
8186	{
8187	doffs = true;
8188	}
8189
8190	/ Display the instruction offset /
8191
8192	emitDispInsOffs(offset, doffs);
8193
8194	if (code != nullptr)
8195	{
8196	/ Display the instruction hex code /
8197
8198	emitDispInsHex(code, sz);
8199	}
8200
8201	/ Display the instruction name /
8202
8203	sstr = codeGen->genInsName(ins);
8204
8205	if (IsAVXInstruction(ins) && !IsBMIInstruction(ins))
8206	{
8207	printf(" v%-8s", sstr);
8208	}
8209	else
8210	{
8211	printf(" %-9s", sstr);
8212	}
8213	#ifndef FEATURE_PAL
8214	if (strnlen_s(sstr, `10`) >= `8`)
8215	#else // FEATURE_PAL
8216	if (strnlen(sstr, `10`) >= `8`)
8217	#endif // FEATURE_PAL
8218	{
8219	printf(" ");
8220	}
8221
8222	/ By now the size better be set to something /
8223
8224	assert(emitInstCodeSz(id) \|\| emitInstHasNoCode(ins));
8225
8226	/ Figure out the operand size /
8227
8228	if (id->idGCref() == GCT_GCREF)
8229	{
8230	attr = EA_GCREF;
8231	sstr = "gword ptr ";
8232	}
8233	else if (id->idGCref() == GCT_BYREF)
8234	{
8235	attr = EA_BYREF;
8236	sstr = "bword ptr ";
8237	}
8238	else
8239	{
8240	attr = id->idOpSize();
8241	sstr = codeGen->genSizeStr(attr);
8242
8243	if (ins == INS_lea)
8244	{
8245	#ifdef _TARGET_AMD64_
8246	assert((attr == EA_4BYTE) \|\| (attr == EA_8BYTE));
8247	#else
8248	assert(attr == EA_4BYTE);
8249	#endif
8250	sstr = "";
8251	}
8252	}
8253
8254	/ Now see what instruction format we've got /
8255
8256	// First print the implicit register usage
8257	if (instrHasImplicitRegPairDest(ins))
8258	{
8259	printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
8260	}
8261	else if (instrIs3opImul(ins))
8262	{
8263	regNumber tgtReg = inst3opImulReg(ins);
8264	printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
8265	}
8266
8267	switch (id->idInsFmt())
8268	{
8269	ssize_t val;
8270	ssize_t offs;
8271	CnsVal cnsVal;
8272	const char* methodName;
8273
8274	case IF_CNS:
8275	val = emitGetInsSC(id);
8276	#ifdef _TARGET_AMD64_
8277	// no 8-byte immediates allowed here!
8278	assert((val >= (ssize_t)`0xFFFFFFFF80000000LL`) && (val <= `0x000000007FFFFFFFLL`));
8279	#endif
8280	if (id->idIsCnsReloc())
8281	{
8282	emitDispReloc(val);
8283	}
8284	else
8285	{
8286	PRINT_CONSTANT:
8287	// Munge any pointers if we want diff-able disassembly
8288	if (emitComp->opts.disDiffable)
8289	{
8290	ssize_t top14bits = (val >> `18`);
8291	if ((top14bits != `0`) && (top14bits != -`1`))
8292	{
8293	val = `0xD1FFAB1E`;
8294	}
8295	}
8296	if ((val > -`1000`) && (val < `1000`))
8297	{
8298	printf("%d", val);
8299	}
8300	else if ((val > `0`) \|\| (val < -`0xFFFFFF`))
8301	{
8302	printf("0x%IX", val);
8303	}
8304	else
8305	{ // (val < 0)
8306	printf("-0x%IX", -val);
8307	}
8308	}
8309	break;
8310
8311	case IF_ARD:
8312	case IF_AWR:
8313	case IF_ARW:
8314
8315	if (ins == INS_call && id->idIsCallRegPtr())
8316	{
8317	printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
8318	break;
8319	}
8320
8321	printf("%s", sstr);
8322	emitDispAddrMode(id, isNew);
8323	emitDispShift(ins);
8324
8325	if (ins == INS_call)
8326	{
8327	assert(id->idInsFmt() == IF_ARD);
8328
8329	/ Ignore indirect calls /
8330
8331	if (id->idDebugOnlyInfo()->idMemCookie == `0`)
8332	{
8333	break;
8334	}
8335
8336	assert(id->idDebugOnlyInfo()->idMemCookie);
8337
8338	/ This is a virtual call /
8339
8340	methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
8341	printf("%s", methodName);
8342	}
8343	break;
8344
8345	case IF_RRD_ARD:
8346	case IF_RWR_ARD:
8347	case IF_RRW_ARD:
8348	#ifdef _TARGET_AMD64_
8349	if (ins == INS_movsxd)
8350	{
8351	printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
8352	}
8353	else
8354	#endif
8355	if (ins == INS_movsx \|\| ins == INS_movzx)
8356	{
8357	printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
8358	}
8359	else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8360	{
8361	// The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8362	// This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8363	printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8364	}
8365	else
8366	{
8367	printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8368	}
8369	emitDispAddrMode(id);
8370	break;
8371
8372	case IF_RRW_ARD_CNS:
8373	case IF_RWR_ARD_CNS:
8374	{
8375	printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8376	emitDispAddrMode(id);
8377	emitGetInsAmdCns(id, &cnsVal);
8378
8379	val = cnsVal.cnsVal;
8380	printf(", ");
8381
8382	if (cnsVal.cnsReloc)
8383	{
8384	emitDispReloc(val);
8385	}
8386	else
8387	{
8388	goto PRINT_CONSTANT;
8389	}
8390
8391	break;
8392	}
8393
8394	case IF_AWR_RRD_CNS:
8395	{
8396	assert(ins == INS_vextracti128 \|\| ins == INS_vextractf128);
8397	// vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8398	sstr = codeGen->genSizeStr(EA_ATTR(`16`));
8399	printf(sstr);
8400	emitDispAddrMode(id);
8401	printf(", %s", emitRegName(id->idReg1(), attr));
8402
8403	emitGetInsAmdCns(id, &cnsVal);
8404
8405	val = cnsVal.cnsVal;
8406	printf(", ");
8407
8408	if (cnsVal.cnsReloc)
8409	{
8410	emitDispReloc(val);
8411	}
8412	else
8413	{
8414	goto PRINT_CONSTANT;
8415	}
8416
8417	break;
8418	}
8419
8420	case IF_RWR_RRD_ARD:
8421	printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8422	emitDispAddrMode(id);
8423	break;
8424
8425	case IF_RWR_ARD_RRD:
8426	if (ins == INS_vpgatherqd \|\| ins == INS_vgatherqps)
8427	{
8428	attr = EA_16BYTE;
8429	}
8430	sstr = codeGen->genSizeStr(EA_ATTR(`4`));
8431	printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8432	emitDispAddrMode(id);
8433	printf(", %s", emitRegName(id->idReg2(), attr));
8434	break;
8435
8436	case IF_RWR_RRD_ARD_CNS:
8437	{
8438	printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8439	emitDispAddrMode(id);
8440	emitGetInsAmdCns(id, &cnsVal);
8441
8442	val = cnsVal.cnsVal;
8443	printf(", ");
8444
8445	if (cnsVal.cnsReloc)
8446	{
8447	emitDispReloc(val);
8448	}
8449	else
8450	{
8451	goto PRINT_CONSTANT;
8452	}
8453
8454	break;
8455	}
8456
8457	case IF_RWR_RRD_ARD_RRD:
8458	{
8459	printf("%s, ", emitRegName(id->idReg1(), attr));
8460	printf("%s, ", emitRegName(id->idReg2(), attr));
8461	emitDispAddrMode(id);
8462
8463	emitGetInsAmdCns(id, &cnsVal);
8464	val = (cnsVal.cnsVal >> `4`) + XMMBASE;
8465	printf(", %s", emitRegName((regNumber)val, attr));
8466	break;
8467	}
8468
8469	case IF_ARD_RRD:
8470	case IF_AWR_RRD:
8471	case IF_ARW_RRD:
8472
8473	printf("%s", sstr);
8474	emitDispAddrMode(id);
8475	printf(", %s", emitRegName(id->idReg1(), attr));
8476	break;
8477
8478	case IF_AWR_RRD_RRD:
8479	{
8480	printf("%s", sstr);
8481	emitDispAddrMode(id);
8482	printf(", %s", emitRegName(id->idReg1(), attr));
8483	printf(", %s", emitRegName(id->idReg2(), attr));
8484	break;
8485	}
8486
8487	case IF_ARD_CNS:
8488	case IF_AWR_CNS:
8489	case IF_ARW_CNS:
8490	case IF_ARW_SHF:
8491
8492	printf("%s", sstr);
8493	emitDispAddrMode(id);
8494	emitGetInsAmdCns(id, &cnsVal);
8495	val = cnsVal.cnsVal;
8496	#ifdef _TARGET_AMD64_
8497	// no 8-byte immediates allowed here!
8498	assert((val >= (ssize_t)`0xFFFFFFFF80000000LL`) && (val <= `0x000000007FFFFFFFLL`));
8499	#endif
8500	if (id->idInsFmt() == IF_ARW_SHF)
8501	{
8502	emitDispShift(ins, (BYTE)val);
8503	}
8504	else
8505	{
8506	printf(", ");
8507	if (cnsVal.cnsReloc)
8508	{
8509	emitDispReloc(val);
8510	}
8511	else
8512	{
8513	goto PRINT_CONSTANT;
8514	}
8515	}
8516	break;
8517
8518	case IF_SRD:
8519	case IF_SWR:
8520	case IF_SRW:
8521
8522	printf("%s", sstr);
8523
8524	#if !FEATURE_FIXED_OUT_ARGS
8525	if (ins == INS_pop)
8526	emitCurStackLvl -= sizeof(int);
8527	#endif
8528
8529	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8530	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8531
8532	#if !FEATURE_FIXED_OUT_ARGS
8533	if (ins == INS_pop)
8534	emitCurStackLvl += sizeof(int);
8535	#endif
8536
8537	emitDispShift(ins);
8538	break;
8539
8540	case IF_SRD_RRD:
8541	case IF_SWR_RRD:
8542	case IF_SRW_RRD:
8543
8544	printf("%s", sstr);
8545
8546	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8547	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8548
8549	printf(", %s", emitRegName(id->idReg1(), attr));
8550	break;
8551
8552	case IF_SRD_CNS:
8553	case IF_SWR_CNS:
8554	case IF_SRW_CNS:
8555	case IF_SRW_SHF:
8556
8557	printf("%s", sstr);
8558
8559	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8560	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8561
8562	emitGetInsCns(id, &cnsVal);
8563	val = cnsVal.cnsVal;
8564	#ifdef _TARGET_AMD64_
8565	// no 8-byte immediates allowed here!
8566	assert((val >= (ssize_t)`0xFFFFFFFF80000000LL`) && (val <= `0x000000007FFFFFFFLL`));
8567	#endif
8568	if (id->idInsFmt() == IF_SRW_SHF)
8569	{
8570	emitDispShift(ins, (BYTE)val);
8571	}
8572	else
8573	{
8574	printf(", ");
8575	if (cnsVal.cnsReloc)
8576	{
8577	emitDispReloc(val);
8578	}
8579	else
8580	{
8581	goto PRINT_CONSTANT;
8582	}
8583	}
8584	break;
8585
8586	case IF_RRD_SRD:
8587	case IF_RWR_SRD:
8588	case IF_RRW_SRD:
8589	#ifdef _TARGET_AMD64_
8590	if (ins == INS_movsxd)
8591	{
8592	printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
8593	}
8594	else
8595	#endif
8596	if (ins == INS_movsx \|\| ins == INS_movzx)
8597	{
8598	printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
8599	}
8600	else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8601	{
8602	// The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8603	// This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8604	printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8605	}
8606	else
8607	{
8608	printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8609	}
8610
8611	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8612	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8613
8614	break;
8615
8616	case IF_RRW_SRD_CNS:
8617	case IF_RWR_SRD_CNS:
8618	{
8619	printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8620	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8621	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8622	emitGetInsCns(id, &cnsVal);
8623
8624	val = cnsVal.cnsVal;
8625	printf(", ");
8626
8627	if (cnsVal.cnsReloc)
8628	{
8629	emitDispReloc(val);
8630	}
8631	else
8632	{
8633	goto PRINT_CONSTANT;
8634	}
8635	break;
8636	}
8637
8638	case IF_RWR_RRD_SRD:
8639	printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8640	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8641	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8642	break;
8643
8644	case IF_RWR_RRD_SRD_CNS:
8645	{
8646	printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8647	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8648	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8649	emitGetInsCns(id, &cnsVal);
8650
8651	val = cnsVal.cnsVal;
8652	printf(", ");
8653
8654	if (cnsVal.cnsReloc)
8655	{
8656	emitDispReloc(val);
8657	}
8658	else
8659	{
8660	goto PRINT_CONSTANT;
8661	}
8662	break;
8663	}
8664
8665	case IF_RWR_RRD_SRD_RRD:
8666	{
8667	printf("%s, ", emitRegName(id->idReg1(), attr));
8668	printf("%s, ", emitRegName(id->idReg2(), attr));
8669	emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
8670	id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
8671
8672	emitGetInsCns(id, &cnsVal);
8673	val = (cnsVal.cnsVal >> `4`) + XMMBASE;
8674	printf(", %s", emitRegName((regNumber)val, attr));
8675	break;
8676	}
8677
8678	case IF_RRD_RRD:
8679	case IF_RWR_RRD:
8680	case IF_RRW_RRD:
8681	if (ins == INS_mov_i2xmm)
8682	{
8683	printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8684	}
8685	else if (ins == INS_mov_xmm2i)
8686	{
8687	printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE));
8688	}
8689	else if (ins == INS_pmovmskb)
8690	{
8691	printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8692	}
8693	else if ((ins == INS_cvtsi2ss) \|\| (ins == INS_cvtsi2sd))
8694	{
8695	printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
8696	}
8697	else if ((ins == INS_cvttsd2si) \|\| (ins == INS_cvtss2si) \|\| (ins == INS_cvtsd2si) \|\| (ins == INS_cvttss2si))
8698	{
8699	printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
8700	}
8701	#ifdef _TARGET_AMD64_
8702	else if (ins == INS_movsxd)
8703	{
8704	printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
8705	}
8706	#endif // _TARGET_AMD64_
8707	else if (ins == INS_movsx \|\| ins == INS_movzx)
8708	{
8709	printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
8710	}
8711	else if (ins == INS_bt)
8712	{
8713	// INS_bt operands are reversed. Display them in the normal order.
8714	printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), attr));
8715	}
8716	#ifdef FEATURE_HW_INTRINSICS
8717	else if (ins == INS_crc32 && attr != EA_8BYTE)
8718	{
8719	// The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8720	// This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8721	printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8722	}
8723	#endif // FEATURE_HW_INTRINSICS
8724	else
8725	{
8726	printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
8727	}
8728	break;
8729
8730	case IF_RRW_RRW:
8731	assert(ins == INS_xchg);
8732	printf("%s,", emitRegName(id->idReg1(), attr));
8733	printf(" %s", emitRegName(id->idReg2(), attr));
8734	break;
8735
8736	case IF_RWR_RRD_RRD:
8737	{
8738	assert(IsAVXInstruction(ins));
8739	assert(IsThreeOperandAVXInstruction(ins));
8740	regNumber reg2 = id->idReg2();
8741	regNumber reg3 = id->idReg3();
8742	if (ins == INS_bextr \|\| ins == INS_bzhi)
8743	{
8744	// BMI bextr and bzhi encodes the reg2 in VEX.vvvv and reg3 in modRM,
8745	// which is different from most of other instructions
8746	regNumber tmp = reg2;
8747	reg2 = reg3;
8748	reg3 = tmp;
8749	}
8750	printf("%s, ", emitRegName(id->idReg1(), attr));
8751	printf("%s, ", emitRegName(reg2, attr));
8752	printf("%s", emitRegName(reg3, attr));
8753	break;
8754	}
8755
8756	case IF_RWR_RRD_RRD_CNS:
8757	assert(IsAVXInstruction(ins));
8758	assert(IsThreeOperandAVXInstruction(ins));
8759	printf("%s, ", emitRegName(id->idReg1(), attr));
8760	printf("%s, ", emitRegName(id->idReg2(), attr));
8761	printf("%s, ", emitRegName(id->idReg3(), attr));
8762	val = emitGetInsSC(id);
8763	goto PRINT_CONSTANT;
8764	break;
8765	case IF_RWR_RRD_RRD_RRD:
8766	assert(IsAVXOnlyInstruction(ins));
8767	assert(UseVEXEncoding());
8768	printf("%s, ", emitRegName(id->idReg1(), attr));
8769	printf("%s, ", emitRegName(id->idReg2(), attr));
8770	printf("%s, ", emitRegName(id->idReg3(), attr));
8771	printf("%s", emitRegName(id->idReg4(), attr));
8772	break;
8773	case IF_RRW_RRW_CNS:
8774	printf("%s,", emitRegName(id->idReg1(), attr));
8775	printf(" %s", emitRegName(id->idReg2(), attr));
8776	val = emitGetInsSC(id);
8777	#ifdef _TARGET_AMD64_
8778	// no 8-byte immediates allowed here!
8779	assert((val >= (ssize_t)`0xFFFFFFFF80000000LL`) && (val <= `0x000000007FFFFFFFLL`));
8780	#endif
8781	printf(", ");
8782	if (id->idIsCnsReloc())
8783	{
8784	emitDispReloc(val);
8785	}
8786	else
8787	{
8788	goto PRINT_CONSTANT;
8789	}
8790	break;
8791
8792	case IF_RRD:
8793	case IF_RWR:
8794	case IF_RRW:
8795	printf("%s", emitRegName(id->idReg1(), attr));
8796	emitDispShift(ins);
8797	break;
8798
8799	case IF_RRW_SHF:
8800	printf("%s", emitRegName(id->idReg1(), attr));
8801	emitDispShift(ins, (BYTE)emitGetInsSC(id));
8802	break;
8803
8804	case IF_RRD_MRD:
8805	case IF_RWR_MRD:
8806	case IF_RRW_MRD:
8807
8808	if (ins == INS_movsx \|\| ins == INS_movzx)
8809	{
8810	attr = EA_PTRSIZE;
8811	}
8812	#ifdef _TARGET_AMD64_
8813	else if (ins == INS_movsxd)
8814	{
8815	attr = EA_PTRSIZE;
8816	}
8817	#endif
8818	else if ((ins == INS_crc32) && (attr != EA_8BYTE))
8819	{
8820	// The idReg1 is always 4 bytes, but the size of idReg2 can vary.
8821	// This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx`
8822	printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
8823	}
8824	printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8825	offs = emitGetInsDsp(id);
8826	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8827	break;
8828
8829	case IF_RRW_MRD_CNS:
8830	case IF_RWR_MRD_CNS:
8831	{
8832	printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
8833	offs = emitGetInsDsp(id);
8834	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8835	emitGetInsDcmCns(id, &cnsVal);
8836
8837	val = cnsVal.cnsVal;
8838	printf(", ");
8839
8840	if (cnsVal.cnsReloc)
8841	{
8842	emitDispReloc(val);
8843	}
8844	else
8845	{
8846	goto PRINT_CONSTANT;
8847	}
8848	break;
8849	}
8850
8851	case IF_MWR_RRD_CNS:
8852	{
8853	assert(ins == INS_vextracti128 \|\| ins == INS_vextractf128);
8854	// vextracti/f128 extracts 128-bit data, so we fix sstr as "xmm ptr"
8855	sstr = codeGen->genSizeStr(EA_ATTR(`16`));
8856	printf(sstr);
8857	offs = emitGetInsDsp(id);
8858	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8859	printf(", %s", emitRegName(id->idReg1(), attr));
8860	emitGetInsDcmCns(id, &cnsVal);
8861
8862	val = cnsVal.cnsVal;
8863	printf(", ");
8864
8865	if (cnsVal.cnsReloc)
8866	{
8867	emitDispReloc(val);
8868	}
8869	else
8870	{
8871	goto PRINT_CONSTANT;
8872	}
8873
8874	break;
8875	}
8876
8877	case IF_RWR_RRD_MRD:
8878	printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8879	offs = emitGetInsDsp(id);
8880	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8881	break;
8882
8883	case IF_RWR_RRD_MRD_CNS:
8884	{
8885	printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
8886	offs = emitGetInsDsp(id);
8887	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8888	emitGetInsDcmCns(id, &cnsVal);
8889
8890	val = cnsVal.cnsVal;
8891	printf(", ");
8892
8893	if (cnsVal.cnsReloc)
8894	{
8895	emitDispReloc(val);
8896	}
8897	else
8898	{
8899	goto PRINT_CONSTANT;
8900	}
8901	break;
8902	}
8903
8904	case IF_RWR_RRD_MRD_RRD:
8905	{
8906	printf("%s, ", emitRegName(id->idReg1(), attr));
8907	printf("%s, ", emitRegName(id->idReg2(), attr));
8908
8909	offs = emitGetInsDsp(id);
8910	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8911
8912	emitGetInsDcmCns(id, &cnsVal);
8913	val = (cnsVal.cnsVal >> `4`) + XMMBASE;
8914	printf(", %s", emitRegName((regNumber)val, attr));
8915	break;
8916	}
8917
8918	case IF_RWR_MRD_OFF:
8919
8920	printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
8921	offs = emitGetInsDsp(id);
8922	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8923	break;
8924
8925	case IF_MRD_RRD:
8926	case IF_MWR_RRD:
8927	case IF_MRW_RRD:
8928
8929	printf("%s", sstr);
8930	offs = emitGetInsDsp(id);
8931	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8932	printf(", %s", emitRegName(id->idReg1(), attr));
8933	break;
8934
8935	case IF_MRD_CNS:
8936	case IF_MWR_CNS:
8937	case IF_MRW_CNS:
8938	case IF_MRW_SHF:
8939
8940	printf("%s", sstr);
8941	offs = emitGetInsDsp(id);
8942	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8943	emitGetInsDcmCns(id, &cnsVal);
8944	val = cnsVal.cnsVal;
8945	#ifdef _TARGET_AMD64_
8946	// no 8-byte immediates allowed here!
8947	assert((val >= (ssize_t)`0xFFFFFFFF80000000LL`) && (val <= `0x000000007FFFFFFFLL`));
8948	#endif
8949	if (cnsVal.cnsReloc)
8950	{
8951	emitDispReloc(val);
8952	}
8953	else if (id->idInsFmt() == IF_MRW_SHF)
8954	{
8955	emitDispShift(ins, (BYTE)val);
8956	}
8957	else
8958	{
8959	printf(", ");
8960	goto PRINT_CONSTANT;
8961	}
8962	break;
8963
8964	case IF_MRD:
8965	case IF_MWR:
8966	case IF_MRW:
8967
8968	printf("%s", sstr);
8969	offs = emitGetInsDsp(id);
8970	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8971	emitDispShift(ins);
8972	break;
8973
8974	case IF_MRD_OFF:
8975
8976	printf("offset ");
8977	offs = emitGetInsDsp(id);
8978	emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
8979	break;
8980
8981	case IF_RRD_CNS:
8982	case IF_RWR_CNS:
8983	case IF_RRW_CNS:
8984	printf("%s, ", emitRegName(id->idReg1(), attr));
8985	val = emitGetInsSC(id);
8986	if (id->idIsCnsReloc())
8987	{
8988	emitDispReloc(val);
8989	}
8990	else
8991	{
8992	goto PRINT_CONSTANT;
8993	}
8994	break;
8995
8996	case IF_LABEL:
8997	case IF_RWR_LABEL:
8998	case IF_SWR_LABEL:
8999
9000	if (ins == INS_lea)
9001	{
9002	printf("%s, ", emitRegName(id->idReg1(), attr));
9003	}
9004	else if (ins == INS_mov)
9005	{
9006	/ mov dword ptr [frame.callSiteReturnAddress], label /
9007	assert(id->idInsFmt() == IF_SWR_LABEL);
9008	instrDescLbl* idlbl = (instrDescLbl*)id;
9009
9010	emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), `0`, asmfm);
9011
9012	printf(", ");
9013	}
9014
9015	if (((instrDescJmp*)id)->idjShort)
9016	{
9017	printf("SHORT ");
9018	}
9019
9020	if (id->idIsBound())
9021	{
9022	printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
9023	}
9024	else
9025	{
9026	printf("L_M%03u_" FMT_BB, Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
9027	}
9028	break;
9029
9030	case IF_METHOD:
9031	case IF_METHPTR:
9032	if (id->idIsCallAddr())
9033	{
9034	offs = (ssize_t)id->idAddr()->iiaAddr;
9035	methodName = "";
9036	}
9037	else
9038	{
9039	offs = `0`;
9040	methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
9041	}
9042
9043	if (id->idInsFmt() == IF_METHPTR)
9044	{
9045	printf("[");
9046	}
9047
9048	if (offs)
9049	{
9050	if (id->idIsDspReloc())
9051	{
9052	printf("reloc ");
9053	}
9054	printf("%08X", offs);
9055	}
9056	else
9057	{
9058	printf("%s", methodName);
9059	}
9060
9061	if (id->idInsFmt() == IF_METHPTR)
9062	{
9063	printf("]");
9064	}
9065
9066	break;
9067
9068	case IF_NONE:
9069	break;
9070
9071	default:
9072	printf("unexpected format %s", emitIfName(id->idInsFmt()));
9073	assert(!"unexpectedFormat");
9074	break;
9075	}
9076
9077	if (sz != `0` && sz != id->idCodeSize() && (!asmfm \|\| emitComp->verbose))
9078	{
9079	// Code size in the instrDesc is different from the actual code size we've been given!
9080	printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
9081	}
9082
9083	printf("\n");
9084	}
9085
9086	/***************************************************************************/
9087	#endif
9088
9089	/*****************************************************************************
9090	*
9091	* Output nBytes bytes of NOP instructions
9092	*/
9093
9094	static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
9095	{
9096	assert(nBytes <= `15`);
9097
9098	#ifndef _TARGET_AMD64_
9099	// TODO-X86-CQ: when VIA C3 CPU's are out of circulation, switch to the
9100	// more efficient real NOP: 0x0F 0x1F +modR/M
9101	// Also can't use AMD recommended, multiple size prefixes (i.e. 0x66 0x66 0x90 for 3 byte NOP)
9102	// because debugger and msdis don't like it, so maybe VIA doesn't either
9103	// So instead just stick to repeating single byte nops
9104
9105	switch (nBytes)
9106	{
9107	case `15`:
9108	*dst++ = `0x90`;
9109	__fallthrough;
9110	case `14`:
9111	*dst++ = `0x90`;
9112	__fallthrough;
9113	case `13`:
9114	*dst++ = `0x90`;
9115	__fallthrough;
9116	case `12`:
9117	*dst++ = `0x90`;
9118	__fallthrough;
9119	case `11`:
9120	*dst++ = `0x90`;
9121	__fallthrough;
9122	case `10`:
9123	*dst++ = `0x90`;
9124	__fallthrough;
9125	case `9`:
9126	*dst++ = `0x90`;
9127	__fallthrough;
9128	case `8`:
9129	*dst++ = `0x90`;
9130	__fallthrough;
9131	case `7`:
9132	*dst++ = `0x90`;
9133	__fallthrough;
9134	case `6`:
9135	*dst++ = `0x90`;
9136	__fallthrough;
9137	case `5`:
9138	*dst++ = `0x90`;
9139	__fallthrough;
9140	case `4`:
9141	*dst++ = `0x90`;
9142	__fallthrough;
9143	case `3`:
9144	*dst++ = `0x90`;
9145	__fallthrough;
9146	case `2`:
9147	*dst++ = `0x90`;
9148	__fallthrough;
9149	case `1`:
9150	*dst++ = `0x90`;
9151	break;
9152	case `0`:
9153	break;
9154	}
9155	#else // _TARGET_AMD64_
9156	switch (nBytes)
9157	{
9158	case `2`:
9159	*dst++ = `0x66`;
9160	__fallthrough;
9161	case `1`:
9162	*dst++ = `0x90`;
9163	break;
9164	case `0`:
9165	break;
9166	case `3`:
9167	*dst++ = `0x0F`;
9168	*dst++ = `0x1F`;
9169	*dst++ = `0x00`;
9170	break;
9171	case `4`:
9172	*dst++ = `0x0F`;
9173	*dst++ = `0x1F`;
9174	*dst++ = `0x40`;
9175	*dst++ = `0x00`;
9176	break;
9177	case `6`:
9178	*dst++ = `0x66`;
9179	__fallthrough;
9180	case `5`:
9181	*dst++ = `0x0F`;
9182	*dst++ = `0x1F`;
9183	*dst++ = `0x44`;
9184	*dst++ = `0x00`;
9185	*dst++ = `0x00`;
9186	break;
9187	case `7`:
9188	*dst++ = `0x0F`;
9189	*dst++ = `0x1F`;
9190	*dst++ = `0x80`;
9191	*dst++ = `0x00`;
9192	*dst++ = `0x00`;
9193	*dst++ = `0x00`;
9194	*dst++ = `0x00`;
9195	break;
9196	case `15`:
9197	// More than 3 prefixes is slower than just 2 NOPs
9198	dst = emitOutputNOP(emitOutputNOP(dst, `7`), `8`);
9199	break;
9200	case `14`:
9201	// More than 3 prefixes is slower than just 2 NOPs
9202	dst = emitOutputNOP(emitOutputNOP(dst, `7`), `7`);
9203	break;
9204	case `13`:
9205	// More than 3 prefixes is slower than just 2 NOPs
9206	dst = emitOutputNOP(emitOutputNOP(dst, `5`), `8`);
9207	break;
9208	case `12`:
9209	// More than 3 prefixes is slower than just 2 NOPs
9210	dst = emitOutputNOP(emitOutputNOP(dst, `4`), `8`);
9211	break;
9212	case `11`:
9213	*dst++ = `0x66`;
9214	__fallthrough;
9215	case `10`:
9216	*dst++ = `0x66`;
9217	__fallthrough;
9218	case `9`:
9219	*dst++ = `0x66`;
9220	__fallthrough;
9221	case `8`:
9222	*dst++ = `0x0F`;
9223	*dst++ = `0x1F`;
9224	*dst++ = `0x84`;
9225	*dst++ = `0x00`;
9226	*dst++ = `0x00`;
9227	*dst++ = `0x00`;
9228	*dst++ = `0x00`;
9229	*dst++ = `0x00`;
9230	break;
9231	}
9232	#endif // _TARGET_AMD64_
9233
9234	return dst;
9235	}
9236
9237	/*****************************************************************************
9238	*
9239	* Output an instruction involving an address mode.
9240	*/
9241
9242	BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
9243	{
9244	regNumber reg;
9245	regNumber rgx;
9246	ssize_t dsp;
9247	bool dspInByte;
9248	bool dspIsZero;
9249
9250	instruction ins = id->idIns();
9251	emitAttr size = id->idOpSize();
9252	size_t opsz = EA_SIZE_IN_BYTES(size);
9253
9254	// Get the base/index registers
9255	reg = id->idAddr()->iiaAddrMode.amBaseReg;
9256	rgx = id->idAddr()->iiaAddrMode.amIndxReg;
9257
9258	// For INS_call the instruction size is actually the return value size
9259	if (ins == INS_call)
9260	{
9261	// Special case: call via a register
9262	if (id->idIsCallRegPtr())
9263	{
9264	code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
9265
9266	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
9267	dst += emitOutputWord(dst, opcode);
9268	goto DONE;
9269	}
9270
9271	// The displacement field is in an unusual place for calls
9272	dsp = emitGetInsCIdisp(id);
9273
9274	#ifdef _TARGET_AMD64_
9275
9276	// Compute the REX prefix if it exists
9277	if (IsExtendedReg(reg, EA_PTRSIZE))
9278	{
9279	insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9280	// TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9281	reg = (regNumber)RegEncoding(reg);
9282	}
9283
9284	if (IsExtendedReg(rgx, EA_PTRSIZE))
9285	{
9286	insEncodeRegSIB(ins, rgx, &code);
9287	// TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9288	rgx = (regNumber)RegEncoding(rgx);
9289	}
9290
9291	// And emit the REX prefix
9292	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9293
9294	#endif // _TARGET_AMD64_
9295
9296	goto GOT_DSP;
9297	}
9298
9299	// Is there a large constant operand?
9300	if (addc && (size > EA_1BYTE))
9301	{
9302	ssize_t cval = addc->cnsVal;
9303
9304	// Does the constant fit in a byte?
9305	if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
9306	{
9307	if (id->idInsFmt() != IF_ARW_SHF)
9308	{
9309	code \|= `2`;
9310	}
9311
9312	opsz = `1`;
9313	}
9314	}
9315
9316	// Emit VEX prefix if required
9317	// There are some callers who already add VEX prefix and call this routine.
9318	// Therefore, add VEX prefix is one is not already present.
9319	code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
9320
9321	// For this format, moves do not support a third operand, so we only need to handle the binary ops.
9322	if (TakesVexPrefix(ins))
9323	{
9324	if (IsDstDstSrcAVXInstruction(ins))
9325	{
9326	regNumber src1 = REG_NA;
9327
9328	switch (id->idInsFmt())
9329	{
9330	case IF_RWR_RRD_ARD:
9331	case IF_RWR_ARD_RRD:
9332	case IF_RWR_RRD_ARD_CNS:
9333	case IF_RWR_RRD_ARD_RRD:
9334	{
9335	src1 = id->idReg2();
9336	break;
9337	}
9338
9339	default:
9340	{
9341	src1 = id->idReg1();
9342	break;
9343	}
9344	}
9345
9346	// encode source operand reg in 'vvvv' bits in 1's complement form
9347	code = insEncodeReg3456(ins, src1, size, code);
9348	}
9349	else if (IsDstSrcSrcAVXInstruction(ins))
9350	{
9351	code = insEncodeReg3456(ins, id->idReg2(), size, code);
9352	}
9353	}
9354
9355	// Emit the REX prefix if required
9356	if (TakesRexWPrefix(ins, size))
9357	{
9358	code = AddRexWPrefix(ins, code);
9359	}
9360
9361	if (IsExtendedReg(reg, EA_PTRSIZE))
9362	{
9363	insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
9364	// TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9365	reg = (regNumber)RegEncoding(reg);
9366	}
9367
9368	if (IsExtendedReg(rgx, EA_PTRSIZE))
9369	{
9370	insEncodeRegSIB(ins, rgx, &code);
9371	// TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
9372	rgx = (regNumber)RegEncoding(rgx);
9373	}
9374
9375	// Special case emitting AVX instructions
9376	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9377	{
9378	if ((ins == INS_crc32) && (size > EA_1BYTE))
9379	{
9380	code \|= `0x0100`;
9381
9382	if (size == EA_2BYTE)
9383	{
9384	dst += emitOutputByte(dst, `0x66`);
9385	}
9386	}
9387
9388	regNumber reg345 = REG_NA;
9389	if (IsBMIInstruction(ins))
9390	{
9391	reg345 = getBmiRegNumber(ins);
9392	}
9393	if (reg345 == REG_NA)
9394	{
9395	switch (id->idInsFmt())
9396	{
9397	case IF_AWR_RRD_RRD:
9398	{
9399	reg345 = id->idReg2();
9400	break;
9401	}
9402
9403	default:
9404	{
9405	reg345 = id->idReg1();
9406	break;
9407	}
9408	}
9409	}
9410	unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
9411
9412	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9413
9414	if (UseVEXEncoding() && (ins != INS_crc32))
9415	{
9416	// Emit last opcode byte
9417	// TODO-XArch-CQ: Right now support 4-byte opcode instructions only
9418	assert((code & `0xFF`) == `0`);
9419	dst += emitOutputByte(dst, (code >> `8`) & `0xFF`);
9420	}
9421	else
9422	{
9423	dst += emitOutputWord(dst, code >> `16`);
9424	dst += emitOutputWord(dst, code & `0xFFFF`);
9425	}
9426
9427	code = regcode;
9428	}
9429	// Is this a 'big' opcode?
9430	else if (code & `0xFF000000`)
9431	{
9432	// Output the REX prefix
9433	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9434
9435	// Output the highest word of the opcode
9436	// We need to check again as in case of AVX instructions leading opcode bytes are stripped off
9437	// and encoded as part of VEX prefix.
9438	if (code & `0xFF000000`)
9439	{
9440	dst += emitOutputWord(dst, code >> `16`);
9441	code &= `0x0000FFFF`;
9442	}
9443	}
9444	else if (code & `0x00FF0000`)
9445	{
9446	// BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix.
9447	assert(ins != INS_bt);
9448
9449	// Output the REX prefix
9450	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9451
9452	// Output the highest byte of the opcode
9453	if (code & `0x00FF0000`)
9454	{
9455	dst += emitOutputByte(dst, code >> `16`);
9456	code &= `0x0000FFFF`;
9457	}
9458
9459	// Use the large version if this is not a byte. This trick will not
9460	// work in case of SSE2 and AVX instructions.
9461	if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9462	{
9463	code++;
9464	}
9465	}
9466	else if (CodeGen::instIsFP(ins))
9467	{
9468	assert(size == EA_4BYTE \|\| size == EA_8BYTE);
9469	if (size == EA_8BYTE)
9470	{
9471	code += `4`;
9472	}
9473	}
9474	else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
9475	{
9476	/ Is the operand size larger than a byte? /
9477
9478	switch (size)
9479	{
9480	case EA_1BYTE:
9481	break;
9482
9483	case EA_2BYTE:
9484
9485	/ Output a size prefix for a 16-bit operand /
9486
9487	dst += emitOutputByte(dst, `0x66`);
9488
9489	__fallthrough;
9490
9491	case EA_4BYTE:
9492	#ifdef _TARGET_AMD64_
9493	case EA_8BYTE:
9494	#endif
9495
9496	/ Set the 'w' bit to get the large version /
9497
9498	code \|= `0x1`;
9499	break;
9500
9501	#ifdef _TARGET_X86_
9502	case EA_8BYTE:
9503
9504	/ Double operand - set the appropriate bit /
9505
9506	code \|= `0x04`;
9507	break;
9508
9509	#endif // _TARGET_X86_
9510
9511	default:
9512	NO_WAY("unexpected size");
9513	break;
9514	}
9515	}
9516
9517	// Output the REX prefix
9518	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
9519
9520	// Get the displacement value
9521	dsp = emitGetInsAmdAny(id);
9522
9523	GOT_DSP:
9524
9525	dspInByte = ((signed char)dsp == (ssize_t)dsp);
9526	dspIsZero = (dsp == `0`);
9527
9528	if (id->idIsDspReloc())
9529	{
9530	dspInByte = false; // relocs can't be placed in a byte
9531	}
9532
9533	// Is there a [scaled] index component?
9534	if (rgx == REG_NA)
9535	{
9536	// The address is of the form "[reg+disp]"
9537	switch (reg)
9538	{
9539	case REG_NA:
9540	{
9541	if (id->idIsDspReloc())
9542	{
9543	INT32 addlDelta = `0`;
9544
9545	// The address is of the form "[disp]"
9546	// On x86 - disp is relative to zero
9547	// On Amd64 - disp is relative to RIP
9548	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9549	{
9550	dst += emitOutputByte(dst, code \| `0x05`);
9551	}
9552	else
9553	{
9554	dst += emitOutputWord(dst, code \| `0x0500`);
9555	}
9556
9557	if (addc)
9558	{
9559	// It is of the form "ins [disp], immed"
9560	// For emitting relocation, we also need to take into account of the
9561	// additional bytes of code emitted for immed val.
9562
9563	ssize_t cval = addc->cnsVal;
9564
9565	#ifdef _TARGET_AMD64_
9566	// all these opcodes only take a sign-extended 4-byte immediate
9567	noway_assert(opsz < `8` \|\| ((int)cval == cval && !addc->cnsReloc));
9568	#else //_TARGET_X86_
9569	noway_assert(opsz <= `4`);
9570	#endif //_TARGET_X86_
9571
9572	switch (opsz)
9573	{
9574	case `0`:
9575	case `4`:
9576	case `8`:
9577	addlDelta = -`4`;
9578	break;
9579	case `2`:
9580	addlDelta = -`2`;
9581	break;
9582	case `1`:
9583	addlDelta = -`1`;
9584	break;
9585
9586	default:
9587	assert(!"unexpected operand size");
9588	unreached();
9589	}
9590	}
9591
9592	#ifdef _TARGET_AMD64_
9593	// We emit zero on Amd64, to avoid the assert in emitOutputLong()
9594	dst += emitOutputLong(dst, `0`);
9595	#else
9596	dst += emitOutputLong(dst, dsp);
9597	#endif
9598	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_DISP32, `0`,
9599	addlDelta);
9600	}
9601	else
9602	{
9603	#ifdef _TARGET_X86_
9604	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9605	{
9606	dst += emitOutputByte(dst, code \| `0x05`);
9607	}
9608	else
9609	{
9610	dst += emitOutputWord(dst, code \| `0x0500`);
9611	}
9612	#else //_TARGET_AMD64_
9613	// Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
9614	// This addr mode should never be used while generating relocatable ngen code nor if
9615	// the addr can be encoded as pc-relative address.
9616	noway_assert(!emitComp->opts.compReloc);
9617	noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
9618	noway_assert((int)dsp == dsp);
9619
9620	// This requires, specifying a SIB byte after ModRM byte.
9621	if (EncodedBySSE38orSSE3A(ins))
9622	{
9623	dst += emitOutputByte(dst, code \| `0x04`);
9624	}
9625	else
9626	{
9627	dst += emitOutputWord(dst, code \| `0x0400`);
9628	}
9629	dst += emitOutputByte(dst, `0x25`);
9630	#endif //_TARGET_AMD64_
9631	dst += emitOutputLong(dst, dsp);
9632	}
9633	break;
9634	}
9635
9636	case REG_EBP:
9637	{
9638	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9639	{
9640	// Does the offset fit in a byte?
9641	if (dspInByte)
9642	{
9643	dst += emitOutputByte(dst, code \| `0x45`);
9644	dst += emitOutputByte(dst, dsp);
9645	}
9646	else
9647	{
9648	dst += emitOutputByte(dst, code \| `0x85`);
9649	dst += emitOutputLong(dst, dsp);
9650
9651	if (id->idIsDspReloc())
9652	{
9653	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9654	}
9655	}
9656	}
9657	else
9658	{
9659	// Does the offset fit in a byte?
9660	if (dspInByte)
9661	{
9662	dst += emitOutputWord(dst, code \| `0x4500`);
9663	dst += emitOutputByte(dst, dsp);
9664	}
9665	else
9666	{
9667	dst += emitOutputWord(dst, code \| `0x8500`);
9668	dst += emitOutputLong(dst, dsp);
9669
9670	if (id->idIsDspReloc())
9671	{
9672	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9673	}
9674	}
9675	}
9676	break;
9677	}
9678
9679	case REG_ESP:
9680	{
9681	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9682	{
9683	// Is the offset 0 or does it at least fit in a byte?
9684	if (dspIsZero)
9685	{
9686	dst += emitOutputByte(dst, code \| `0x04`);
9687	dst += emitOutputByte(dst, `0x24`);
9688	}
9689	else if (dspInByte)
9690	{
9691	dst += emitOutputByte(dst, code \| `0x44`);
9692	dst += emitOutputByte(dst, `0x24`);
9693	dst += emitOutputByte(dst, dsp);
9694	}
9695	else
9696	{
9697	dst += emitOutputByte(dst, code \| `0x84`);
9698	dst += emitOutputByte(dst, `0x24`);
9699	dst += emitOutputLong(dst, dsp);
9700	if (id->idIsDspReloc())
9701	{
9702	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9703	}
9704	}
9705	}
9706	else
9707	{
9708	// Is the offset 0 or does it at least fit in a byte?
9709	if (dspIsZero)
9710	{
9711	dst += emitOutputWord(dst, code \| `0x0400`);
9712	dst += emitOutputByte(dst, `0x24`);
9713	}
9714	else if (dspInByte)
9715	{
9716	dst += emitOutputWord(dst, code \| `0x4400`);
9717	dst += emitOutputByte(dst, `0x24`);
9718	dst += emitOutputByte(dst, dsp);
9719	}
9720	else
9721	{
9722	dst += emitOutputWord(dst, code \| `0x8400`);
9723	dst += emitOutputByte(dst, `0x24`);
9724	dst += emitOutputLong(dst, dsp);
9725	if (id->idIsDspReloc())
9726	{
9727	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9728	}
9729	}
9730	}
9731	break;
9732	}
9733
9734	default:
9735	{
9736	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9737	{
9738	// Put the register in the opcode
9739	code \|= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr);
9740
9741	// Is there a displacement?
9742	if (dspIsZero)
9743	{
9744	// This is simply "[reg]"
9745	dst += emitOutputByte(dst, code);
9746	}
9747	else
9748	{
9749	// This is [reg + dsp]" -- does the offset fit in a byte?
9750	if (dspInByte)
9751	{
9752	dst += emitOutputByte(dst, code \| `0x40`);
9753	dst += emitOutputByte(dst, dsp);
9754	}
9755	else
9756	{
9757	dst += emitOutputByte(dst, code \| `0x80`);
9758	dst += emitOutputLong(dst, dsp);
9759	if (id->idIsDspReloc())
9760	{
9761	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9762	}
9763	}
9764	}
9765	}
9766	else
9767	{
9768	// Put the register in the opcode
9769	code \|= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << `8`;
9770
9771	// Is there a displacement?
9772	if (dspIsZero)
9773	{
9774	// This is simply "[reg]"
9775	dst += emitOutputWord(dst, code);
9776	}
9777	else
9778	{
9779	// This is [reg + dsp]" -- does the offset fit in a byte?
9780	if (dspInByte)
9781	{
9782	dst += emitOutputWord(dst, code \| `0x4000`);
9783	dst += emitOutputByte(dst, dsp);
9784	}
9785	else
9786	{
9787	dst += emitOutputWord(dst, code \| `0x8000`);
9788	dst += emitOutputLong(dst, dsp);
9789	if (id->idIsDspReloc())
9790	{
9791	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9792	}
9793	}
9794	}
9795	}
9796
9797	break;
9798	}
9799	}
9800	}
9801	else
9802	{
9803	unsigned regByte;
9804
9805	// We have a scaled index operand
9806	unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
9807
9808	// Is the index operand scaled?
9809	if (mul > `1`)
9810	{
9811	// Is there a base register?
9812	if (reg != REG_NA)
9813	{
9814	// The address is "[reg + {2/4/8} rgx + icon]"*
9815	regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) \|
9816	insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) \| insSSval(mul);
9817
9818	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9819	{
9820	// Emit [ebp + {2/4/8} rgz] as [ebp + {2/4/8} * rgx + 0]*
9821	if (dspIsZero && reg != REG_EBP)
9822	{
9823	// The address is "[reg + {2/4/8} rgx]"*
9824	dst += emitOutputByte(dst, code \| `0x04`);
9825	dst += emitOutputByte(dst, regByte);
9826	}
9827	else
9828	{
9829	// The address is "[reg + {2/4/8} rgx + disp]"*
9830	if (dspInByte)
9831	{
9832	dst += emitOutputByte(dst, code \| `0x44`);
9833	dst += emitOutputByte(dst, regByte);
9834	dst += emitOutputByte(dst, dsp);
9835	}
9836	else
9837	{
9838	dst += emitOutputByte(dst, code \| `0x84`);
9839	dst += emitOutputByte(dst, regByte);
9840	dst += emitOutputLong(dst, dsp);
9841	if (id->idIsDspReloc())
9842	{
9843	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9844	}
9845	}
9846	}
9847	}
9848	else
9849	{
9850	// Emit [ebp + {2/4/8} rgz] as [ebp + {2/4/8} * rgx + 0]*
9851	if (dspIsZero && reg != REG_EBP)
9852	{
9853	// The address is "[reg + {2/4/8} rgx]"*
9854	dst += emitOutputWord(dst, code \| `0x0400`);
9855	dst += emitOutputByte(dst, regByte);
9856	}
9857	else
9858	{
9859	// The address is "[reg + {2/4/8} rgx + disp]"*
9860	if (dspInByte)
9861	{
9862	dst += emitOutputWord(dst, code \| `0x4400`);
9863	dst += emitOutputByte(dst, regByte);
9864	dst += emitOutputByte(dst, dsp);
9865	}
9866	else
9867	{
9868	dst += emitOutputWord(dst, code \| `0x8400`);
9869	dst += emitOutputByte(dst, regByte);
9870	dst += emitOutputLong(dst, dsp);
9871	if (id->idIsDspReloc())
9872	{
9873	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9874	}
9875	}
9876	}
9877	}
9878	}
9879	else
9880	{
9881	// The address is "[{2/4/8} rgx + icon]"*
9882	regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) \|
9883	insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) \| insSSval(mul);
9884
9885	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9886	{
9887	dst += emitOutputByte(dst, code \| `0x04`);
9888	}
9889	else
9890	{
9891	dst += emitOutputWord(dst, code \| `0x0400`);
9892	}
9893
9894	dst += emitOutputByte(dst, regByte);
9895
9896	// Special case: jump through a jump table
9897	if (ins == INS_i_jmp)
9898	{
9899	dsp += (size_t)emitConsBlock;
9900	}
9901
9902	dst += emitOutputLong(dst, dsp);
9903	if (id->idIsDspReloc())
9904	{
9905	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9906	}
9907	}
9908	}
9909	else
9910	{
9911	// The address is "[reg+rgx+dsp]"
9912	regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) \| insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
9913
9914	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
9915	{
9916	if (dspIsZero && reg != REG_EBP)
9917	{
9918	// This is [reg+rgx]"
9919	dst += emitOutputByte(dst, code \| `0x04`);
9920	dst += emitOutputByte(dst, regByte);
9921	}
9922	else
9923	{
9924	// This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9925	if (dspInByte)
9926	{
9927	dst += emitOutputByte(dst, code \| `0x44`);
9928	dst += emitOutputByte(dst, regByte);
9929	dst += emitOutputByte(dst, dsp);
9930	}
9931	else
9932	{
9933	dst += emitOutputByte(dst, code \| `0x84`);
9934	dst += emitOutputByte(dst, regByte);
9935	dst += emitOutputLong(dst, dsp);
9936	if (id->idIsDspReloc())
9937	{
9938	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9939	}
9940	}
9941	}
9942	}
9943	else
9944	{
9945	if (dspIsZero && reg != REG_EBP)
9946	{
9947	// This is [reg+rgx]"
9948	dst += emitOutputWord(dst, code \| `0x0400`);
9949	dst += emitOutputByte(dst, regByte);
9950	}
9951	else
9952	{
9953	// This is [reg+rgx+dsp]" -- does the offset fit in a byte?
9954	if (dspInByte)
9955	{
9956	dst += emitOutputWord(dst, code \| `0x4400`);
9957	dst += emitOutputByte(dst, regByte);
9958	dst += emitOutputByte(dst, dsp);
9959	}
9960	else
9961	{
9962	dst += emitOutputWord(dst, code \| `0x8400`);
9963	dst += emitOutputByte(dst, regByte);
9964	dst += emitOutputLong(dst, dsp);
9965	if (id->idIsDspReloc())
9966	{
9967	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)dsp, IMAGE_REL_BASED_HIGHLOW);
9968	}
9969	}
9970	}
9971	}
9972	}
9973	}
9974
9975	// Now generate the constant value, if present
9976	if (addc)
9977	{
9978	ssize_t cval = addc->cnsVal;
9979
9980	#ifdef _TARGET_AMD64_
9981	// all these opcodes only take a sign-extended 4-byte immediate
9982	noway_assert(opsz < `8` \|\| ((int)cval == cval && !addc->cnsReloc));
9983	#endif
9984
9985	switch (opsz)
9986	{
9987	case `0`:
9988	case `4`:
9989	case `8`:
9990	dst += emitOutputLong(dst, cval);
9991	break;
9992	case `2`:
9993	dst += emitOutputWord(dst, cval);
9994	break;
9995	case `1`:
9996	dst += emitOutputByte(dst, cval);
9997	break;
9998
9999	default:
10000	assert(!"unexpected operand size");
10001	}
10002
10003	if (addc->cnsReloc)
10004	{
10005	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10006	assert(opsz == `4`);
10007	}
10008	}
10009
10010	DONE:
10011
10012	// Does this instruction operate on a GC ref value?
10013	if (id->idGCref())
10014	{
10015	switch (id->idInsFmt())
10016	{
10017	case IF_ARD:
10018	case IF_AWR:
10019	case IF_ARW:
10020	break;
10021
10022	case IF_RRD_ARD:
10023	break;
10024
10025	case IF_RWR_ARD:
10026	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10027	break;
10028
10029	case IF_RRW_ARD:
10030	// Mark the destination register as holding a GCT_BYREF
10031	assert(id->idGCref() == GCT_BYREF && (ins == INS_add \|\| ins == INS_sub));
10032	emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10033	break;
10034
10035	case IF_ARD_RRD:
10036	case IF_AWR_RRD:
10037	break;
10038
10039	case IF_AWR_RRD_RRD:
10040	break;
10041
10042	case IF_ARD_CNS:
10043	case IF_AWR_CNS:
10044	break;
10045
10046	case IF_ARW_RRD:
10047	case IF_ARW_CNS:
10048	assert(id->idGCref() == GCT_BYREF && (ins == INS_add \|\| ins == INS_sub));
10049	break;
10050
10051	default:
10052	#ifdef DEBUG
10053	emitDispIns(id, false, false, false);
10054	#endif
10055	assert(!"unexpected GC ref instruction format");
10056	}
10057
10058	// mul can never produce a GC ref
10059	assert(!instrIs3opImul(ins));
10060	assert(ins != INS_mulEAX && ins != INS_imulEAX);
10061	}
10062	else
10063	{
10064	if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10065	{
10066	switch (id->idInsFmt())
10067	{
10068	case IF_RWR_ARD:
10069	case IF_RRW_ARD:
10070	case IF_RWR_RRD_ARD:
10071	emitGCregDeadUpd(id->idReg1(), dst);
10072	break;
10073	default:
10074	break;
10075	}
10076
10077	if (ins == INS_mulEAX \|\| ins == INS_imulEAX)
10078	{
10079	emitGCregDeadUpd(REG_EAX, dst);
10080	emitGCregDeadUpd(REG_EDX, dst);
10081	}
10082
10083	// For the three operand imul instruction the target register
10084	// is encoded in the opcode
10085
10086	if (instrIs3opImul(ins))
10087	{
10088	regNumber tgtReg = inst3opImulReg(ins);
10089	emitGCregDeadUpd(tgtReg, dst);
10090	}
10091	}
10092	}
10093
10094	return dst;
10095	}
10096
10097	/*****************************************************************************
10098	*
10099	* Output an instruction involving a stack frame value.
10100	*/
10101
10102	BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
10103	{
10104	int adr;
10105	int dsp;
10106	bool EBPbased;
10107	bool dspInByte;
10108	bool dspIsZero;
10109
10110	instruction ins = id->idIns();
10111	emitAttr size = id->idOpSize();
10112	size_t opsz = EA_SIZE_IN_BYTES(size);
10113
10114	assert(ins != INS_imul \|\| id->idReg1() == REG_EAX \|\| size == EA_4BYTE \|\| size == EA_8BYTE);
10115
10116	// Is there a large constant operand?
10117	if (addc && (size > EA_1BYTE))
10118	{
10119	ssize_t cval = addc->cnsVal;
10120
10121	// Does the constant fit in a byte?
10122	if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
10123	{
10124	if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) &&
10125	(id->idInsFmt() != IF_RWR_RRD_SRD_CNS))
10126	{
10127	code \|= `2`;
10128	}
10129
10130	opsz = `1`;
10131	}
10132	}
10133
10134	// Add VEX prefix if required.
10135	// There are some callers who already add VEX prefix and call this routine.
10136	// Therefore, add VEX prefix if one is not already present.
10137	code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
10138
10139	// Compute the REX prefix
10140	if (TakesRexWPrefix(ins, size))
10141	{
10142	code = AddRexWPrefix(ins, code);
10143	}
10144
10145	// Special case emitting AVX instructions
10146	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
10147	{
10148	if ((ins == INS_crc32) && (size > EA_1BYTE))
10149	{
10150	code \|= `0x0100`;
10151
10152	if (size == EA_2BYTE)
10153	{
10154	dst += emitOutputByte(dst, `0x66`);
10155	}
10156	}
10157
10158	regNumber reg345 = REG_NA;
10159	if (IsBMIInstruction(ins))
10160	{
10161	reg345 = getBmiRegNumber(ins);
10162	}
10163	if (reg345 == REG_NA)
10164	{
10165	reg345 = id->idReg1();
10166	}
10167	else
10168	{
10169	code = insEncodeReg3456(ins, id->idReg1(), size, code);
10170	}
10171	unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
10172
10173	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10174
10175	if (UseVEXEncoding() && (ins != INS_crc32))
10176	{
10177	// Emit last opcode byte
10178	// TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10179	assert((code & `0xFF`) == `0`);
10180	dst += emitOutputByte(dst, (code >> `8`) & `0xFF`);
10181	}
10182	else
10183	{
10184	dst += emitOutputWord(dst, code >> `16`);
10185	dst += emitOutputWord(dst, code & `0xFFFF`);
10186	}
10187
10188	code = regcode;
10189	}
10190	// Is this a 'big' opcode?
10191	else if (code & `0xFF000000`)
10192	{
10193	// Output the REX prefix
10194	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10195
10196	// Output the highest word of the opcode
10197	// We need to check again because in case of AVX instructions the leading
10198	// escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10199	if (code & `0xFF000000`)
10200	{
10201	dst += emitOutputWord(dst, code >> `16`);
10202	code &= `0x0000FFFF`;
10203	}
10204	}
10205	else if (code & `0x00FF0000`)
10206	{
10207	// BT supports 16 bit operands and this code doesn't add the necessary 66 prefix.
10208	assert(ins != INS_bt);
10209
10210	// Output the REX prefix
10211	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10212
10213	// Output the highest byte of the opcode.
10214	// We need to check again because in case of AVX instructions the leading
10215	// escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
10216	if (code & `0x00FF0000`)
10217	{
10218	dst += emitOutputByte(dst, code >> `16`);
10219	code &= `0x0000FFFF`;
10220	}
10221
10222	// Use the large version if this is not a byte
10223	if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) &&
10224	!IsAVXInstruction(ins))
10225	{
10226	code \|= `0x1`;
10227	}
10228	}
10229	else if (CodeGen::instIsFP(ins))
10230	{
10231	assert(size == EA_4BYTE \|\| size == EA_8BYTE);
10232
10233	if (size == EA_8BYTE)
10234	{
10235	code += `4`;
10236	}
10237	}
10238	else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins))
10239	{
10240	// Is the operand size larger than a byte?
10241	switch (size)
10242	{
10243	case EA_1BYTE:
10244	break;
10245
10246	case EA_2BYTE:
10247	// Output a size prefix for a 16-bit operand
10248	dst += emitOutputByte(dst, `0x66`);
10249	__fallthrough;
10250
10251	case EA_4BYTE:
10252	#ifdef _TARGET_AMD64_
10253	case EA_8BYTE:
10254	#endif // _TARGET_AMD64_
10255
10256	/ Set the 'w' size bit to indicate 32-bit operation*
10257	* Note that incrementing "code" for INS_call (0xFF) would
10258	* overflow, whereas setting the lower bit to 1 just works out
10259	*/
10260
10261	code \|= `0x01`;
10262	break;
10263
10264	#ifdef _TARGET_X86_
10265	case EA_8BYTE:
10266
10267	// Double operand - set the appropriate bit.
10268	// I don't know what a legitimate reason to end up in this case would be
10269	// considering that FP is taken care of above...
10270	// what is an instruction that takes a double which is not covered by the
10271	// above instIsFP? Of the list in instrsxarch, only INS_fprem
10272	code \|= `0x04`;
10273	NO_WAY("bad 8 byte op");
10274	break;
10275	#endif // _TARGET_X86_
10276
10277	default:
10278	NO_WAY("unexpected size");
10279	break;
10280	}
10281	}
10282
10283	// Output the REX prefix
10284	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10285
10286	// Figure out the variable's frame position
10287	int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
10288
10289	adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
10290	dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
10291
10292	dspInByte = ((signed char)dsp == (int)dsp);
10293	dspIsZero = (dsp == `0`);
10294
10295	// for stack variables the dsp should never be a reloc
10296	assert(id->idIsDspReloc() == `0`);
10297
10298	if (EBPbased)
10299	{
10300	// EBP-based variable: does the offset fit in a byte?
10301	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
10302	{
10303	if (dspInByte)
10304	{
10305	dst += emitOutputByte(dst, code \| `0x45`);
10306	dst += emitOutputByte(dst, dsp);
10307	}
10308	else
10309	{
10310	dst += emitOutputByte(dst, code \| `0x85`);
10311	dst += emitOutputLong(dst, dsp);
10312	}
10313	}
10314	else
10315	{
10316	if (dspInByte)
10317	{
10318	dst += emitOutputWord(dst, code \| `0x4500`);
10319	dst += emitOutputByte(dst, dsp);
10320	}
10321	else
10322	{
10323	dst += emitOutputWord(dst, code \| `0x8500`);
10324	dst += emitOutputLong(dst, dsp);
10325	}
10326	}
10327	}
10328	else
10329	{
10330
10331	#if !FEATURE_FIXED_OUT_ARGS
10332	// Adjust the offset by the amount currently pushed on the CPU stack
10333	dsp += emitCurStackLvl;
10334	#endif
10335
10336	dspInByte = ((signed char)dsp == (int)dsp);
10337	dspIsZero = (dsp == `0`);
10338
10339	// Does the offset fit in a byte?
10340	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
10341	{
10342	if (dspInByte)
10343	{
10344	if (dspIsZero)
10345	{
10346	dst += emitOutputByte(dst, code \| `0x04`);
10347	dst += emitOutputByte(dst, `0x24`);
10348	}
10349	else
10350	{
10351	dst += emitOutputByte(dst, code \| `0x44`);
10352	dst += emitOutputByte(dst, `0x24`);
10353	dst += emitOutputByte(dst, dsp);
10354	}
10355	}
10356	else
10357	{
10358	dst += emitOutputByte(dst, code \| `0x84`);
10359	dst += emitOutputByte(dst, `0x24`);
10360	dst += emitOutputLong(dst, dsp);
10361	}
10362	}
10363	else
10364	{
10365	if (dspInByte)
10366	{
10367	if (dspIsZero)
10368	{
10369	dst += emitOutputWord(dst, code \| `0x0400`);
10370	dst += emitOutputByte(dst, `0x24`);
10371	}
10372	else
10373	{
10374	dst += emitOutputWord(dst, code \| `0x4400`);
10375	dst += emitOutputByte(dst, `0x24`);
10376	dst += emitOutputByte(dst, dsp);
10377	}
10378	}
10379	else
10380	{
10381	dst += emitOutputWord(dst, code \| `0x8400`);
10382	dst += emitOutputByte(dst, `0x24`);
10383	dst += emitOutputLong(dst, dsp);
10384	}
10385	}
10386	}
10387
10388	// Now generate the constant value, if present
10389	if (addc)
10390	{
10391	ssize_t cval = addc->cnsVal;
10392
10393	#ifdef _TARGET_AMD64_
10394	// all these opcodes only take a sign-extended 4-byte immediate
10395	noway_assert(opsz < `8` \|\| ((int)cval == cval && !addc->cnsReloc));
10396	#endif
10397
10398	switch (opsz)
10399	{
10400	case `0`:
10401	case `4`:
10402	case `8`:
10403	dst += emitOutputLong(dst, cval);
10404	break;
10405	case `2`:
10406	dst += emitOutputWord(dst, cval);
10407	break;
10408	case `1`:
10409	dst += emitOutputByte(dst, cval);
10410	break;
10411
10412	default:
10413	assert(!"unexpected operand size");
10414	}
10415
10416	if (addc->cnsReloc)
10417	{
10418	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10419	assert(opsz == `4`);
10420	}
10421	}
10422
10423	// Does this instruction operate on a GC ref value?
10424	if (id->idGCref())
10425	{
10426	// Factor in the sub-variable offset
10427	adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
10428
10429	switch (id->idInsFmt())
10430	{
10431	case IF_SRD:
10432	// Read stack -- no change
10433	break;
10434
10435	case IF_SWR: // Stack Write (So we need to update GC live for stack var)
10436	// Write stack -- GC var may be born
10437	emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10438	break;
10439
10440	case IF_SRD_CNS:
10441	// Read stack -- no change
10442	break;
10443
10444	case IF_SWR_CNS:
10445	// Write stack -- no change
10446	break;
10447
10448	case IF_SRD_RRD:
10449	case IF_RRD_SRD:
10450	// Read stack , read register -- no change
10451	break;
10452
10453	case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
10454
10455	// Read stack , write register -- GC reg may be born
10456	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10457	break;
10458
10459	case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
10460	// Read register, write stack -- GC var may be born
10461	emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
10462	break;
10463
10464	case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
10465
10466	// reg could have been a GCREF as GCREF + int=BYREF
10467	// or BYREF+/-int=BYREF
10468	assert(id->idGCref() == GCT_BYREF && (ins == INS_add \|\| ins == INS_sub));
10469	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10470	break;
10471
10472	case IF_SRW_CNS:
10473	case IF_SRW_RRD:
10474	// += -= of a byref, no change
10475
10476	case IF_SRW:
10477	break;
10478
10479	default:
10480	#ifdef DEBUG
10481	emitDispIns(id, false, false, false);
10482	#endif
10483	assert(!"unexpected GC ref instruction format");
10484	}
10485	}
10486	else
10487	{
10488	if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10489	{
10490	switch (id->idInsFmt())
10491	{
10492	case IF_RWR_SRD: // Register Write, Stack Read
10493	case IF_RRW_SRD: // Register Read/Write, Stack Read
10494	case IF_RWR_RRD_SRD:
10495	emitGCregDeadUpd(id->idReg1(), dst);
10496	break;
10497	default:
10498	break;
10499	}
10500
10501	if (ins == INS_mulEAX \|\| ins == INS_imulEAX)
10502	{
10503	emitGCregDeadUpd(REG_EAX, dst);
10504	emitGCregDeadUpd(REG_EDX, dst);
10505	}
10506
10507	// For the three operand imul instruction the target register
10508	// is encoded in the opcode
10509
10510	if (instrIs3opImul(ins))
10511	{
10512	regNumber tgtReg = inst3opImulReg(ins);
10513	emitGCregDeadUpd(tgtReg, dst);
10514	}
10515	}
10516	}
10517
10518	return dst;
10519	}
10520
10521	/*****************************************************************************
10522	*
10523	* Output an instruction with a static data member (class variable).
10524	*/
10525
10526	BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
10527	{
10528	BYTE* addr;
10529	CORINFO_FIELD_HANDLE fldh;
10530	ssize_t offs;
10531	int doff;
10532
10533	emitAttr size = id->idOpSize();
10534	size_t opsz = EA_SIZE_IN_BYTES(size);
10535	instruction ins = id->idIns();
10536	bool isMoffset = false;
10537
10538	// Get hold of the field handle and offset
10539	fldh = id->idAddr()->iiaFieldHnd;
10540	offs = emitGetInsDsp(id);
10541
10542	// Special case: mov reg, fs:[ddd]
10543	if (fldh == FLD_GLOBAL_FS)
10544	{
10545	dst += emitOutputByte(dst, `0x64`);
10546	}
10547
10548	// Compute VEX prefix
10549	// Some of its callers already add VEX prefix and then call this routine.
10550	// Therefore add VEX prefix is not already present.
10551	code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
10552
10553	// Compute the REX prefix
10554	if (TakesRexWPrefix(ins, size))
10555	{
10556	code = AddRexWPrefix(ins, code);
10557	}
10558
10559	// Is there a large constant operand?
10560	if (addc && (size > EA_1BYTE))
10561	{
10562	ssize_t cval = addc->cnsVal;
10563	// Does the constant fit in a byte?
10564	if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
10565	{
10566	if (id->idInsFmt() != IF_MRW_SHF)
10567	{
10568	code \|= `2`;
10569	}
10570
10571	opsz = `1`;
10572	}
10573	}
10574	#ifdef _TARGET_X86_
10575	else
10576	{
10577	// Special case: "mov eax, [addr]" and "mov [addr], eax"
10578	// Amd64: this is one case where addr can be 64-bit in size. This is
10579	// currently unused or not enabled on amd64 as it always uses RIP
10580	// relative addressing which results in smaller instruction size.
10581	if (ins == INS_mov && id->idReg1() == REG_EAX)
10582	{
10583	switch (id->idInsFmt())
10584	{
10585	case IF_RWR_MRD:
10586
10587	assert(code == (insCodeRM(ins) \| (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << `8`) \| `0x0500`));
10588
10589	code &= ~((code_t)`0xFFFFFFFF`);
10590	code \|= `0xA0`;
10591	isMoffset = true;
10592	break;
10593
10594	case IF_MWR_RRD:
10595
10596	assert(code == (insCodeMR(ins) \| (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << `8`) \| `0x0500`));
10597
10598	code &= ~((code_t)`0xFFFFFFFF`);
10599	code \|= `0xA2`;
10600	isMoffset = true;
10601	break;
10602
10603	default:
10604	break;
10605	}
10606	}
10607	}
10608	#endif //_TARGET_X86_
10609
10610	// Special case emitting AVX instructions
10611	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
10612	{
10613	if ((ins == INS_crc32) && (size > EA_1BYTE))
10614	{
10615	code \|= `0x0100`;
10616
10617	if (size == EA_2BYTE)
10618	{
10619	dst += emitOutputByte(dst, `0x66`);
10620	}
10621	}
10622
10623	regNumber reg345 = REG_NA;
10624	if (IsBMIInstruction(ins))
10625	{
10626	reg345 = getBmiRegNumber(ins);
10627	}
10628	if (reg345 == REG_NA)
10629	{
10630	reg345 = id->idReg1();
10631	}
10632	else
10633	{
10634	code = insEncodeReg3456(ins, id->idReg1(), size, code);
10635	}
10636	unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
10637
10638	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10639
10640	if (UseVEXEncoding() && (ins != INS_crc32))
10641	{
10642	// Emit last opcode byte
10643	// TODO-XArch-CQ: Right now support 4-byte opcode instructions only
10644	assert((code & `0xFF`) == `0`);
10645	dst += emitOutputByte(dst, (code >> `8`) & `0xFF`);
10646	}
10647	else
10648	{
10649	dst += emitOutputWord(dst, code >> `16`);
10650	dst += emitOutputWord(dst, code & `0xFFFF`);
10651	}
10652
10653	// Emit Mod,R/M byte
10654	dst += emitOutputByte(dst, regcode \| `0x05`);
10655	code = `0`;
10656	}
10657	// Is this a 'big' opcode?
10658	else if (code & `0xFF000000`)
10659	{
10660	// Output the REX prefix
10661	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10662
10663	// Output the highest word of the opcode.
10664	// Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
10665	if (code & `0xFF000000`)
10666	{
10667	dst += emitOutputWord(dst, code >> `16`);
10668	}
10669	code &= `0x0000FFFF`;
10670	}
10671	else if (code & `0x00FF0000`)
10672	{
10673	// Output the REX prefix
10674	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10675
10676	// Check again as VEX prefix would have encoded leading opcode byte
10677	if (code & `0x00FF0000`)
10678	{
10679	dst += emitOutputByte(dst, code >> `16`);
10680	code &= `0x0000FFFF`;
10681	}
10682
10683	if ((ins == INS_movsx \|\| ins == INS_movzx \|\| ins == INS_cmpxchg \|\| ins == INS_xchg \|\| ins == INS_xadd \|\|
10684	insIsCMOV(ins)) &&
10685	size != EA_1BYTE)
10686	{
10687	// movsx and movzx are 'big' opcodes but also have the 'w' bit
10688	code++;
10689	}
10690	}
10691	else if (CodeGen::instIsFP(ins))
10692	{
10693	assert(size == EA_4BYTE \|\| size == EA_8BYTE);
10694
10695	if (size == EA_8BYTE)
10696	{
10697	code += `4`;
10698	}
10699	}
10700	else
10701	{
10702	// Is the operand size larger than a byte?
10703	switch (size)
10704	{
10705	case EA_1BYTE:
10706	break;
10707
10708	case EA_2BYTE:
10709	// Output a size prefix for a 16-bit operand
10710	dst += emitOutputByte(dst, `0x66`);
10711	__fallthrough;
10712
10713	case EA_4BYTE:
10714	#ifdef _TARGET_AMD64_
10715	case EA_8BYTE:
10716	#endif
10717	// Set the 'w' bit to get the large version
10718	code \|= `0x1`;
10719	break;
10720
10721	#ifdef _TARGET_X86_
10722	case EA_8BYTE:
10723	// Double operand - set the appropriate bit
10724	code \|= `0x04`;
10725	break;
10726	#endif // _TARGET_X86_
10727
10728	default:
10729	assert(!"unexpected size");
10730	}
10731	}
10732
10733	// Output the REX prefix
10734	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
10735
10736	if (code)
10737	{
10738	if (id->idInsFmt() == IF_MRD_OFF \|\| id->idInsFmt() == IF_RWR_MRD_OFF \|\| isMoffset)
10739	{
10740	dst += emitOutputByte(dst, code);
10741	}
10742	else
10743	{
10744	dst += emitOutputWord(dst, code);
10745	}
10746	}
10747
10748	// Do we have a constant or a static data member?
10749	doff = Compiler::eeGetJitDataOffs(fldh);
10750	if (doff >= `0`)
10751	{
10752	addr = emitConsBlock + doff;
10753
10754	int byteSize = EA_SIZE_IN_BYTES(size);
10755
10756	// this instruction has a fixed size (4) src.
10757	if (ins == INS_cvttss2si \|\| ins == INS_cvtss2sd \|\| ins == INS_vbroadcastss)
10758	{
10759	byteSize = `4`;
10760	}
10761	// This has a fixed size (8) source.
10762	if (ins == INS_vbroadcastsd)
10763	{
10764	byteSize = `8`;
10765	}
10766
10767	// Check that the offset is properly aligned (i.e. the ddd in [ddd])
10768	assert((emitChkAlign == false) \|\| (ins == INS_lea) \|\| (((size_t)addr & (byteSize - `1`)) == `0`));
10769	}
10770	else
10771	{
10772	// Special case: mov reg, fs:[ddd] or mov reg, [ddd]
10773	if (jitStaticFldIsGlobAddr(fldh))
10774	{
10775	addr = nullptr;
10776	}
10777	else
10778	{
10779	addr = (BYTE)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr*);
10780	if (addr == nullptr)
10781	{
10782	NO_WAY("could not obtain address of static field");
10783	}
10784	}
10785	}
10786
10787	BYTE* target = (addr + offs);
10788
10789	if (!isMoffset)
10790	{
10791	INT32 addlDelta = `0`;
10792
10793	if (addc)
10794	{
10795	// It is of the form "ins [disp], immed"
10796	// For emitting relocation, we also need to take into account of the
10797	// additional bytes of code emitted for immed val.
10798
10799	ssize_t cval = addc->cnsVal;
10800
10801	#ifdef _TARGET_AMD64_
10802	// all these opcodes only take a sign-extended 4-byte immediate
10803	noway_assert(opsz < `8` \|\| ((int)cval == cval && !addc->cnsReloc));
10804	#else //_TARGET_X86_
10805	noway_assert(opsz <= `4`);
10806	#endif //_TARGET_X86_
10807
10808	switch (opsz)
10809	{
10810	case `0`:
10811	case `4`:
10812	case `8`:
10813	addlDelta = -`4`;
10814	break;
10815	case `2`:
10816	addlDelta = -`2`;
10817	break;
10818	case `1`:
10819	addlDelta = -`1`;
10820	break;
10821
10822	default:
10823	assert(!"unexpected operand size");
10824	unreached();
10825	}
10826	}
10827
10828	#ifdef _TARGET_AMD64_
10829	// All static field and data section constant accesses should be marked as relocatable
10830	noway_assert(id->idIsDspReloc());
10831	dst += emitOutputLong(dst, `0`);
10832	#else //_TARGET_X86_
10833	dst += emitOutputLong(dst, (int)target);
10834	#endif //_TARGET_X86_
10835
10836	if (id->idIsDspReloc())
10837	{
10838	emitRecordRelocation((void)(dst - sizeof(int*)), target, IMAGE_REL_BASED_DISP32, `0`, addlDelta);
10839	}
10840	}
10841	else
10842	{
10843	#ifdef _TARGET_AMD64_
10844	// This code path should never be hit on amd64 since it always uses RIP relative addressing.
10845	// In future if ever there is a need to enable this special case, also enable the logic
10846	// that sets isMoffset to true on amd64.
10847	unreached();
10848	#else //_TARGET_X86_
10849
10850	dst += emitOutputSizeT(dst, (ssize_t)target);
10851
10852	if (id->idIsDspReloc())
10853	{
10854	emitRecordRelocation((void*)(dst - TARGET_POINTER_SIZE), target, IMAGE_REL_BASED_MOFFSET);
10855	}
10856
10857	#endif //_TARGET_X86_
10858	}
10859
10860	// Now generate the constant value, if present
10861	if (addc)
10862	{
10863	ssize_t cval = addc->cnsVal;
10864
10865	#ifdef _TARGET_AMD64_
10866	// all these opcodes only take a sign-extended 4-byte immediate
10867	noway_assert(opsz < `8` \|\| ((int)cval == cval && !addc->cnsReloc));
10868	#endif
10869
10870	switch (opsz)
10871	{
10872	case `0`:
10873	case `4`:
10874	case `8`:
10875	dst += emitOutputLong(dst, cval);
10876	break;
10877	case `2`:
10878	dst += emitOutputWord(dst, cval);
10879	break;
10880	case `1`:
10881	dst += emitOutputByte(dst, cval);
10882	break;
10883
10884	default:
10885	assert(!"unexpected operand size");
10886	}
10887	if (addc->cnsReloc)
10888	{
10889	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
10890	assert(opsz == `4`);
10891	}
10892	}
10893
10894	// Does this instruction operate on a GC ref value?
10895	if (id->idGCref())
10896	{
10897	switch (id->idInsFmt())
10898	{
10899	case IF_MRD:
10900	case IF_MRW:
10901	case IF_MWR:
10902	break;
10903
10904	case IF_RRD_MRD:
10905	break;
10906
10907	case IF_RWR_MRD:
10908	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
10909	break;
10910
10911	case IF_MRD_RRD:
10912	case IF_MWR_RRD:
10913	case IF_MRW_RRD:
10914	break;
10915
10916	case IF_MRD_CNS:
10917	case IF_MWR_CNS:
10918	case IF_MRW_CNS:
10919	break;
10920
10921	case IF_RRW_MRD:
10922
10923	assert(id->idGCref() == GCT_BYREF);
10924	assert(ins == INS_add \|\| ins == INS_sub);
10925
10926	// Mark it as holding a GCT_BYREF
10927	emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
10928	break;
10929
10930	default:
10931	#ifdef DEBUG
10932	emitDispIns(id, false, false, false);
10933	#endif
10934	assert(!"unexpected GC ref instruction format");
10935	}
10936	}
10937	else
10938	{
10939	if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
10940	{
10941	switch (id->idInsFmt())
10942	{
10943	case IF_RWR_MRD:
10944	case IF_RRW_MRD:
10945	case IF_RWR_RRD_MRD:
10946	emitGCregDeadUpd(id->idReg1(), dst);
10947	break;
10948	default:
10949	break;
10950	}
10951
10952	if (ins == INS_mulEAX \|\| ins == INS_imulEAX)
10953	{
10954	emitGCregDeadUpd(REG_EAX, dst);
10955	emitGCregDeadUpd(REG_EDX, dst);
10956	}
10957
10958	// For the three operand imul instruction the target register
10959	// is encoded in the opcode
10960
10961	if (instrIs3opImul(ins))
10962	{
10963	regNumber tgtReg = inst3opImulReg(ins);
10964	emitGCregDeadUpd(tgtReg, dst);
10965	}
10966	}
10967	}
10968
10969	return dst;
10970	}
10971
10972	/*****************************************************************************
10973	*
10974	* Output an instruction with one register operand.
10975	*/
10976
10977	BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
10978	{
10979	code_t code;
10980
10981	instruction ins = id->idIns();
10982	regNumber reg = id->idReg1();
10983	emitAttr size = id->idOpSize();
10984
10985	// We would to update GC info correctly
10986	assert(!IsSSEInstruction(ins));
10987	assert(!IsAVXInstruction(ins));
10988
10989	// Get the 'base' opcode
10990	switch (ins)
10991	{
10992	case INS_inc:
10993	case INS_dec:
10994
10995	#ifdef _TARGET_AMD64_
10996	if (true)
10997	#else
10998	if (size == EA_1BYTE)
10999	#endif
11000	{
11001	assert(INS_inc_l == INS_inc + `1`);
11002	assert(INS_dec_l == INS_dec + `1`);
11003
11004	// Can't use the compact form, use the long form
11005	ins = (instruction)(ins + `1`);
11006	if (size == EA_2BYTE)
11007	{
11008	// Output a size prefix for a 16-bit operand
11009	dst += emitOutputByte(dst, `0x66`);
11010	}
11011
11012	code = insCodeRR(ins);
11013	if (size != EA_1BYTE)
11014	{
11015	// Set the 'w' bit to get the large version
11016	code \|= `0x1`;
11017	}
11018
11019	if (TakesRexWPrefix(ins, size))
11020	{
11021	code = AddRexWPrefix(ins, code);
11022	}
11023
11024	// Register...
11025	unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11026
11027	// Output the REX prefix
11028	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11029
11030	dst += emitOutputWord(dst, code \| (regcode << `8`));
11031	}
11032	else
11033	{
11034	if (size == EA_2BYTE)
11035	{
11036	// Output a size prefix for a 16-bit operand
11037	dst += emitOutputByte(dst, `0x66`);
11038	}
11039	dst += emitOutputByte(dst, insCodeRR(ins) \| insEncodeReg012(ins, reg, size, nullptr));
11040	}
11041	break;
11042
11043	case INS_pop:
11044	case INS_pop_hide:
11045	case INS_push:
11046	case INS_push_hide:
11047
11048	assert(size == EA_PTRSIZE);
11049	code = insEncodeOpreg(ins, reg, size);
11050
11051	assert(!TakesVexPrefix(ins));
11052	assert(!TakesRexWPrefix(ins, size));
11053
11054	// Output the REX prefix
11055	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11056
11057	dst += emitOutputByte(dst, code);
11058	break;
11059
11060	case INS_bswap:
11061	{
11062	assert(size >= EA_4BYTE && size <= EA_PTRSIZE); // 16-bit BSWAP is undefined
11063
11064	// The Intel instruction set reference for BSWAP states that extended registers
11065	// should be enabled via REX.R, but per Vol. 2A, Sec. 2.2.1.2 (see also Figure 2-7),
11066	// REX.B should instead be used if the register is encoded in the opcode byte itself.
11067	// Therefore the default logic of insEncodeReg012 is correct for this case.
11068
11069	code = insCodeRR(ins);
11070
11071	if (TakesRexWPrefix(ins, size))
11072	{
11073	code = AddRexWPrefix(ins, code);
11074	}
11075
11076	// Register...
11077	unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11078
11079	// Output the REX prefix
11080	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11081
11082	dst += emitOutputWord(dst, code \| (regcode << `8`));
11083	break;
11084	}
11085
11086	case INS_seto:
11087	case INS_setno:
11088	case INS_setb:
11089	case INS_setae:
11090	case INS_sete:
11091	case INS_setne:
11092	case INS_setbe:
11093	case INS_seta:
11094	case INS_sets:
11095	case INS_setns:
11096	case INS_setpe:
11097	case INS_setpo:
11098	case INS_setl:
11099	case INS_setge:
11100	case INS_setle:
11101	case INS_setg:
11102
11103	assert(id->idGCref() == GCT_NONE);
11104	assert(size == EA_1BYTE);
11105
11106	code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));
11107
11108	// Output the REX prefix
11109	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11110
11111	// We expect this to always be a 'big' opcode
11112	assert(code & `0x00FF0000`);
11113
11114	dst += emitOutputByte(dst, code >> `16`);
11115	dst += emitOutputWord(dst, code & `0x0000FFFF`);
11116
11117	break;
11118
11119	case INS_mulEAX:
11120	case INS_imulEAX:
11121
11122	// Kill off any GC refs in EAX or EDX
11123	emitGCregDeadUpd(REG_EAX, dst);
11124	emitGCregDeadUpd(REG_EDX, dst);
11125
11126	__fallthrough;
11127
11128	default:
11129
11130	assert(id->idGCref() == GCT_NONE);
11131
11132	code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));
11133
11134	if (size != EA_1BYTE)
11135	{
11136	// Set the 'w' bit to get the large version
11137	code \|= `0x1`;
11138
11139	if (size == EA_2BYTE)
11140	{
11141	// Output a size prefix for a 16-bit operand
11142	dst += emitOutputByte(dst, `0x66`);
11143	}
11144	}
11145
11146	code = AddVexPrefixIfNeeded(ins, code, size);
11147
11148	if (TakesRexWPrefix(ins, size))
11149	{
11150	code = AddRexWPrefix(ins, code);
11151	}
11152
11153	// Output the REX prefix
11154	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11155
11156	dst += emitOutputWord(dst, code);
11157	break;
11158	}
11159
11160	// Are we writing the register? if so then update the GC information
11161	switch (id->idInsFmt())
11162	{
11163	case IF_RRD:
11164	break;
11165	case IF_RWR:
11166	if (id->idGCref())
11167	{
11168	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11169	}
11170	else
11171	{
11172	emitGCregDeadUpd(id->idReg1(), dst);
11173	}
11174	break;
11175	case IF_RRW:
11176	{
11177	#ifdef DEBUG
11178	regMaskTP regMask = genRegMask(reg);
11179	#endif
11180	if (id->idGCref())
11181	{
11182	// The reg must currently be holding either a gcref or a byref
11183	// and the instruction must be inc or dec
11184	assert(((emitThisGCrefRegs \| emitThisByrefRegs) & regMask) &&
11185	(ins == INS_inc \|\| ins == INS_dec \|\| ins == INS_inc_l \|\| ins == INS_dec_l));
11186	assert(id->idGCref() == GCT_BYREF);
11187	// Mark it as holding a GCT_BYREF
11188	emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11189	}
11190	else
11191	{
11192	// Can't use RRW to trash a GC ref. It's OK for unverifiable code
11193	// to trash Byrefs.
11194	assert((emitThisGCrefRegs & regMask) == `0`);
11195	}
11196	}
11197	break;
11198	default:
11199	#ifdef DEBUG
11200	emitDispIns(id, false, false, false);
11201	#endif
11202	assert(!"unexpected instruction format");
11203	break;
11204	}
11205
11206	return dst;
11207	}
11208
11209	/*****************************************************************************
11210	*
11211	* Output an instruction with two register operands.
11212	*/
11213
11214	BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
11215	{
11216	code_t code;
11217
11218	instruction ins = id->idIns();
11219	regNumber reg1 = id->idReg1();
11220	regNumber reg2 = id->idReg2();
11221	emitAttr size = id->idOpSize();
11222
11223	// Get the 'base' opcode
11224	code = insCodeRM(ins);
11225	code = AddVexPrefixIfNeeded(ins, code, size);
11226	if (IsSSEOrAVXInstruction(ins))
11227	{
11228	code = insEncodeRMreg(ins, code);
11229
11230	if (TakesRexWPrefix(ins, size))
11231	{
11232	code = AddRexWPrefix(ins, code);
11233	}
11234	}
11235	else if ((ins == INS_movsx) \|\| (ins == INS_movzx) \|\| (insIsCMOV(ins)))
11236	{
11237	code = insEncodeRMreg(ins, code) \| (int)(size == EA_2BYTE);
11238	#ifdef _TARGET_AMD64_
11239
11240	assert((size < EA_4BYTE) \|\| (insIsCMOV(ins)));
11241	if ((size == EA_8BYTE) \|\| (ins == INS_movsx))
11242	{
11243	code = AddRexWPrefix(ins, code);
11244	}
11245	}
11246	else if (ins == INS_movsxd)
11247	{
11248	code = insEncodeRMreg(ins, code);
11249
11250	#endif // _TARGET_AMD64_
11251	}
11252	#ifdef FEATURE_HW_INTRINSICS
11253	else if ((ins == INS_crc32) \|\| (ins == INS_lzcnt) \|\| (ins == INS_popcnt) \|\| (ins == INS_tzcnt))
11254	{
11255	code = insEncodeRMreg(ins, code);
11256	if ((ins == INS_crc32) && (size > EA_1BYTE))
11257	{
11258	code \|= `0x0100`;
11259	}
11260
11261	if (size == EA_2BYTE)
11262	{
11263	assert(ins == INS_crc32);
11264	dst += emitOutputByte(dst, `0x66`);
11265	}
11266	else if (size == EA_8BYTE)
11267	{
11268	code = AddRexWPrefix(ins, code);
11269	}
11270	}
11271	#endif // FEATURE_HW_INTRINSICS
11272	else
11273	{
11274	code = insEncodeMRreg(ins, insCodeMR(ins));
11275
11276	if (ins != INS_test)
11277	{
11278	code \|= `2`;
11279	}
11280
11281	switch (size)
11282	{
11283	case EA_1BYTE:
11284	noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
11285	noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
11286	break;
11287
11288	case EA_2BYTE:
11289	// Output a size prefix for a 16-bit operand
11290	dst += emitOutputByte(dst, `0x66`);
11291	__fallthrough;
11292
11293	case EA_4BYTE:
11294	// Set the 'w' bit to get the large version
11295	code \|= `0x1`;
11296	break;
11297
11298	#ifdef _TARGET_AMD64_
11299	case EA_8BYTE:
11300	// TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
11301	// Don't need to zero out the high bits explicitly
11302	if ((ins != INS_xor) \|\| (reg1 != reg2))
11303	{
11304	code = AddRexWPrefix(ins, code);
11305	}
11306
11307	// Set the 'w' bit to get the large version
11308	code \|= `0x1`;
11309	break;
11310
11311	#endif // _TARGET_AMD64_
11312
11313	default:
11314	assert(!"unexpected size");
11315	}
11316	}
11317
11318	regNumber reg345 = REG_NA;
11319	if (IsBMIInstruction(ins))
11320	{
11321	reg345 = getBmiRegNumber(ins);
11322	}
11323	if (reg345 == REG_NA)
11324	{
11325	reg345 = id->idReg1();
11326	}
11327	unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
11328	regCode \|= insEncodeReg012(ins, reg2, size, &code);
11329
11330	if (TakesVexPrefix(ins))
11331	{
11332	// In case of AVX instructions that take 3 operands, we generally want to encode reg1
11333	// as first source. In this case, reg1 is both a source and a destination.
11334	// The exception is the "merge" 3-operand case, where we have a move instruction, such
11335	// as movss, and we want to merge the source with itself.
11336	//
11337	// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
11338	// now we use the single source as source1 and source2.
11339	if (IsDstDstSrcAVXInstruction(ins))
11340	{
11341	// encode source/dest operand reg in 'vvvv' bits in 1's complement form
11342	code = insEncodeReg3456(ins, reg1, size, code);
11343	}
11344	else if (IsDstSrcSrcAVXInstruction(ins))
11345	{
11346	// encode source operand reg in 'vvvv' bits in 1's complement form
11347	code = insEncodeReg3456(ins, reg2, size, code);
11348	}
11349	}
11350
11351	// Output the REX prefix
11352	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11353
11354	if (code & `0xFF000000`)
11355	{
11356	// Output the highest word of the opcode
11357	dst += emitOutputWord(dst, code >> `16`);
11358	code &= `0x0000FFFF`;
11359
11360	if (Is4ByteSSEInstruction(ins))
11361	{
11362	// Output 3rd byte of the opcode
11363	dst += emitOutputByte(dst, code);
11364	code &= `0xFF00`;
11365	}
11366	}
11367	else if (code & `0x00FF0000`)
11368	{
11369	dst += emitOutputByte(dst, code >> `16`);
11370	code &= `0x0000FFFF`;
11371	}
11372
11373	// TODO-XArch-CQ: Right now support 4-byte opcode instructions only
11374	if ((code & `0xFF00`) == `0xC000`)
11375	{
11376	dst += emitOutputWord(dst, code \| (regCode << `8`));
11377	}
11378	else if ((code & `0xFF`) == `0x00`)
11379	{
11380	// This case happens for some SSE/AVX instructions only
11381	assert(IsAVXInstruction(ins) \|\| Is4ByteSSEInstruction(ins));
11382
11383	dst += emitOutputByte(dst, (code >> `8`) & `0xFF`);
11384	dst += emitOutputByte(dst, (`0xC0` \| regCode));
11385	}
11386	else
11387	{
11388	dst += emitOutputWord(dst, code);
11389	dst += emitOutputByte(dst, (`0xC0` \| regCode));
11390	}
11391
11392	// Does this instruction operate on a GC ref value?
11393	if (id->idGCref())
11394	{
11395	switch (id->idInsFmt())
11396	{
11397	case IF_RRD_RRD:
11398	break;
11399
11400	case IF_RWR_RRD:
11401
11402	if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
11403	{
11404	// We're relocating "this" in the prolog
11405	assert(emitComp->lvaIsOriginalThisArg(`0`));
11406	assert(emitComp->lvaTable[`0`].lvRegister);
11407	assert(emitComp->lvaTable[`0`].lvRegNum == reg1);
11408
11409	if (emitFullGCinfo)
11410	{
11411	emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
11412	break;
11413	}
11414	else
11415	{
11416	/ If emitFullGCinfo==false, the we don't use any*
11417	regPtrDsc's and so explictly note the location
11418	of "this" in GCEncode.cpp
11419	*/
11420	}
11421	}
11422
11423	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11424	break;
11425
11426	case IF_RRW_RRD:
11427
11428	switch (id->idIns())
11429	{
11430	/*
11431	This must be one of the following cases:
11432
11433	xor reg, reg to assign NULL
11434
11435	and r1 , r2 if (ptr1 && ptr2) ...
11436	or r1 , r2 if (ptr1 \|\| ptr2) ...
11437
11438	add r1 , r2 to compute a normal byref
11439	sub r1 , r2 to compute a strange byref (VC only)
11440
11441	*/
11442	case INS_xor:
11443	assert(id->idReg1() == id->idReg2());
11444	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11445	break;
11446
11447	case INS_or:
11448	case INS_and:
11449	emitGCregDeadUpd(id->idReg1(), dst);
11450	break;
11451
11452	case INS_add:
11453	case INS_sub:
11454	assert(id->idGCref() == GCT_BYREF);
11455
11456	#ifdef DEBUG
11457	regMaskTP regMask;
11458	regMask = genRegMask(reg1) \| genRegMask(reg2);
11459
11460	// r1/r2 could have been a GCREF as GCREF + int=BYREF
11461	// or BYREF+/-int=BYREF
11462	assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) \|\|
11463	((regMask & emitThisByrefRegs) && (ins == INS_add \|\| ins == INS_sub)));
11464	#endif
11465	// Mark r1 as holding a byref
11466	emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11467	break;
11468
11469	default:
11470	#ifdef DEBUG
11471	emitDispIns(id, false, false, false);
11472	#endif
11473	assert(!"unexpected GC reg update instruction");
11474	}
11475
11476	break;
11477
11478	case IF_RRW_RRW:
11479	// This must be "xchg reg1, reg2"
11480	assert(id->idIns() == INS_xchg);
11481
11482	// If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
11483	// register pointer mask.
11484
11485	GCtype gc1, gc2;
11486
11487	gc1 = emitRegGCtype(reg1);
11488	gc2 = emitRegGCtype(reg2);
11489
11490	if (gc1 != gc2)
11491	{
11492	// Kill the GC-info about the GC registers
11493
11494	if (needsGC(gc1))
11495	{
11496	emitGCregDeadUpd(reg1, dst);
11497	}
11498
11499	if (needsGC(gc2))
11500	{
11501	emitGCregDeadUpd(reg2, dst);
11502	}
11503
11504	// Now, swap the info
11505
11506	if (needsGC(gc1))
11507	{
11508	emitGCregLiveUpd(gc1, reg2, dst);
11509	}
11510
11511	if (needsGC(gc2))
11512	{
11513	emitGCregLiveUpd(gc2, reg1, dst);
11514	}
11515	}
11516	break;
11517
11518	default:
11519	#ifdef DEBUG
11520	emitDispIns(id, false, false, false);
11521	#endif
11522	assert(!"unexpected GC ref instruction format");
11523	}
11524	}
11525	else
11526	{
11527	if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
11528	{
11529	switch (id->idInsFmt())
11530	{
11531	case IF_RRD_CNS:
11532	// INS_mulEAX can not be used with any of these formats
11533	assert(ins != INS_mulEAX && ins != INS_imulEAX);
11534
11535	// For the three operand imul instruction the target
11536	// register is encoded in the opcode
11537
11538	if (instrIs3opImul(ins))
11539	{
11540	regNumber tgtReg = inst3opImulReg(ins);
11541	emitGCregDeadUpd(tgtReg, dst);
11542	}
11543	break;
11544
11545	case IF_RWR_RRD:
11546	case IF_RRW_RRD:
11547	case IF_RWR_RRD_RRD:
11548	// INS_movxmm2i writes to reg2.
11549	if (ins == INS_mov_xmm2i)
11550	{
11551	emitGCregDeadUpd(id->idReg2(), dst);
11552	}
11553	else
11554	{
11555	emitGCregDeadUpd(id->idReg1(), dst);
11556	}
11557	break;
11558
11559	default:
11560	break;
11561	}
11562	}
11563	}
11564
11565	return dst;
11566	}
11567
/*****************************************************************************
*
* Output a three-register instruction. idReg1 is the target register, idReg2
* and idReg3 are the two sources. The instruction is asserted to be an AVX
* instruction (three-operand form or blendv).
*
* dst - position in the code buffer at which to start writing
* id  - descriptor of the instruction to encode
*
* Returns the position just past the emitted bytes.
*/
11568	BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
11569	{
11570	code_t code;
11571
11572	instruction ins = id->idIns();
11573	assert(IsAVXInstruction(ins));
11574	assert(IsThreeOperandAVXInstruction(ins) \|\| isAvxBlendv(ins));
11575	regNumber targetReg = id->idReg1();
11576	regNumber src1 = id->idReg2();
11577	regNumber src2 = id->idReg3();
11578	emitAttr size = id->idOpSize();
11579
// Start from the RM form of the opcode and add the prefixes it needs.
11580	code = insCodeRM(ins);
11581	code = AddVexPrefixIfNeeded(ins, code, size);
11582	code = insEncodeRMreg(ins, code);
11583
11584	if (TakesRexWPrefix(ins, size))
11585	{
11586	code = AddRexWPrefix(ins, code);
11587	}
11588
// regCode collects the target (via insEncodeReg345) and src2 (via
// insEncodeReg012); it is merged into the final opcode byte or OR-ed with
// 0xC0 below. Both helpers receive &code and so may also modify the prefix.
11589	unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
11590	regCode \|= insEncodeReg012(ins, src2, size, &code);
11591	// encode source operand reg in 'vvvv' bits in 1's complement form
11592	code = insEncodeReg3456(ins, src1, size, code);
11593
11594	// Output the REX prefix
11595	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11596
// Emit whatever sits above the low 16 bits of 'code' first, then keep only
// the low 16 bits for the final step.
11597	// Is this a 'big' opcode?
11598	if (code & `0xFF000000`)
11599	{
11600	// Output the highest word of the opcode
11601	dst += emitOutputWord(dst, code >> `16`);
11602	code &= `0x0000FFFF`;
11603	}
11604	else if (code & `0x00FF0000`)
11605	{
11606	dst += emitOutputByte(dst, code >> `16`);
11607	code &= `0x0000FFFF`;
11608	}
11609
11610	// TODO-XArch-CQ: Right now support 4-byte opcode instructions only
// Three shapes for the remaining 16 bits:
//  - high byte is 0xC0: fold the register bits into it and emit one word
//  - low byte is zero: emit the high byte, then 0xC0 | regCode
//  - otherwise: emit the full word, then 0xC0 | regCode
11611	if ((code & `0xFF00`) == `0xC000`)
11612	{
11613	dst += emitOutputWord(dst, code \| (regCode << `8`));
11614	}
11615	else if ((code & `0xFF`) == `0x00`)
11616	{
11617	// This case happens for AVX instructions only
11618	assert(IsAVXInstruction(ins));
11619
11620	dst += emitOutputByte(dst, (code >> `8`) & `0xFF`);
11621	dst += emitOutputByte(dst, (`0xC0` \| regCode));
11622	}
11623	else
11624	{
11625	dst += emitOutputWord(dst, code);
11626	dst += emitOutputByte(dst, (`0xC0` \| regCode));
11627	}
11628
// No GC register tracking is done in this function, so the descriptor must
// not be marked as a GC ref.
11629	noway_assert(!id->idGCref());
11630
11631	return dst;
11632	}
11633
11634	/*****************************************************************************
11635	*
11636	* Output an instruction with a register and constant operands.
*
* dst - position in the code buffer at which to start writing
* id  - descriptor supplying the instruction, the operand size, the register
*       (idReg1) and the constant (emitGetInsSC)
*
* Returns the position just past the emitted bytes.
*
* Three encodings are produced here: the SSE/AVX opcode-reg-imm8 form, the
* special mov form, and the general form, which chooses between a
* sign-extended byte immediate, the accumulator opcode and a full-size
* immediate. Except on the SSE/AVX path, which returns early, the GC register
* sets are updated once the bytes have been written.
11637	*/
11638
11639	BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
11640	{
11641	code_t code;
11642	emitAttr size = id->idOpSize();
11643	instruction ins = id->idIns();
11644	regNumber reg = id->idReg1();
11645	ssize_t val = emitGetInsSC(id);
// valInByte: the constant survives a round trip through a signed char.
// It is never set for mov or test.
11646	bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
11647
11648	// BT reg,imm might be useful but it requires special handling of the immediate value
11649	// (it is always encoded in a byte). Let's not complicate things until this is needed.
11650	assert(ins != INS_bt);
11651
11652	if (id->idIsCnsReloc())
11653	{
11654	valInByte = false; // relocs can't be placed in a byte
11655	}
11656
11657	noway_assert(emitVerifyEncodable(ins, size, reg));
11658
11659	if (IsSSEOrAVXInstruction(ins))
11660	{
11661	// Handle SSE2 instructions of the form "opcode reg, immed8"
11662
11663	assert(id->idGCref() == GCT_NONE);
11664	assert(valInByte);
11665
11666	// The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
11667	regNumber regOpcode = getSseShiftRegNumber(ins);
11668
11669	// Get the 'base' opcode.
11670	code = insCodeMI(ins);
11671	code = AddVexPrefixIfNeeded(ins, code, size);
11672	code = insEncodeMIreg(ins, reg, size, code);
11673	assert(code & `0x00FF0000`);
11674	if (TakesVexPrefix(ins))
11675	{
11676	// The 'vvvv' bits encode the destination register, which for this case (RI)
11677	// is the same as the source.
11678	code = insEncodeReg3456(ins, reg, size, code);
11679	}
11680
// Pre-shift the register bits by 8 so they can be OR-ed into the word
// emitted below.
11681	unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) \| insEncodeReg012(ins, reg, size, &code)) << `8`;
11682
11683	// Output the REX prefix
11684	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11685
// Emit the bytes above the low 16 bits of the opcode, if any.
11686	if (code & `0xFF000000`)
11687	{
11688	dst += emitOutputWord(dst, code >> `16`);
11689	}
11690	else if (code & `0xFF0000`)
11691	{
11692	dst += emitOutputByte(dst, code >> `16`);
11693	}
11694
11695	dst += emitOutputWord(dst, code \| regcode);
11696
11697	dst += emitOutputByte(dst, val);
11698
// Early return: this path skips the GC tracking at DONE (GCT_NONE was
// asserted above).
11699	return dst;
11700	}
11701
11702	// The 'mov' opcode is special
11703	if (ins == INS_mov)
11704	{
11705	code = insCodeACC(ins);
11706	assert(code < `0x100`);
11707
11708	code \|= `0x08`; // Set the 'w' bit
11709	unsigned regcode = insEncodeReg012(ins, reg, size, &code);
11710	code \|= regcode;
11711
11712	// This is INS_mov and will not take VEX prefix
11713	assert(!TakesVexPrefix(ins));
11714
11715	if (TakesRexWPrefix(ins, size))
11716	{
11717	code = AddRexWPrefix(ins, code);
11718	}
11719
11720	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11721
11722	dst += emitOutputByte(dst, code);
// NOTE(review): when _TARGET_AMD64_ is not defined and size is not EA_4BYTE,
// no immediate is written here - confirm callers never reach that case.
11723	if (size == EA_4BYTE)
11724	{
11725	dst += emitOutputLong(dst, val);
11726	}
11727	#ifdef _TARGET_AMD64_
11728	else
11729	{
11730	assert(size == EA_PTRSIZE);
11731	dst += emitOutputSizeT(dst, val);
11732	}
11733	#endif
11734
// The relocation is recorded at the immediate just written, i.e.
// EA_SIZE(size) bytes back from dst.
11735	if (id->idIsCnsReloc())
11736	{
11737	emitRecordRelocation((void)(dst - (unsigned)EA_SIZE(size)), (void**)(size_t)val, IMAGE_REL_BASED_MOFFSET);
11738	}
11739
11740	goto DONE;
11741	}
11742
11743	// Decide which encoding is the shortest
// useSigned: emit a sign-extended byte immediate.
// useACC: use the accumulator form of the opcode (only considered for EAX).
11744	bool useSigned, useACC;
11745
11746	if (reg == REG_EAX && !instrIs3opImul(ins))
11747	{
11748	if (size == EA_1BYTE \|\| (ins == INS_test))
11749	{
11750	// For al, ACC encoding is always the smallest
11751	useSigned = false;
11752	useACC = true;
11753	}
11754	else
11755	{
11756	/ For ax/eax, we avoid ACC encoding for small constants as we*
11757	* can emit the small constant and have it sign-extended.
11758	* For big constants, the ACC encoding is better as we can use
11759	* the 1 byte opcode
11760	*/
11761
11762	if (valInByte)
11763	{
11764	// avoid using ACC encoding
11765	useSigned = true;
11766	useACC = false;
11767	}
11768	else
11769	{
11770	useSigned = false;
11771	useACC = true;
11772	}
11773	}
11774	}
11775	else
11776	{
11777	useACC = false;
11778
11779	if (valInByte)
11780	{
11781	useSigned = true;
11782	}
11783	else
11784	{
11785	useSigned = false;
11786	}
11787	}
11788
11789	// "test" has no 's' bit
11790	if (ins == INS_test)
11791	{
11792	useSigned = false;
11793	}
11794
11795	// Get the 'base' opcode
11796	if (useACC)
11797	{
11798	assert(!useSigned);
11799	code = insCodeACC(ins);
11800	}
11801	else
11802	{
11803	assert(!useSigned \|\| valInByte);
11804
11805	// Some instructions (at least 'imul') do not have a
11806	// r/m, immed form, but do have a dstReg,srcReg,imm8 form.
11807	if (valInByte && useSigned && insNeedsRRIb(ins))
11808	{
11809	code = insEncodeRRIb(ins, reg, size);
11810	}
11811	else
11812	{
11813	code = insCodeMI(ins);
11814	code = AddVexPrefixIfNeeded(ins, code, size);
11815	code = insEncodeMIreg(ins, reg, size, code);
11816	}
11817	}
11818
// Fold the operand size into the opcode: a 2-byte operand first gets a 0x66
// prefix byte, and every size above one byte sets bit 0 of 'code'.
11819	switch (size)
11820	{
11821	case EA_1BYTE:
11822	break;
11823
11824	case EA_2BYTE:
11825	// Output a size prefix for a 16-bit operand
11826	dst += emitOutputByte(dst, `0x66`);
11827	__fallthrough;
11828
11829	case EA_4BYTE:
11830	// Set the 'w' bit to get the large version
11831	code \|= `0x1`;
11832	break;
11833
11834	#ifdef _TARGET_AMD64_
11835	case EA_8BYTE:
11836	/ Set the 'w' bit to get the large version /
11837	/ and the REX.W bit to get the really large version /
11838
11839	code = AddRexWPrefix(ins, code);
11840	code \|= `0x1`;
11841	break;
11842	#endif
11843
11844	default:
11845	assert(!"unexpected size");
11846	}
11847
11848	// Output the REX prefix
11849	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
11850
11851	// Does the value fit in a sign-extended byte?
11852	// Important! Only set the 's' bit when we have a size larger than EA_1BYTE.
11853	// Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
11854
11855	if (useSigned && (size > EA_1BYTE))
11856	{
11857	// We can just set the 's' bit, and issue an immediate byte
11858
11859	code \|= `0x2`; // Set the 's' bit to use a sign-extended immediate byte.
11860	dst += emitOutputWord(dst, code);
11861	dst += emitOutputByte(dst, val);
11862	}
11863	else
11864	{
11865	// Can we use an accumulator (EAX) encoding?
11866	if (useACC)
11867	{
11868	dst += emitOutputByte(dst, code);
11869	}
11870	else
11871	{
11872	dst += emitOutputWord(dst, code);
11873	}
11874
// Write the immediate at the operand size; the 8-byte case uses
// emitOutputLong exactly like the 4-byte case.
11875	switch (size)
11876	{
11877	case EA_1BYTE:
11878	dst += emitOutputByte(dst, val);
11879	break;
11880	case EA_2BYTE:
11881	dst += emitOutputWord(dst, val);
11882	break;
11883	case EA_4BYTE:
11884	dst += emitOutputLong(dst, val);
11885	break;
11886	#ifdef _TARGET_AMD64_
11887	case EA_8BYTE:
11888	dst += emitOutputLong(dst, val);
11889	break;
11890	#endif // _TARGET_AMD64_
11891	default:
11892	break;
11893	}
11894
// The relocation is recorded sizeof(INT32) bytes back from dst; only a
// 4-byte operand is expected here (see the assert).
11895	if (id->idIsCnsReloc())
11896	{
11897	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
11898	assert(size == EA_4BYTE);
11899	}
11900	}
11901
11902	DONE:
11903
// GC tracking. When the descriptor is marked as a GC ref the register is
// recorded as live (byref for read-write formats); otherwise any register
// written by the instruction is recorded as dead.
11904	// Does this instruction operate on a GC ref value?
11905	if (id->idGCref())
11906	{
11907	switch (id->idInsFmt())
11908	{
11909	case IF_RRD_CNS:
11910	break;
11911
11912	case IF_RWR_CNS:
11913	emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
11914	break;
11915
11916	case IF_RRW_CNS:
11917	assert(id->idGCref() == GCT_BYREF);
11918
11919	#ifdef DEBUG
11920	regMaskTP regMask;
11921	regMask = genRegMask(reg);
11922	// FIXNOW review the other places and relax the assert there too
11923
11924	// The reg must currently be holding either a gcref or a byref
11925	// GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
11926	if (emitThisGCrefRegs & regMask)
11927	{
11928	assert(ins == INS_add);
11929	}
11930	if (emitThisByrefRegs & regMask)
11931	{
11932	assert(ins == INS_add \|\| ins == INS_sub);
11933	}
11934	#endif
11935	// Mark it as holding a GCT_BYREF
11936	emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
11937	break;
11938
11939	default:
11940	#ifdef DEBUG
11941	emitDispIns(id, false, false, false);
11942	#endif
11943	assert(!"unexpected GC ref instruction format");
11944	}
11945
11946	// mul can never produce a GC ref
11947	assert(!instrIs3opImul(ins));
11948	assert(ins != INS_mulEAX && ins != INS_imulEAX);
11949	}
11950	else
11951	{
11952	switch (id->idInsFmt())
11953	{
11954	case IF_RRD_CNS:
11955	// INS_mulEAX can not be used with any of these formats
11956	assert(ins != INS_mulEAX && ins != INS_imulEAX);
11957
11958	// For the three operand imul instruction the target
11959	// register is encoded in the opcode
11960
11961	if (instrIs3opImul(ins))
11962	{
11963	regNumber tgtReg = inst3opImulReg(ins);
11964	emitGCregDeadUpd(tgtReg, dst);
11965	}
11966	break;
11967
11968	case IF_RRW_CNS:
11969	case IF_RWR_CNS:
11970	assert(!instrIs3opImul(ins));
11971
11972	emitGCregDeadUpd(id->idReg1(), dst);
11973	break;
11974
11975	default:
11976	#ifdef DEBUG
11977	emitDispIns(id, false, false, false);
11978	#endif
11979	assert(!"unexpected GC ref instruction format");
11980	}
11981	}
11982
11983	return dst;
11984	}
11985
11986	/*****************************************************************************
11987	*
11988	* Output an instruction with a constant operand.
*
* dst - position in the code buffer at which to start writing
* id  - descriptor supplying the instruction, the operand size and the
*       constant (emitGetInsSC)
*
* Returns the position just past the emitted bytes.
*
* Only INS_jge, INS_loop, INS_ret, INS_push and INS_push_hide are handled;
* any other instruction hits the assert in the default case.
11989	*/
11990
11991	BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
11992	{
11993	code_t code;
11994	instruction ins = id->idIns();
11995	emitAttr size = id->idOpSize();
11996	ssize_t val = emitGetInsSC(id);
// valInByte: the constant survives a round trip through a signed char.
11997	bool valInByte = ((signed char)val == val);
11998
11999	// We would need to update GC info correctly
12000	assert(!IsSSEInstruction(ins));
12001	assert(!IsAVXInstruction(ins));
12002
12003	#ifdef _TARGET_AMD64_
12004	// all these opcodes take a sign-extended 4-byte immediate, max
12005	noway_assert(size < EA_8BYTE \|\| ((int)val == val && !id->idIsCnsReloc()));
12006	#endif
12007
12008	if (id->idIsCnsReloc())
12009	{
12010	valInByte = false; // relocs can't be placed in a byte
12011
12012	// Of these instructions only the push instruction can have reloc
12013	assert(ins == INS_push \|\| ins == INS_push_hide);
12014	}
12015
12016	switch (ins)
12017	{
// jge and loop: one opcode byte followed by a one-byte constant, which is
// asserted to lie in [-128, 127].
12018	case INS_jge:
12019	assert((val >= -`128`) && (val <= `127`));
12020	dst += emitOutputByte(dst, insCode(ins));
12021	dst += emitOutputByte(dst, val);
12022	break;
12023
12024	case INS_loop:
12025	assert((val >= -`128`) && (val <= `127`));
12026	dst += emitOutputByte(dst, insCodeMI(ins));
12027	dst += emitOutputByte(dst, val);
12028	break;
12029
// ret with a constant: one opcode byte followed by a 16-bit constant, which
// is asserted to be non-zero.
12030	case INS_ret:
12031	assert(val);
12032	dst += emitOutputByte(dst, insCodeMI(ins));
12033	dst += emitOutputWord(dst, val);
12034	break;
12035
12036	case INS_push_hide:
12037	case INS_push:
12038	code = insCodeMI(ins);
12039
12040	// Does the operand fit in a byte?
12041	if (valInByte)
12042	{
// Short form: OR bit 1 into the opcode and follow it with a one-byte
// constant. No REX prefix is emitted on this path.
12043	dst += emitOutputByte(dst, code \| `2`);
12044	dst += emitOutputByte(dst, val);
12045	}
12046	else
12047	{
12048	if (TakesRexWPrefix(ins, size))
12049	{
12050	code = AddRexWPrefix(ins, code);
12051	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12052	}
12053
// Long form: opcode byte plus a 4-byte constant; a relocation, if any, is
// recorded at that constant.
12054	dst += emitOutputByte(dst, code);
12055	dst += emitOutputLong(dst, val);
12056	if (id->idIsCnsReloc())
12057	{
12058	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
12059	}
12060	}
12061
// Pushing a GC ref constant is not recorded anywhere; DEBUG builds only
// print a reminder.
12062	// Did we push a GC ref value?
12063	if (id->idGCref())
12064	{
12065	#ifdef DEBUG
12066	printf("UNDONE: record GCref push [cns]\n");
12067	#endif
12068	}
12069
12070	break;
12071
12072	default:
12073	assert(!"unexpected instruction");
12074	}
12075
12076	return dst;
12077	}
12078
12079	/*****************************************************************************
12080	*
12081	* Output a local jump instruction.
12082	* This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
12083	* needs to get bound to an actual address and processed by branch shortening.
*
* dst - position in the code buffer at which to start writing
* i   - descriptor of the instruction; it is used as an instrDescJmp
*
* Returns the position just past the emitted bytes.
*
* Conditional jumps and jmp may be emitted in the short (2-byte) form when the
* target is in range; call, push, mov and lea always use a single size.
* push, mov and lea encode the address of the target instead of a distance.
* For forward references the address of the emitted distance/address field is
* saved in idjTemp.idjAddr (nullptr otherwise).
12084	*/
12085
12086	BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
12087	{
12088	unsigned srcOffs;
12089	unsigned dstOffs;
12090	ssize_t distVal;
12091
12092	instrDescJmp* id = (instrDescJmp*)i;
12093	instruction ins = id->idIns();
12094	bool jmp;
12095	bool relAddr = true; // does the instruction use relative-addressing?
12096
12097	// SSE/AVX does not make any sense here
12098	assert(!IsSSEInstruction(ins));
12099	assert(!IsAVXInstruction(ins));
12100
// ssz / lsz: size of the short and of the long encoding of this instruction.
12101	size_t ssz;
12102	size_t lsz;
12103
// Classify the instruction. The default case covers every instruction not
// listed explicitly and treats it as a conditional jump.
12104	switch (ins)
12105	{
12106	default:
12107	ssz = JCC_SIZE_SMALL;
12108	lsz = JCC_SIZE_LARGE;
12109	jmp = true;
12110	break;
12111
12112	case INS_jmp:
12113	ssz = JMP_SIZE_SMALL;
12114	lsz = JMP_SIZE_LARGE;
12115	jmp = true;
12116	break;
12117
12118	case INS_call:
12119	ssz = lsz = CALL_INST_SIZE;
12120	jmp = false;
12121	break;
12122
12123	case INS_push_hide:
12124	case INS_push:
12125	ssz = lsz = `5`;
12126	jmp = false;
12127	relAddr = false;
12128	break;
12129
12130	case INS_mov:
12131	case INS_lea:
12132	ssz = lsz = id->idCodeSize();
12133	jmp = false;
12134	relAddr = false;
12135	break;
12136	}
12137
12138	// Figure out the distance to the target
12139	srcOffs = emitCurCodeOffs(dst);
12140	dstOffs = id->idAddr()->iiaIGlabel->igOffs;
12141
// With relative addressing distVal is target minus source; otherwise it is
// the address of the target itself.
12142	if (relAddr)
12143	{
12144	distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
12145	}
12146	else
12147	{
12148	distVal = (ssize_t)emitOffsetToPtr(dstOffs);
12149	}
12150
12151	if (dstOffs <= srcOffs)
12152	{
12153	// This is a backward jump - distance is known at this point
12154	CLANG_FORMAT_COMMENT_ANCHOR;
12155
12156	#if DEBUG_EMIT
12157	if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM \|\| INTERESTING_JUMP_NUM == `0`)
12158	{
12159	size_t blkOffs = id->idjIG->igOffs;
12160
12161	if (INTERESTING_JUMP_NUM == `0`)
12162	{
12163	printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12164	}
12165	printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
12166	printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
12167	printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
12168	}
12169	#endif
12170
// ssz is a size_t, so the lower bound is cast to size_t for the comparison.
12171	// Can we use a short jump?
12172	if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
12173	{
12174	emitSetShortJump(id);
12175	}
12176	}
12177	else
12178	{
12179	// This is a forward jump - distance will be an upper limit
12180	emitFwdJumps = true;
12181
12182	// The target offset will be closer by at least 'emitOffsAdj', but only if this
12183	// jump doesn't cross the hot-cold boundary.
12184	if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12185	{
12186	dstOffs -= emitOffsAdj;
12187	distVal -= emitOffsAdj;
12188	}
12189
12190	// Record the location of the jump for later patching
12191	id->idjOffs = dstOffs;
12192
12193	// Are we overflowing the id->idjOffs bitfield?
12194	if (id->idjOffs != dstOffs)
12195	{
12196	IMPL_LIMITATION("Method is too large");
12197	}
12198
12199	#if DEBUG_EMIT
12200	if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM \|\| INTERESTING_JUMP_NUM == `0`)
12201	{
12202	size_t blkOffs = id->idjIG->igOffs;
12203
12204	if (INTERESTING_JUMP_NUM == `0`)
12205	{
12206	printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
12207	}
12208	printf("[4] Jump block is at %08X\n", blkOffs);
12209	printf("[4] Jump is at %08X\n", srcOffs);
12210	printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
12211	}
12212	#endif
12213
12214	// Can we use a short jump?
12215	if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
12216	{
12217	emitSetShortJump(id);
12218	}
12219	}
12220
12221	// Adjust the offset to emit relative to the end of the instruction
12222	if (relAddr)
12223	{
12224	distVal -= id->idjShort ? ssz : lsz;
12225	}
12226
// Disabled diagnostic (note the constant 0 in the condition). The direction
// label follows the test used above: dstOffs <= srcOffs is a backward jump.
// distValSize is consumed as the field width of each printed offset.
12227	#ifdef DEBUG
12228	if (`0` && emitComp->verbose)
12229	{
12230	size_t sz = id->idjShort ? ssz : lsz;
12231	int distValSize = id->idjShort ? `4` : `8`;
12232	printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Bwd" : "Fwd",
12233	emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
12234	distVal);
12235	}
12236	#endif
12237
12238	// What size jump should we use?
12239	if (id->idjShort)
12240	{
12241	// Short jump
12242	assert(!id->idjKeepLong);
12243	assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
12244
12245	assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
12246	assert(JMP_SIZE_SMALL == `2`);
12247
12248	assert(jmp);
12249
// The instruction was sized larger than the short form: add the saving to
// emitOffsAdj, which the forward-jump path above subtracts from targets.
12250	if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
12251	{
12252	emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
12253
12254	#ifdef DEBUG
12255	if (emitComp->verbose)
12256	{
12257	printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
12258	}
12259	#endif
12260	}
12261
12262	dst += emitOutputByte(dst, insCode(ins));
12263
12264	// For forward jumps, record the address of the distance value
12265	id->idjTemp.idjAddr = (distVal > `0`) ? dst : nullptr;
12266
12267	dst += emitOutputByte(dst, distVal);
12268	}
12269	else
12270	{
12271	code_t code;
12272
12273	// Long jump
12274	if (jmp)
12275	{
// The long opcode is looked up by adding a fixed delta to the short
// instruction value; the asserts check that delta for every jump kind.
12276	// clang-format off
12277	assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
12278	assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
12279	assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
12280	assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
12281	assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
12282	assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
12283	assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
12284	assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
12285	assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
12286	assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
12287	assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe);
12288	assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo);
12289	assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
12290	assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
12291	assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
12292	assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
12293	// clang-format on
12294
12295	code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
12296	}
12297	else if (ins == INS_push \|\| ins == INS_push_hide)
12298	{
12299	assert(insCodeMI(INS_push) == `0x68`);
12300	code = `0x68`;
12301	}
12302	else if (ins == INS_mov)
12303	{
12304	// Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
12305	insFormat tmpInsFmt = id->idInsFmt();
12306	insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel;
12307	bool tmpDspReloc = id->idIsDspReloc();
12308
12309	id->idInsFmt(IF_SWR_CNS);
12310	id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
12311	id->idSetIsDspReloc(false);
12312
12313	dst = emitOutputSV(dst, id, insCodeMI(ins));
12314
12315	// Restore id fields with original values
12316	id->idInsFmt(tmpInsFmt);
12317	id->idAddr()->iiaIGlabel = tmpIGlabel;
12318	id->idSetIsDspReloc(tmpDspReloc);
// Placeholder only: the opcode-emitting block below is skipped for INS_mov.
12319	code = `0xCC`;
12320	}
12321	else if (ins == INS_lea)
12322	{
12323	// Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
12324	// We basically are doing what emitIns_R_AI does.
12325	// TODO-XArch-Cleanup: revisit this.
12326	instrDescAmd idAmdStackLocal;
12327	instrDescAmd* idAmd = &idAmdStackLocal;
12328	(instrDesc)idAmd = (instrDesc)id; // copy all the "core" fields
12329	memset((BYTE)idAmd + sizeof*(instrDesc), `0`,
12330	sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
12331
12332	idAmd->idInsFmt(IF_RWR_ARD);
12333	idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
12334	idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
12335	emitSetAmdDisp(idAmd, distVal); // set the displacement
12336	idAmd->idSetIsDspReloc(id->idIsDspReloc());
12337	assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
12338
12339	UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
12340	idAmd->idCodeSize(sz);
12341
12342	code = insCodeRM(ins);
12343	code \|= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << `8`);
12344
12345	dst = emitOutputAM(dst, idAmd, code, nullptr);
12346
// NOTE(review): this value is never read; the branch returns below.
12347	code = `0xCC`;
12348
12349	// For forward jumps, record the address of the distance value
12350	// Hard-coded 4 here because we already output the displacement, as the last thing.
12351	id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - `4`) : nullptr;
12352
12353	// We're done
12354	return dst;
12355	}
12356	else
12357	{
// Remaining case; per the switch above this is INS_call.
12358	code = `0xE8`;
12359	}
12360
// Emit the opcode (one or two bytes) for everything except mov, whose bytes
// were already written by emitOutputSV above.
12361	if (ins != INS_mov)
12362	{
12363	dst += emitOutputByte(dst, code);
12364
12365	if (code & `0xFF00`)
12366	{
12367	dst += emitOutputByte(dst, code >> `8`);
12368	}
12369	}
12370
12371	// For forward jumps, record the address of the distance value
12372	id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
12373
12374	dst += emitOutputLong(dst, distVal);
12375
// When _TARGET_AMD64_ is defined the block below always runs; otherwise it
// runs only when compReloc is set.
12376	#ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
12377	if (emitComp->opts.compReloc)
12378	#endif
12379	{
12380	if (!relAddr)
12381	{
12382	emitRecordRelocation((void)(dst - sizeof(INT32)), (void**)distVal, IMAGE_REL_BASED_HIGHLOW);
12383	}
12384	else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
12385	{
12386	assert(id->idjKeepLong);
12387	emitRecordRelocation((void)(dst - sizeof*(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
12388	}
12389	}
12390	}
12391
12392	// Local calls kill all registers
12393	if (ins == INS_call && (emitThisGCrefRegs \| emitThisByrefRegs))
12394	{
12395	emitGCregDeadUpdMask(emitThisGCrefRegs \| emitThisByrefRegs, dst);
12396	}
12397
12398	return dst;
12399	}
12400
12401	/*****************************************************************************
12402	*
12403	* Append the machine code corresponding to the given instruction descriptor
12404	* to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
12405	* is the instruction group that contains the instruction. Updates '*dp' to
12406	* point past the generated code, and returns the size of the instruction
12407	* descriptor in bytes.
12408	*/
12409
12410	#ifdef _PREFAST_
12411	#pragma warning(push)
12412	#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
12413	#endif
12414	size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
12415	{
12416	assert(emitIssuing);
12417
12418	BYTE* dst = *dp;
12419	size_t sz = sizeof(instrDesc);
12420	instruction ins = id->idIns();
12421	unsigned char callInstrSize = `0`;
12422
12423	#ifdef DEBUG
12424	bool dspOffs = emitComp->opts.dspGCtbls;
12425	#endif // DEBUG
12426
12427	emitAttr size = id->idOpSize();
12428
12429	assert(REG_NA == (int)REG_NA);
12430
12431	assert(ins != INS_imul \|\| size >= EA_4BYTE); // Has no 'w' bit
12432	assert(instrIs3opImul(id->idIns()) == `0` \|\| size >= EA_4BYTE); // Has no 'w' bit
12433
12434	VARSET_TP GCvars(VarSetOps::UninitVal());
12435
12436	// What instruction format have we got?
12437	switch (id->idInsFmt())
12438	{
12439	code_t code;
12440	unsigned regcode;
12441	int args;
12442	CnsVal cnsVal;
12443
12444	BYTE* addr;
12445	bool recCall;
12446
12447	regMaskTP gcrefRegs;
12448	regMaskTP byrefRegs;
12449
12450	/******************************************************************/
12451	/ No operands /
12452	/******************************************************************/
12453	case IF_NONE:
12454	// the loop alignment pseudo instruction
12455	if (ins == INS_align)
12456	{
12457	sz = SMALL_IDSC_SIZE;
12458	dst = emitOutputNOP(dst, (-(int)(size_t)dst) & `0x0f`);
12459	assert(((size_t)dst & `0x0f`) == `0`);
12460	break;
12461	}
12462
12463	if (ins == INS_nop)
12464	{
12465	dst = emitOutputNOP(dst, id->idCodeSize());
12466	break;
12467	}
12468
12469	// the cdq instruction kills the EDX register implicitly
12470	if (ins == INS_cdq)
12471	{
12472	emitGCregDeadUpd(REG_EDX, dst);
12473	}
12474
12475	assert(id->idGCref() == GCT_NONE);
12476
12477	code = insCodeMR(ins);
12478
12479	#ifdef _TARGET_AMD64_
12480	// Support only scalar AVX instructions and hence size is hard coded to 4-byte.
12481	code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
12482
12483	if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
12484	{
12485	code = AddRexWPrefix(ins, code);
12486	}
12487	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12488	#endif
12489	// Is this a 'big' opcode?
12490	if (code & `0xFF000000`)
12491	{
12492	// The high word and then the low word
12493	dst += emitOutputWord(dst, code >> `16`);
12494	code &= `0x0000FFFF`;
12495	dst += emitOutputWord(dst, code);
12496	}
12497	else if (code & `0x00FF0000`)
12498	{
12499	// The high byte and then the low word
12500	dst += emitOutputByte(dst, code >> `16`);
12501	code &= `0x0000FFFF`;
12502	dst += emitOutputWord(dst, code);
12503	}
12504	else if (code & `0xFF00`)
12505	{
12506	// The 2 byte opcode
12507	dst += emitOutputWord(dst, code);
12508	}
12509	else
12510	{
12511	// The 1 byte opcode
12512	dst += emitOutputByte(dst, code);
12513	}
12514
12515	break;
12516
12517	/******************************************************************/
12518	/ Simple constant, local label, method /
12519	/******************************************************************/
12520
12521	case IF_CNS:
12522	dst = emitOutputIV(dst, id);
12523	sz = emitSizeOfInsDsc(id);
12524	break;
12525
12526	case IF_LABEL:
12527	case IF_RWR_LABEL:
12528	case IF_SWR_LABEL:
12529	assert(id->idGCref() == GCT_NONE);
12530	assert(id->idIsBound());
12531
12532	// TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
12533	dst = emitOutputLJ(dst, id);
12534	sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
12535	break;
12536
12537	case IF_METHOD:
12538	case IF_METHPTR:
12539	// Assume we'll be recording this call
12540	recCall = true;
12541
12542	// Get hold of the argument count and field Handle
12543	args = emitGetInsCDinfo(id);
12544
12545	// Is this a "fat" call descriptor?
12546	if (id->idIsLargeCall())
12547	{
12548	instrDescCGCA* idCall = (instrDescCGCA*)id;
12549	gcrefRegs = idCall->idcGcrefRegs;
12550	byrefRegs = idCall->idcByrefRegs;
12551	VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12552	sz = sizeof(instrDescCGCA);
12553	}
12554	else
12555	{
12556	assert(!id->idIsLargeDsp());
12557	assert(!id->idIsLargeCns());
12558
12559	gcrefRegs = emitDecodeCallGCregs(id);
12560	byrefRegs = `0`;
12561	VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12562	sz = sizeof(instrDesc);
12563	}
12564
12565	addr = (BYTE*)id->idAddr()->iiaAddr;
12566	assert(addr != nullptr);
12567
12568	// Some helpers don't get recorded in GC tables
12569	if (id->idIsNoGC())
12570	{
12571	recCall = false;
12572	}
12573
12574	// What kind of a call do we have here?
12575	if (id->idInsFmt() == IF_METHPTR)
12576	{
12577	// This is call indirect via a method pointer
12578
12579	code = insCodeMR(ins);
12580	if (ins == INS_i_jmp)
12581	{
12582	code \|= `1`;
12583	}
12584
12585	if (id->idIsDspReloc())
12586	{
12587	dst += emitOutputWord(dst, code \| `0x0500`);
12588	#ifdef _TARGET_AMD64_
12589	dst += emitOutputLong(dst, `0`);
12590	#else
12591	dst += emitOutputLong(dst, (int)addr);
12592	#endif
12593	emitRecordRelocation((void)(dst - sizeof(int*)), addr, IMAGE_REL_BASED_DISP32);
12594	}
12595	else
12596	{
12597	#ifdef _TARGET_X86_
12598	dst += emitOutputWord(dst, code \| `0x0500`);
12599	#else //_TARGET_AMD64_
12600	// Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
12601	// This addr mode should never be used while generating relocatable ngen code nor if
12602	// the addr can be encoded as pc-relative address.
12603	noway_assert(!emitComp->opts.compReloc);
12604	noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
12605	noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
12606
12607	// This requires, specifying a SIB byte after ModRM byte.
12608	dst += emitOutputWord(dst, code \| `0x0400`);
12609	dst += emitOutputByte(dst, `0x25`);
12610	#endif //_TARGET_AMD64_
12611	dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
12612	}
12613	goto DONE_CALL;
12614	}
12615
12616	// Else
12617	// This is call direct where we know the target, thus we can
12618	// use a direct call; the target to jump to is in iiaAddr.
12619	assert(id->idInsFmt() == IF_METHOD);
12620
12621	// Output the call opcode followed by the target distance
12622	dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
12623
12624	ssize_t offset;
12625	#ifdef _TARGET_AMD64_
12626	// All REL32 on Amd64 go through recordRelocation. Here we will output zero to advance dst.
12627	offset = `0`;
12628	assert(id->idIsDspReloc());
12629	#else
12630	// Calculate PC relative displacement.
12631	// Although you think we should be using sizeof(void*), the x86 and x64 instruction set
12632	// only allow a 32-bit offset, so we correctly use sizeof(INT32)
12633	offset = addr - (dst + sizeof(INT32));
12634	#endif
12635
12636	dst += emitOutputLong(dst, offset);
12637
12638	if (id->idIsDspReloc())
12639	{
12640	emitRecordRelocation((void)(dst - sizeof*(INT32)), addr, IMAGE_REL_BASED_REL32);
12641	}
12642
12643	DONE_CALL:
12644
12645	/* We update the GC info before the call as the variables cannot be
12646	used by the call. Killing variables before the call helps with
12647	boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
12648	If we ever track aliased variables (which could be used by the
12649	call), we would have to keep them alive past the call.
12650	*/
12651	assert(FitsIn<unsigned char>(dst - *dp));
12652	callInstrSize = static_cast<unsigned char>(dst - *dp);
12653	emitUpdateLiveGCvars(GCvars, *dp);
12654
12655	// If the method returns a GC ref, mark EAX appropriately
12656	if (id->idGCref() == GCT_GCREF)
12657	{
12658	gcrefRegs \|= RBM_EAX;
12659	}
12660	else if (id->idGCref() == GCT_BYREF)
12661	{
12662	byrefRegs \|= RBM_EAX;
12663	}
12664
12665	#ifdef UNIX_AMD64_ABI
12666	// If a multi-register return method is called, mark RDX appropriately (for System V AMD64).
12667	if (id->idIsLargeCall())
12668	{
12669	instrDescCGCA* idCall = (instrDescCGCA*)id;
12670	if (idCall->idSecondGCref() == GCT_GCREF)
12671	{
12672	gcrefRegs \|= RBM_RDX;
12673	}
12674	else if (idCall->idSecondGCref() == GCT_BYREF)
12675	{
12676	byrefRegs \|= RBM_RDX;
12677	}
12678	}
12679	#endif // UNIX_AMD64_ABI
12680
12681	// If the GC register set has changed, report the new set
12682	if (gcrefRegs != emitThisGCrefRegs)
12683	{
12684	emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
12685	}
12686
12687	if (byrefRegs != emitThisByrefRegs)
12688	{
12689	emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
12690	}
12691
12692	if (recCall \|\| args)
12693	{
12694	// For callee-pop, all arguments will be popped after the call.
12695	// For caller-pop, any GC arguments will go dead after the call.
12696
12697	assert(callInstrSize != `0`);
12698
12699	if (args >= `0`)
12700	{
12701	emitStackPop(dst, /isCall/ true, callInstrSize, args);
12702	}
12703	else
12704	{
12705	emitStackKillArgs(dst, -args, callInstrSize);
12706	}
12707	}
12708
12709	// Do we need to record a call location for GC purposes?
12710	if (!emitFullGCinfo && recCall)
12711	{
12712	assert(callInstrSize != `0`);
12713	emitRecordGCcall(dst, callInstrSize);
12714	}
12715
12716	#ifdef DEBUG
12717	if (ins == INS_call)
12718	{
12719	emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
12720	(CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
12721	}
12722	#endif // DEBUG
12723
12724	break;
12725
12726	/******************************************************************/
12727	/* One register operand */
12728	/******************************************************************/
12729
12730	case IF_RRD:
12731	case IF_RWR:
12732	case IF_RRW:
12733	dst = emitOutputR(dst, id);
12734	sz = SMALL_IDSC_SIZE;
12735	break;
12736
12737	/******************************************************************/
12738	/* Register and register/constant */
12739	/******************************************************************/
12740
12741	case IF_RRW_SHF:
12742	code = insCodeMR(ins);
12743	// Emit the VEX prefix if it exists
12744	code = AddVexPrefixIfNeeded(ins, code, size);
12745	code = insEncodeMRreg(ins, id->idReg1(), size, code);
12746
12747	// set the W bit
12748	if (size != EA_1BYTE)
12749	{
12750	code \|= `1`;
12751	}
12752
12753	// Emit the REX prefix if it exists
12754	if (TakesRexWPrefix(ins, size))
12755	{
12756	code = AddRexWPrefix(ins, code);
12757	}
12758
12759	// Output a size prefix for a 16-bit operand
12760	if (size == EA_2BYTE)
12761	{
12762	dst += emitOutputByte(dst, `0x66`);
12763	}
12764
12765	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12766	dst += emitOutputWord(dst, code);
12767	dst += emitOutputByte(dst, emitGetInsSC(id));
12768	sz = emitSizeOfInsDsc(id);
12769
12770	// Update GC info.
12771	assert(!id->idGCref());
12772	emitGCregDeadUpd(id->idReg1(), dst);
12773	break;
12774
12775	case IF_RRD_RRD:
12776	case IF_RWR_RRD:
12777	case IF_RRW_RRD:
12778	case IF_RRW_RRW:
12779	dst = emitOutputRR(dst, id);
12780	sz = SMALL_IDSC_SIZE;
12781	break;
12782
12783	case IF_RRD_CNS:
12784	case IF_RWR_CNS:
12785	case IF_RRW_CNS:
12786	dst = emitOutputRI(dst, id);
12787	sz = emitSizeOfInsDsc(id);
12788	break;
12789
12790	case IF_RWR_RRD_RRD:
12791	dst = emitOutputRRR(dst, id);
12792	sz = emitSizeOfInsDsc(id);
12793	break;
12794	case IF_RWR_RRD_RRD_CNS:
12795	case IF_RWR_RRD_RRD_RRD:
12796	dst = emitOutputRRR(dst, id);
12797	sz = emitSizeOfInsDsc(id);
12798	dst += emitOutputByte(dst, emitGetInsSC(id));
12799	break;
12800
12801	case IF_RRW_RRW_CNS:
12802	assert(id->idGCref() == GCT_NONE);
12803
12804	// Get the 'base' opcode (it's a big one)
12805	// Also, determine which operand goes where in the ModRM byte.
12806	regNumber mReg;
12807	regNumber rReg;
12808	if (hasCodeMR(ins))
12809	{
12810	code = insCodeMR(ins);
12811	// Emit the VEX prefix if it exists
12812	code = AddVexPrefixIfNeeded(ins, code, size);
12813	code = insEncodeMRreg(ins, code);
12814	mReg = id->idReg1();
12815	rReg = id->idReg2();
12816	}
12817	else if (hasCodeMI(ins))
12818	{
12819	code = insCodeMI(ins);
12820
12821	// Emit the VEX prefix if it exists
12822	code = AddVexPrefixIfNeeded(ins, code, size);
12823
12824	assert((code & `0xC000`) == `0`);
12825	code \|= `0xC000`;
12826
12827	mReg = id->idReg2();
12828
12829	// The left and right shifts use the same encoding, and are distinguished by the Reg/Opcode field.
12830	rReg = getSseShiftRegNumber(ins);
12831	}
12832	else
12833	{
12834	code = insCodeRM(ins);
12835	// Emit the VEX prefix if it exists
12836	code = AddVexPrefixIfNeeded(ins, code, size);
12837	code = insEncodeRMreg(ins, code);
12838	mReg = id->idReg2();
12839	rReg = id->idReg1();
12840	}
12841	assert(code & `0x00FF0000`);
12842
12843	if (TakesRexWPrefix(ins, size))
12844	{
12845	code = AddRexWPrefix(ins, code);
12846	}
12847
12848	if (TakesVexPrefix(ins))
12849	{
12850	if (IsDstDstSrcAVXInstruction(ins))
12851	{
12852	// Encode source/dest operand reg in 'vvvv' bits in 1's complement form
12853	// This code will have to change when we support 3 operands.
12854	// For now, we always overload this source with the destination (always reg1).
12855	// (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
12856	// e.g. pslldq, when/if we support those instructions with 2 registers.)
12857	// (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
12858	code = insEncodeReg3456(ins, id->idReg1(), size, code);
12859	}
12860	else if (IsDstSrcSrcAVXInstruction(ins))
12861	{
12862	// This is a "merge" move instruction.
12863	// Encode source operand reg in 'vvvv' bits in 1's complement form
12864	code = insEncodeReg3456(ins, id->idReg2(), size, code);
12865	}
12866	}
12867
12868	regcode = (insEncodeReg345(ins, rReg, size, &code) \| insEncodeReg012(ins, mReg, size, &code));
12869
12870	// Output the REX prefix
12871	dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
12872
12873	if (code & `0xFF000000`)
12874	{
12875	// Output the highest word of the opcode
12876	dst += emitOutputWord(dst, code >> `16`);
12877	code &= `0x0000FFFF`;
12878
12879	if (Is4ByteSSEInstruction(ins))
12880	{
12881	// Output 3rd byte of the opcode
12882	dst += emitOutputByte(dst, code);
12883	code &= `0xFF00`;
12884	}
12885	}
12886	else if (code & `0x00FF0000`)
12887	{
12888	dst += emitOutputByte(dst, code >> `16`);
12889	code &= `0x0000FFFF`;
12890	}
12891
12892	// TODO-XArch-CQ: Right now support 4-byte opcode instructions only
12893	if ((code & `0xFF00`) == `0xC000`)
12894	{
12895	dst += emitOutputWord(dst, code \| (regcode << `8`));
12896	}
12897	else if ((code & `0xFF`) == `0x00`)
12898	{
12899	// This case happens for some SSE/AVX instructions only
12900	assert(IsAVXInstruction(ins) \|\| Is4ByteSSEInstruction(ins));
12901
12902	dst += emitOutputByte(dst, (code >> `8`) & `0xFF`);
12903	dst += emitOutputByte(dst, (`0xC0` \| regcode));
12904	}
12905	else
12906	{
12907	dst += emitOutputWord(dst, code);
12908	dst += emitOutputByte(dst, (`0xC0` \| regcode));
12909	}
12910
12911	dst += emitOutputByte(dst, emitGetInsSC(id));
12912	sz = emitSizeOfInsDsc(id);
12913
12914	// Kill any GC ref in the destination register if necessary.
12915	if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
12916	{
12917	emitGCregDeadUpd(id->idReg1(), dst);
12918	}
12919	break;
12920
12921	/******************************************************************/
12922	/* Address mode operand */
12923	/******************************************************************/
12924
12925	case IF_ARD:
12926	case IF_AWR:
12927	case IF_ARW:
12928
12929	dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
12930
12931	switch (ins)
12932	{
12933	case INS_call:
12934
12935	IND_CALL:
12936	// Get hold of the argument count and method handle
12937	args = emitGetInsCIargs(id);
12938
12939	// Is this a "fat" call descriptor?
12940	if (id->idIsLargeCall())
12941	{
12942	instrDescCGCA* idCall = (instrDescCGCA*)id;
12943
12944	gcrefRegs = idCall->idcGcrefRegs;
12945	byrefRegs = idCall->idcByrefRegs;
12946	VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
12947	sz = sizeof(instrDescCGCA);
12948	}
12949	else
12950	{
12951	assert(!id->idIsLargeDsp());
12952	assert(!id->idIsLargeCns());
12953
12954	gcrefRegs = emitDecodeCallGCregs(id);
12955	byrefRegs = `0`;
12956	VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
12957	sz = sizeof(instrDesc);
12958	}
12959
12960	recCall = true;
12961
12962	goto DONE_CALL;
12963
12964	default:
12965	sz = emitSizeOfInsDsc(id);
12966	break;
12967	}
12968	break;
12969
12970	case IF_RRW_ARD_CNS:
12971	case IF_RWR_ARD_CNS:
12972	emitGetInsAmdCns(id, &cnsVal);
12973	code = insCodeRM(ins);
12974
12975	// Special case 4-byte AVX instructions
12976	if (EncodedBySSE38orSSE3A(ins))
12977	{
12978	dst = emitOutputAM(dst, id, code, &cnsVal);
12979	}
12980	else
12981	{
12982	code = AddVexPrefixIfNeeded(ins, code, size);
12983	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
12984	dst = emitOutputAM(dst, id, code \| regcode, &cnsVal);
12985	}
12986
12987	sz = emitSizeOfInsDsc(id);
12988	break;
12989
12990	case IF_AWR_RRD_CNS:
12991	assert(ins == INS_vextracti128 \|\| ins == INS_vextractf128);
12992	assert(UseVEXEncoding());
12993	emitGetInsAmdCns(id, &cnsVal);
12994	code = insCodeMR(ins);
12995	dst = emitOutputAM(dst, id, code, &cnsVal);
12996	sz = emitSizeOfInsDsc(id);
12997	break;
12998
12999	case IF_RRD_ARD:
13000	case IF_RWR_ARD:
13001	case IF_RRW_ARD:
13002	case IF_RWR_RRD_ARD:
13003	{
13004	code = insCodeRM(ins);
13005	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
13006	{
13007	dst = emitOutputAM(dst, id, code);
13008	}
13009	else
13010	{
13011	code = AddVexPrefixIfNeeded(ins, code, size);
13012	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13013	dst = emitOutputAM(dst, id, code \| regcode);
13014	}
13015	sz = emitSizeOfInsDsc(id);
13016	break;
13017	}
13018
13019	case IF_RWR_ARD_RRD:
13020	{
13021	assert(IsAVX2GatherInstruction(ins));
13022	code = insCodeRM(ins);
13023	dst = emitOutputAM(dst, id, code);
13024	sz = emitSizeOfInsDsc(id);
13025	break;
13026	}
13027
13028	case IF_RWR_RRD_ARD_CNS:
13029	case IF_RWR_RRD_ARD_RRD:
13030	{
13031	emitGetInsAmdCns(id, &cnsVal);
13032	code = insCodeRM(ins);
13033	if (EncodedBySSE38orSSE3A(ins))
13034	{
13035	dst = emitOutputAM(dst, id, code, &cnsVal);
13036	}
13037	else
13038	{
13039	code = AddVexPrefixIfNeeded(ins, code, size);
13040	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13041	dst = emitOutputAM(dst, id, code \| regcode, &cnsVal);
13042	}
13043	sz = emitSizeOfInsDsc(id);
13044	break;
13045	}
13046
13047	case IF_ARD_RRD:
13048	case IF_AWR_RRD:
13049	case IF_ARW_RRD:
13050	code = insCodeMR(ins);
13051	code = AddVexPrefixIfNeeded(ins, code, size);
13052	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13053	dst = emitOutputAM(dst, id, code \| regcode);
13054	sz = emitSizeOfInsDsc(id);
13055	break;
13056
13057	case IF_AWR_RRD_RRD:
13058	{
13059	code = insCodeMR(ins);
13060	code = AddVexPrefixIfNeeded(ins, code, size);
13061	dst = emitOutputAM(dst, id, code);
13062	sz = emitSizeOfInsDsc(id);
13063	break;
13064	}
13065
13066	case IF_ARD_CNS:
13067	case IF_AWR_CNS:
13068	case IF_ARW_CNS:
13069	emitGetInsAmdCns(id, &cnsVal);
13070	dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
13071	sz = emitSizeOfInsDsc(id);
13072	break;
13073
13074	case IF_ARW_SHF:
13075	emitGetInsAmdCns(id, &cnsVal);
13076	dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
13077	sz = emitSizeOfInsDsc(id);
13078	break;
13079
13080	/******************************************************************/
13081	/* Stack-based operand */
13082	/******************************************************************/
13083
13084	case IF_SRD:
13085	case IF_SWR:
13086	case IF_SRW:
13087
13088	assert(ins != INS_pop_hide);
13089	if (ins == INS_pop)
13090	{
13091	// The offset in "pop [ESP+xxx]" is relative to the new ESP value
13092	CLANG_FORMAT_COMMENT_ANCHOR;
13093
13094	#if !FEATURE_FIXED_OUT_ARGS
13095	emitCurStackLvl -= sizeof(int);
13096	#endif
13097	dst = emitOutputSV(dst, id, insCodeMR(ins));
13098
13099	#if !FEATURE_FIXED_OUT_ARGS
13100	emitCurStackLvl += sizeof(int);
13101	#endif
13102	break;
13103	}
13104
13105	dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
13106
13107	if (ins == INS_call)
13108	{
13109	goto IND_CALL;
13110	}
13111
13112	break;
13113
13114	case IF_SRD_CNS:
13115	case IF_SWR_CNS:
13116	case IF_SRW_CNS:
13117	emitGetInsCns(id, &cnsVal);
13118	dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
13119	sz = emitSizeOfInsDsc(id);
13120	break;
13121
13122	case IF_SRW_SHF:
13123	emitGetInsCns(id, &cnsVal);
13124	dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
13125	sz = emitSizeOfInsDsc(id);
13126	break;
13127
13128	case IF_RRW_SRD_CNS:
13129	case IF_RWR_SRD_CNS:
13130	emitGetInsCns(id, &cnsVal);
13131	code = insCodeRM(ins);
13132
13133	// Special case 4-byte AVX instructions
13134	if (EncodedBySSE38orSSE3A(ins))
13135	{
13136	dst = emitOutputSV(dst, id, code, &cnsVal);
13137	}
13138	else
13139	{
13140	code = AddVexPrefixIfNeeded(ins, code, size);
13141
13142	// In case of AVX instructions that take 3 operands, encode reg1 as first source.
13143	// Note that reg1 is both a source and a destination.
13144	//
13145	// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13146	// now we use the single source as source1 and source2.
13147	// For this format, moves do not support a third operand, so we only need to handle the binary ops.
13148	if (IsDstDstSrcAVXInstruction(ins))
13149	{
13150	// encode source operand reg in 'vvvv' bits in 1's complement form
13151	code = insEncodeReg3456(ins, id->idReg1(), size, code);
13152	}
13153
13154	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13155	dst = emitOutputSV(dst, id, code \| regcode, &cnsVal);
13156	}
13157
13158	sz = emitSizeOfInsDsc(id);
13159	break;
13160
13161	case IF_RRD_SRD:
13162	case IF_RWR_SRD:
13163	case IF_RRW_SRD:
13164	{
13165	code = insCodeRM(ins);
13166
13167	// 4-byte AVX instructions are special cased inside emitOutputSV
13168	// since they do not have space to encode ModRM byte.
13169	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
13170	{
13171	dst = emitOutputSV(dst, id, code);
13172	}
13173	else
13174	{
13175	code = AddVexPrefixIfNeeded(ins, code, size);
13176
13177	if (IsDstDstSrcAVXInstruction(ins))
13178	{
13179	// encode source operand reg in 'vvvv' bits in 1's complement form
13180	code = insEncodeReg3456(ins, id->idReg1(), size, code);
13181	}
13182
13183	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13184	dst = emitOutputSV(dst, id, code \| regcode);
13185	}
13186
13187	sz = emitSizeOfInsDsc(id);
13188	break;
13189	}
13190
13191	case IF_RWR_RRD_SRD:
13192	{
13193	// This should only be called on AVX instructions
13194	assert(IsAVXInstruction(ins));
13195
13196	code = insCodeRM(ins);
13197	code = AddVexPrefixIfNeeded(ins, code, size);
13198	code = insEncodeReg3456(ins, id->idReg2(), size,
13199	code); // encode source operand reg in 'vvvv' bits in 1's complement form
13200
13201	// 4-byte AVX instructions are special cased inside emitOutputSV
13202	// since they do not have space to encode ModRM byte.
13203	if (EncodedBySSE38orSSE3A(ins))
13204	{
13205	dst = emitOutputSV(dst, id, code);
13206	}
13207	else
13208	{
13209	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13210	dst = emitOutputSV(dst, id, code \| regcode);
13211	}
13212	break;
13213	}
13214
13215	case IF_RWR_RRD_SRD_CNS:
13216	case IF_RWR_RRD_SRD_RRD:
13217	{
13218	// This should only be called on AVX instructions
13219	assert(IsAVXInstruction(ins));
13220	emitGetInsCns(id, &cnsVal);
13221
13222	code = insCodeRM(ins);
13223	code = AddVexPrefixIfNeeded(ins, code, size);
13224	code = insEncodeReg3456(ins, id->idReg2(), size,
13225	code); // encode source operand reg in 'vvvv' bits in 1's complement form
13226
13227	// 4-byte AVX instructions are special cased inside emitOutputSV
13228	// since they do not have space to encode ModRM byte.
13229	if (EncodedBySSE38orSSE3A(ins))
13230	{
13231	dst = emitOutputSV(dst, id, code, &cnsVal);
13232	}
13233	else
13234	{
13235	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13236	dst = emitOutputSV(dst, id, code \| regcode, &cnsVal);
13237	}
13238
13239	sz = emitSizeOfInsDsc(id);
13240	break;
13241	}
13242
13243	case IF_SRD_RRD:
13244	case IF_SWR_RRD:
13245	case IF_SRW_RRD:
13246	code = insCodeMR(ins);
13247	code = AddVexPrefixIfNeeded(ins, code, size);
13248
13249	// In case of AVX instructions that take 3 operands, encode reg1 as first source.
13250	// Note that reg1 is both a source and a destination.
13251	//
13252	// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13253	// now we use the single source as source1 and source2.
13254	// For this format, moves do not support a third operand, so we only need to handle the binary ops.
13255	if (IsDstDstSrcAVXInstruction(ins))
13256	{
13257	// encode source operand reg in 'vvvv' bits in 1's complement form
13258	code = insEncodeReg3456(ins, id->idReg1(), size, code);
13259	}
13260
13261	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13262	dst = emitOutputSV(dst, id, code \| regcode);
13263	break;
13264
13265	/******************************************************************/
13266	/* Direct memory address */
13267	/******************************************************************/
13268
13269	case IF_MRD:
13270	case IF_MRW:
13271	case IF_MWR:
13272
13273	noway_assert(ins != INS_call);
13274	dst = emitOutputCV(dst, id, insCodeMR(ins) \| `0x0500`);
13275	sz = emitSizeOfInsDsc(id);
13276	break;
13277
13278	case IF_MRD_OFF:
13279	dst = emitOutputCV(dst, id, insCodeMI(ins));
13280	break;
13281
13282	case IF_RRW_MRD_CNS:
13283	case IF_RWR_MRD_CNS:
13284	emitGetInsDcmCns(id, &cnsVal);
13285	code = insCodeRM(ins);
13286
13287	// Special case 4-byte AVX instructions
13288	if (EncodedBySSE38orSSE3A(ins))
13289	{
13290	dst = emitOutputCV(dst, id, code, &cnsVal);
13291	}
13292	else
13293	{
13294	code = AddVexPrefixIfNeeded(ins, code, size);
13295
13296	// In case of AVX instructions that take 3 operands, encode reg1 as first source.
13297	// Note that reg1 is both a source and a destination.
13298	//
13299	// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13300	// now we use the single source as source1 and source2.
13301	// For this format, moves do not support a third operand, so we only need to handle the binary ops.
13302	if (IsDstDstSrcAVXInstruction(ins))
13303	{
13304	// encode source operand reg in 'vvvv' bits in 1's complement form
13305	code = insEncodeReg3456(ins, id->idReg1(), size, code);
13306	}
13307
13308	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13309	dst = emitOutputCV(dst, id, code \| regcode \| `0x0500`, &cnsVal);
13310	}
13311
13312	sz = emitSizeOfInsDsc(id);
13313	break;
13314
13315	case IF_MWR_RRD_CNS:
13316	assert(ins == INS_vextracti128 \|\| ins == INS_vextractf128);
13317	assert(UseVEXEncoding());
13318	emitGetInsDcmCns(id, &cnsVal);
13319	code = insCodeMR(ins);
13320	// only AVX2 vextracti128 and AVX vextractf128 can reach this path,
13321	// they do not need VEX.vvvv to encode the register operand
13322	dst = emitOutputCV(dst, id, code, &cnsVal);
13323	sz = emitSizeOfInsDsc(id);
13324	break;
13325
13326	case IF_RRD_MRD:
13327	case IF_RWR_MRD:
13328	case IF_RRW_MRD:
13329	{
13330	code = insCodeRM(ins);
13331
13332	// Special case 4-byte AVX instructions
13333	if (EncodedBySSE38orSSE3A(ins) \|\| (ins == INS_crc32))
13334	{
13335	dst = emitOutputCV(dst, id, code);
13336	}
13337	else
13338	{
13339	code = AddVexPrefixIfNeeded(ins, code, size);
13340
13341	if (IsDstDstSrcAVXInstruction(ins))
13342	{
13343	// encode source operand reg in 'vvvv' bits in 1's complement form
13344	code = insEncodeReg3456(ins, id->idReg1(), size, code);
13345	}
13346
13347	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13348	dst = emitOutputCV(dst, id, code \| regcode \| `0x0500`);
13349	}
13350
13351	sz = emitSizeOfInsDsc(id);
13352	break;
13353	}
13354
13355	case IF_RWR_RRD_MRD:
13356	{
13357	// This should only be called on AVX instructions
13358	assert(IsAVXInstruction(ins));
13359
13360	code = insCodeRM(ins);
13361	code = AddVexPrefixIfNeeded(ins, code, size);
13362	code = insEncodeReg3456(ins, id->idReg2(), size,
13363	code); // encode source operand reg in 'vvvv' bits in 1's complement form
13364
13365	// Special case 4-byte AVX instructions
13366	if (EncodedBySSE38orSSE3A(ins))
13367	{
13368	dst = emitOutputCV(dst, id, code);
13369	}
13370	else
13371	{
13372	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13373	dst = emitOutputCV(dst, id, code \| regcode \| `0x0500`);
13374	}
13375	sz = emitSizeOfInsDsc(id);
13376	break;
13377	}
13378
13379	case IF_RWR_RRD_MRD_CNS:
13380	case IF_RWR_RRD_MRD_RRD:
13381	{
13382	// This should only be called on AVX instructions
13383	assert(IsAVXInstruction(ins));
13384	emitGetInsCns(id, &cnsVal);
13385
13386	code = insCodeRM(ins);
13387	code = AddVexPrefixIfNeeded(ins, code, size);
13388	code = insEncodeReg3456(ins, id->idReg2(), size,
13389	code); // encode source operand reg in 'vvvv' bits in 1's complement form
13390
13391	// Special case 4-byte AVX instructions
13392	if (EncodedBySSE38orSSE3A(ins))
13393	{
13394	dst = emitOutputCV(dst, id, code, &cnsVal);
13395	}
13396	else
13397	{
13398	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13399	dst = emitOutputCV(dst, id, code \| regcode \| `0x0500`, &cnsVal);
13400	}
13401	sz = emitSizeOfInsDsc(id);
13402	break;
13403	}
13404
13405	case IF_RWR_MRD_OFF:
13406	code = insCode(ins);
13407	code = AddVexPrefixIfNeeded(ins, code, size);
13408
13409	// In case of AVX instructions that take 3 operands, encode reg1 as first source.
13410	// Note that reg1 is both a source and a destination.
13411	//
13412	// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13413	// now we use the single source as source1 and source2.
13414	// For this format, moves do not support a third operand, so we only need to handle the binary ops.
13415	if (IsDstDstSrcAVXInstruction(ins))
13416	{
13417	// encode source operand reg in 'vvvv' bits in 1's complement form
13418	code = insEncodeReg3456(ins, id->idReg1(), size, code);
13419	}
13420
13421	regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
13422	dst = emitOutputCV(dst, id, code \| `0x30` \| regcode);
13423	sz = emitSizeOfInsDsc(id);
13424	break;
13425
13426	case IF_MRD_RRD:
13427	case IF_MWR_RRD:
13428	case IF_MRW_RRD:
13429	code = insCodeMR(ins);
13430	code = AddVexPrefixIfNeeded(ins, code, size);
13431
13432	// In case of AVX instructions that take 3 operands, encode reg1 as first source.
13433	// Note that reg1 is both a source and a destination.
13434	//
13435	// TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
13436	// now we use the single source as source1 and source2.
13437	// For this format, moves do not support a third operand, so we only need to handle the binary ops.
13438	if (IsDstDstSrcAVXInstruction(ins))
13439	{
13440	// encode source operand reg in 'vvvv' bits in 1's complement form
13441	code = insEncodeReg3456(ins, id->idReg1(), size, code);
13442	}
13443
13444	regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << `8`);
13445	dst = emitOutputCV(dst, id, code \| regcode \| `0x0500`);
13446	sz = emitSizeOfInsDsc(id);
13447	break;
13448
13449	case IF_MRD_CNS:
13450	case IF_MWR_CNS:
13451	case IF_MRW_CNS:
13452	emitGetInsDcmCns(id, &cnsVal);
13453	dst = emitOutputCV(dst, id, insCodeMI(ins) \| `0x0500`, &cnsVal);
13454	sz = emitSizeOfInsDsc(id);
13455	break;
13456
13457	case IF_MRW_SHF:
13458	emitGetInsDcmCns(id, &cnsVal);
13459	dst = emitOutputCV(dst, id, insCodeMR(ins) \| `0x0500`, &cnsVal);
13460	sz = emitSizeOfInsDsc(id);
13461	break;
13462
13463	/******************************************************************/
13464	/* oops */
13465	/******************************************************************/
13466
13467	default:
13468
13469	#ifdef DEBUG
13470	printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
13471	assert(!"don't know how to encode this instruction");
13472	#endif
13473	break;
13474	}
13475
13476	// Make sure we set the instruction descriptor size correctly
13477	assert(sz == emitSizeOfInsDsc(id));
13478
13479	#if !FEATURE_FIXED_OUT_ARGS
13480	bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
13481
13482	#if FEATURE_EH_FUNCLETS
13483	updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
13484	#endif // FEATURE_EH_FUNCLETS
13485
13486	// Make sure we keep the current stack level up to date
13487	if (updateStackLevel)
13488	{
13489	switch (ins)
13490	{
13491	case INS_push:
13492	// Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
13493	// finally block for calling it locally for an op_leave.
13494	emitStackPush(dst, id->idGCref());
13495	break;
13496
13497	case INS_pop:
13498	emitStackPop(dst, false, /callInstrSize/ `0`, `1`);
13499	break;
13500
13501	case INS_sub:
13502	// Check for "sub ESP, icon"
13503	if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13504	{
13505	assert((size_t)emitGetInsSC(id) < `0x00000000FFFFFFFFLL`);
13506	emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13507	}
13508	break;
13509
13510	case INS_add:
13511	// Check for "add ESP, icon"
13512	if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
13513	{
13514	assert((size_t)emitGetInsSC(id) < `0x00000000FFFFFFFFLL`);
13515	emitStackPop(dst, /isCall/ false, /callInstrSize/ `0`,
13516	(unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE));
13517	}
13518	break;
13519
13520	default:
13521	break;
13522	}
13523	}
13524
13525	#endif // !FEATURE_FIXED_OUT_ARGS
13526
13527	assert((int)emitCurStackLvl >= `0`);
13528
13529	// Only epilog "instructions" and some pseudo-instrs
13530	// are allowed not to generate any code
13531
13532	assert(*dp != dst \|\| emitInstHasNoCode(ins));
13533
13534	#ifdef DEBUG
13535	if (emitComp->opts.disAsm \|\| emitComp->opts.dspEmit \|\| emitComp->verbose)
13536	{
13537	emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(dp), dp, (dst - *dp));
13538	}
13539
13540	if (emitComp->compDebugBreak)
13541	{
13542	// set JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
13543	// at the beginning of this method.
13544	if (JitConfig.JitEmitPrintRefRegs() != `0`)
13545	{
13546	printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
13547	printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
13548	printRegMaskInt(emitThisGCrefRegs);
13549	emitDispRegSet(emitThisGCrefRegs);
13550	printf("\n");
13551	printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
13552	printRegMaskInt(emitThisByrefRegs);
13553	emitDispRegSet(emitThisByrefRegs);
13554	printf("\n");
13555	}
13556
13557	// For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
13558	// emitting instruction a6, (i.e. IN00a6 in jitdump).
13559	if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
13560	{
13561	assert(!"JitBreakEmitOutputInstr reached");
13562	}
13563	}
13564	#endif
13565
13566	#ifdef TRANSLATE_PDB
13567	if (*dp != dst)
13568	{
13569	// only map instruction groups to instruction groups
13570	MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
13571	}
13572	#endif
13573
13574	*dp = dst;
13575
13576	#ifdef DEBUG
13577	if (ins == INS_mulEAX \|\| ins == INS_imulEAX)
13578	{
13579	// INS_mulEAX has implicit target of Edx:Eax. Make sure
13580	// that we detected this and cleared its GC-status.
13581
13582	assert(((RBM_EAX \| RBM_EDX) & (emitThisGCrefRegs \| emitThisByrefRegs)) == `0`);
13583	}
13584
13585	if (instrIs3opImul(ins))
13586	{
13587	// The target of the 3-operand imul is implicitly encoded. Make sure
13588	// that we detected the implicit register and cleared its GC-status.
13589
13590	regMaskTP regMask = genRegMask(inst3opImulReg(ins));
13591	assert((regMask & (emitThisGCrefRegs \| emitThisByrefRegs)) == `0`);
13592	}
13593	#endif
13594
13595	return sz;
13596	}
13597	#ifdef _PREFAST_
13598	#pragma warning(pop)
13599	#endif
13600
13601	/***************************************************************************/
13602	/***************************************************************************/
13603
13604	#endif // defined(_TARGET_XARCH_)
13605

Browse the source code of CoreCLR/jit/emitxarch.cpp